From 7ee42d19e8abc0f4dd9408dc41adf7ca3ca24b2a Mon Sep 17 00:00:00 2001
From: Antoine Pitrou
Date: Wed, 4 Jun 2025 09:54:42 +0200
Subject: [PATCH] GH-46704: [C++] Fix OSS-Fuzz build failure

PR #46408 mistakenly changed the list-view IPC tests to use the same data
as the list tests. This was detected as a duplicate corpus file by the
OSS-Fuzz CI build.

This PR also includes a fix for a regression in the CUDA tests, caused by
reading non-CPU memory.
---
 cpp/build-support/fuzzing/pack_corpus.py | 10 ++++++----
 cpp/src/arrow/ipc/test_common.cc         |  2 +-
 cpp/src/arrow/ipc/writer.cc              | 18 +++++++++++++++---
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/cpp/build-support/fuzzing/pack_corpus.py b/cpp/build-support/fuzzing/pack_corpus.py
index 07fc09f9026..94d9a88b387 100755
--- a/cpp/build-support/fuzzing/pack_corpus.py
+++ b/cpp/build-support/fuzzing/pack_corpus.py
@@ -27,7 +27,7 @@
 
 
 def process_dir(corpus_dir, zip_output):
-    seen = set()
+    seen_hashes = {}
 
     for child in corpus_dir.iterdir():
         if not child.is_file():
@@ -35,10 +35,12 @@ def process_dir(corpus_dir, zip_output):
         with child.open('rb') as f:
             data = f.read()
         arcname = hashlib.sha1(data).hexdigest()
-        if arcname in seen:
-            raise ValueError(f"Duplicate hash: {arcname} (in file {child})")
+        if arcname in seen_hashes:
+            raise ValueError(
+                f"Duplicate hash: {arcname} (in file {child}), "
+                f"already seen in file {seen_hashes[arcname]}")
         zip_output.writestr(str(arcname), data)
-        seen.add(arcname)
+        seen_hashes[arcname] = child
 
 
 def main(corpus_dir, zip_output_name):
diff --git a/cpp/src/arrow/ipc/test_common.cc b/cpp/src/arrow/ipc/test_common.cc
index 46060a0db10..a739990fc93 100644
--- a/cpp/src/arrow/ipc/test_common.cc
+++ b/cpp/src/arrow/ipc/test_common.cc
@@ -474,7 +474,7 @@ Status MakeListViewRecordBatchSized(const int length,
 }
 
 Status MakeListViewRecordBatch(std::shared_ptr<RecordBatch>* out) {
-  return MakeListRecordBatchSized(200, out);
+  return MakeListViewRecordBatchSized(200, out);
 }
 
 Status MakeFixedSizeListRecordBatch(std::shared_ptr<RecordBatch>* out) {
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 8b7d943fc71..4238ecbf3a0 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -329,15 +329,24 @@ class RecordBatchSerializer {
       return Status::OK();
     }
 
-    int64_t required_bytes = sizeof(offset_type) * (array.length() + 1);
-    if (array.value_offset(0) > 0) {
+    const int64_t required_bytes = sizeof(offset_type) * (array.length() + 1);
+
+    offset_type first_offset = 0;
+    RETURN_NOT_OK(MemoryManager::CopyBufferSliceToCPU(
+        array.data()->buffers[1], array.offset() * sizeof(offset_type),
+        sizeof(offset_type), reinterpret_cast<uint8_t*>(&first_offset)));
+
+    if (first_offset > 0) {
       // If the offset of the first value is non-zero, then we must create a new
       // offsets buffer with shifted offsets.
+      if (!array.data()->buffers[1]->is_cpu()) {
+        return Status::NotImplemented("Rebasing non-CPU offsets");
+      }
       ARROW_ASSIGN_OR_RAISE(auto shifted_offsets,
                             AllocateBuffer(required_bytes, options_.memory_pool));
 
-      auto dest_offsets = shifted_offsets->mutable_span_as<offset_type>();
       const offset_type* source_offsets = array.raw_value_offsets();
+      auto dest_offsets = shifted_offsets->mutable_span_as<offset_type>();
       const offset_type start_offset = source_offsets[0];
 
       for (int i = 0; i <= array.length(); ++i) {
@@ -369,6 +378,9 @@ class RecordBatchSerializer {
       // If we have a non-zero offset, it's likely that the smallest offset is
       // not zero. We must a) create a new offsets array with shifted offsets and
       // b) slice the values array accordingly.
+      if (!array.data()->buffers[1]->is_cpu()) {
+        return Status::NotImplemented("Rebasing non-CPU list view offsets");
+      }
       ARROW_ASSIGN_OR_RAISE(auto shifted_offsets,
                             AllocateBuffer(required_bytes, options_.memory_pool));
 
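Note on the writer.cc change: instead of dereferencing the offsets buffer directly
(which regressed the CUDA tests), the serializer now copies only the first offset
value to CPU memory via MemoryManager::CopyBufferSliceToCPU, and rebasing of
non-CPU offsets is explicitly left unimplemented. Below is a minimal sketch of
that pattern, not part of the patch; the helper name CheckFirstOffset, the
int32_t offset type, and the surrounding setup are illustrative assumptions only.

// Sketch only: device-safe peek at the first list offset, after the pattern
// used in RecordBatchSerializer. CheckFirstOffset is a hypothetical helper.
#include <cstdint>
#include <memory>

#include "arrow/buffer.h"
#include "arrow/device.h"
#include "arrow/status.h"

arrow::Status CheckFirstOffset(const std::shared_ptr<arrow::Buffer>& offsets,
                               int64_t array_offset) {
  using offset_type = int32_t;  // int64_t for the "large" list variants

  // Copy a single offset value into CPU memory; this works whether the buffer
  // is a plain CPU buffer or device memory (e.g. CUDA).
  offset_type first_offset = 0;
  ARROW_RETURN_NOT_OK(arrow::MemoryManager::CopyBufferSliceToCPU(
      offsets, array_offset * sizeof(offset_type), sizeof(offset_type),
      reinterpret_cast<uint8_t*>(&first_offset)));

  if (first_offset > 0 && !offsets->is_cpu()) {
    // Rebasing would require reading the whole offsets buffer on the CPU,
    // which the serializer does not attempt for non-CPU data.
    return arrow::Status::NotImplemented("Rebasing non-CPU offsets");
  }
  return arrow::Status::OK();
}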