From 7c62c5b6daec1d35f0f75bac56163323e8559596 Mon Sep 17 00:00:00 2001
From: Bryan Cutler
Date: Mon, 5 Oct 2020 16:14:45 -0700
Subject: [PATCH 1/3] Remove patch to fix Spark master build

---
 ci/docker/conda-python-spark.dockerfile   |  4 --
 ci/etc/integration_spark_ARROW-9438.patch | 72 -----------------------
 2 files changed, 76 deletions(-)
 delete mode 100644 ci/etc/integration_spark_ARROW-9438.patch

diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/docker/conda-python-spark.dockerfile
index a20f1ff3521..d3f0a224582 100644
--- a/ci/docker/conda-python-spark.dockerfile
+++ b/ci/docker/conda-python-spark.dockerfile
@@ -36,10 +36,6 @@ ARG spark=master
 COPY ci/scripts/install_spark.sh /arrow/ci/scripts/
 RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark
 
-# patch spark to build with current Arrow Java
-COPY ci/etc/integration_spark_ARROW-9438.patch /arrow/ci/etc/
-RUN patch -d /spark -p1 -i /arrow/ci/etc/integration_spark_ARROW-9438.patch
-
 # build cpp with tests
 ENV CC=gcc \
     CXX=g++ \
diff --git a/ci/etc/integration_spark_ARROW-9438.patch b/ci/etc/integration_spark_ARROW-9438.patch
deleted file mode 100644
index 2baed303717..00000000000
--- a/ci/etc/integration_spark_ARROW-9438.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 0b5388a945a7e5c5706cf00d0754540a6c68254d Mon Sep 17 00:00:00 2001
-From: Bryan Cutler
-Date: Mon, 13 Jul 2020 23:12:25 -0700
-Subject: [PATCH] Update Arrow Java for 1.0.0
-
----
- pom.xml              | 17 ++++++++++++++---
- sql/catalyst/pom.xml |  4 ++++
- 2 files changed, 18 insertions(+), 3 deletions(-)
-
-diff --git a/pom.xml b/pom.xml
-index 08ca13bfe9..6619fca200 100644
---- a/pom.xml
-+++ b/pom.xml
-@@ -199,7 +199,7 @@
-         If you are changing Arrow version specification, please check ./python/pyspark/sql/utils.py,
-         and ./python/setup.py too.
-    -->
--    <arrow.version>0.15.1</arrow.version>
-+    <arrow.version>1.0.0-SNAPSHOT</arrow.version>
-
-    org.fusesource.leveldbjni
-
-@@ -2288,7 +2288,7 @@
-
-
-          <groupId>com.fasterxml.jackson.core</groupId>
--          <artifactId>jackson-databind</artifactId>
-+          <artifactId>jackson-core</artifactId>
-
-
-          <groupId>io.netty</groupId>
-@@ -2298,9 +2298,20 @@
-          <groupId>io.netty</groupId>
-          <artifactId>netty-common</artifactId>
-
-+
-+
-+
-+        <groupId>org.apache.arrow</groupId>
-+        <artifactId>arrow-memory-netty</artifactId>
-+        <version>${arrow.version}</version>
-+
-
-
-          <groupId>io.netty</groupId>
--          <artifactId>netty-handler</artifactId>
-+          <artifactId>netty-buffer</artifactId>
-+
-+
-+        <groupId>io.netty</groupId>
-+        <artifactId>netty-common</artifactId>
-
-
-diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml
-index 9edbb7fec9..6b79eb722f 100644
---- a/sql/catalyst/pom.xml
-+++ b/sql/catalyst/pom.xml
-@@ -117,6 +117,10 @@
-      <groupId>org.apache.arrow</groupId>
-      <artifactId>arrow-vector</artifactId>
-
-+
-+      <groupId>org.apache.arrow</groupId>
-+      <artifactId>arrow-memory-netty</artifactId>
-+
-
-
-     target/scala-${scala.binary.version}/classes
--- 
-2.17.1
-

From ed31bb2fea84d753539752aae3e80ae800c69ab6 Mon Sep 17 00:00:00 2001
From: Bryan Cutler
Date: Tue, 6 Oct 2020 15:44:45 -0700
Subject: [PATCH 2/3] Add Spark patch to correct nested timestamps

---
 ci/docker/conda-python-spark.dockerfile    |  4 +++
 ci/etc/integration_spark_ARROW-10178.patch | 37 ++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 ci/etc/integration_spark_ARROW-10178.patch

diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/docker/conda-python-spark.dockerfile
index d3f0a224582..4d1a6933f03 100644
--- a/ci/docker/conda-python-spark.dockerfile
+++ b/ci/docker/conda-python-spark.dockerfile
@@ -36,6 +36,10 @@ ARG spark=master
 COPY ci/scripts/install_spark.sh /arrow/ci/scripts/
 RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark
 
+# patch spark for nested timestamp correction, remove after SPARK-32285
+COPY ci/etc/integration_spark_ARROW-10178.patch /arrow/ci/etc/
+RUN patch -d /spark -p1 -i /arrow/ci/etc/integration_spark_ARROW-10178.patch
+
 # build cpp with tests
 ENV CC=gcc \
     CXX=g++ \
diff --git a/ci/etc/integration_spark_ARROW-10178.patch b/ci/etc/integration_spark_ARROW-10178.patch
new file mode 100644
index 00000000000..39bcc0bac3a
--- /dev/null
+++ b/ci/etc/integration_spark_ARROW-10178.patch
@@ -0,0 +1,37 @@
+diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py
+index 42562e1fb9..d00b67e99b 100644
+--- a/python/pyspark/sql/pandas/serializers.py
++++ b/python/pyspark/sql/pandas/serializers.py
+@@ -120,15 +120,30 @@ class ArrowStreamPandasSerializer(ArrowStreamSerializer):
+ 
+     def arrow_to_pandas(self, arrow_column):
+         from pyspark.sql.pandas.types import _check_series_localize_timestamps
+-        import pyarrow
++        import pyarrow as pa
+ 
+         # If the given column is a date type column, creates a series of datetime.date directly
+         # instead of creating datetime64[ns] as intermediate data to avoid overflow caused by
+         # datetime64[ns] type handling.
+         s = arrow_column.to_pandas(date_as_object=True)
+ 
+-        if pyarrow.types.is_timestamp(arrow_column.type):
++        if pa.types.is_timestamp(arrow_column.type):
+             return _check_series_localize_timestamps(s, self._timezone)
++        elif pa.types.is_struct(arrow_column.type):
++            if isinstance(arrow_column, pa.ChunkedArray):
++                arrow_column = pa.concat_arrays(arrow_column.iterchunks())
++            children = []
++            names = []
++            for f in arrow_column.type:
++                child = arrow_column.field(f.name)
++                if pa.types.is_timestamp(child.type):
++                    child_series = child.to_pandas()
++                    child_series = _check_series_localize_timestamps(child_series, self._timezone)
++                    child = pa.array(child_series, type=pa.timestamp('us'))
++                children.append(child)
++                names.append(f.name)
++            arr = pa.StructArray.from_arrays(children, names)
++            return arr.to_pandas(date_as_object=True)
+         else:
+             return s
+ 

From 6a064e5084029ac8407836a1bd56b3437599761a Mon Sep 17 00:00:00 2001
From: Bryan Cutler
Date: Tue, 6 Oct 2020 23:06:32 -0700
Subject: [PATCH 3/3] Revert "Add Spark patch to correct nested timestamps"

This reverts commit ed31bb2fea84d753539752aae3e80ae800c69ab6.
---
 ci/docker/conda-python-spark.dockerfile    |  4 ---
 ci/etc/integration_spark_ARROW-10178.patch | 37 ----------------------
 2 files changed, 41 deletions(-)
 delete mode 100644 ci/etc/integration_spark_ARROW-10178.patch

diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/docker/conda-python-spark.dockerfile
index 4d1a6933f03..d3f0a224582 100644
--- a/ci/docker/conda-python-spark.dockerfile
+++ b/ci/docker/conda-python-spark.dockerfile
@@ -36,10 +36,6 @@ ARG spark=master
 COPY ci/scripts/install_spark.sh /arrow/ci/scripts/
 RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark
 
-# patch spark for nested timestamp correction, remove after SPARK-32285
-COPY ci/etc/integration_spark_ARROW-10178.patch /arrow/ci/etc/
-RUN patch -d /spark -p1 -i /arrow/ci/etc/integration_spark_ARROW-10178.patch
-
 # build cpp with tests
 ENV CC=gcc \
     CXX=g++ \
diff --git a/ci/etc/integration_spark_ARROW-10178.patch b/ci/etc/integration_spark_ARROW-10178.patch
deleted file mode 100644
index 39bcc0bac3a..00000000000
--- a/ci/etc/integration_spark_ARROW-10178.patch
+++ /dev/null
@@ -1,37 +0,0 @@
-diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py
-index 42562e1fb9..d00b67e99b 100644
---- a/python/pyspark/sql/pandas/serializers.py
-+++ b/python/pyspark/sql/pandas/serializers.py
-@@ -120,15 +120,30 @@ class ArrowStreamPandasSerializer(ArrowStreamSerializer):
- 
-     def arrow_to_pandas(self, arrow_column):
-         from pyspark.sql.pandas.types import _check_series_localize_timestamps
--        import pyarrow
-+        import pyarrow as pa
- 
-         # If the given column is a date type column, creates a series of datetime.date directly
-         # instead of creating datetime64[ns] as intermediate data to avoid overflow caused by
-         # datetime64[ns] type handling.
-         s = arrow_column.to_pandas(date_as_object=True)
- 
--        if pyarrow.types.is_timestamp(arrow_column.type):
-+        if pa.types.is_timestamp(arrow_column.type):
-             return _check_series_localize_timestamps(s, self._timezone)
-+        elif pa.types.is_struct(arrow_column.type):
-+            if isinstance(arrow_column, pa.ChunkedArray):
-+                arrow_column = pa.concat_arrays(arrow_column.iterchunks())
-+            children = []
-+            names = []
-+            for f in arrow_column.type:
-+                child = arrow_column.field(f.name)
-+                if pa.types.is_timestamp(child.type):
-+                    child_series = child.to_pandas()
-+                    child_series = _check_series_localize_timestamps(child_series, self._timezone)
-+                    child = pa.array(child_series, type=pa.timestamp('us'))
-+                children.append(child)
-+                names.append(f.name)
-+            arr = pa.StructArray.from_arrays(children, names)
-+            return arr.to_pandas(date_as_object=True)
-         else:
-             return s
-
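
A note on the technique in the reverted ARROW-10178 patch: arrow_to_pandas round-trips each
timestamp child of a struct column through pandas so the session timezone can be applied, then
rebuilds the struct before the final conversion. Below is a minimal standalone sketch of that
idea, assuming pyarrow >= 1.0.0. The function name localize_struct_timestamps and the explicit
UTC-to-local conversion are illustrative stand-ins for PySpark's internal
_check_series_localize_timestamps; they are not part of either codebase.

import pandas as pd
import pyarrow as pa


def localize_struct_timestamps(arrow_column, timezone):
    # Collapse chunks so struct children are addressable via .field(),
    # mirroring the pa.concat_arrays step in the patch above.
    if isinstance(arrow_column, pa.ChunkedArray):
        arrow_column = pa.concat_arrays(arrow_column.iterchunks())
    children, names = [], []
    for f in arrow_column.type:
        child = arrow_column.field(f.name)
        if pa.types.is_timestamp(child.type):
            # Stand-in for PySpark's _check_series_localize_timestamps:
            # interpret the naive values as UTC, shift to the target zone,
            # then drop the tz so the values are naive local times again.
            series = (child.to_pandas()
                      .dt.tz_localize('UTC')
                      .dt.tz_convert(timezone)
                      .dt.tz_localize(None))
            child = pa.array(series, type=pa.timestamp('us'))
        children.append(child)
        names.append(f.name)
    # Rebuild the struct and convert once, keeping dates as objects.
    return pa.StructArray.from_arrays(children, names).to_pandas(date_as_object=True)


struct_type = pa.struct([('ts', pa.timestamp('us')), ('id', pa.int64())])
col = pa.chunked_array([pa.array(
    [{'ts': pd.Timestamp('2020-10-06 12:00:00'), 'id': 1}], type=struct_type)])
print(localize_struct_timestamps(col, 'America/Los_Angeles'))

The per-field copy is part of why this stayed a CI-only workaround: the Dockerfile comment in
patch 2 ties its removal to the upstream fix tracked as SPARK-32285.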