diff --git a/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java b/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java
index feb3bc2460d5..1c7bb742f529 100644
--- a/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java
+++ b/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java
@@ -19,6 +19,7 @@
package org.apache.paimon.arrow.writer;
import org.apache.paimon.arrow.ArrowUtils;
+import org.apache.paimon.data.BinaryString;
import org.apache.paimon.data.DataGetters;
import org.apache.paimon.data.InternalArray;
import org.apache.paimon.data.InternalMap;
@@ -39,6 +40,7 @@
import org.apache.paimon.data.columnar.ShortColumnVector;
import org.apache.paimon.data.columnar.TimestampColumnVector;
import org.apache.paimon.data.columnar.VectorizedColumnBatch;
+import org.apache.paimon.memory.MemorySegment;
import org.apache.paimon.utils.IntArrayList;
import org.apache.arrow.vector.BigIntVector;
@@ -94,7 +96,25 @@ protected void doWrite(
@Override
protected void doWrite(int rowIndex, DataGetters getters, int pos) {
- ((VarCharVector) fieldVector).setSafe(rowIndex, getters.getString(pos).toBytes());
+ BinaryString binaryString = getters.getString(pos);
+ MemorySegment[] segments = binaryString.getSegments();
+
+ // Very important performance optimization, which can avoid copying out new byte array
+ if (segments.length == 1) {
+ byte[] heapMemory = segments[0].getHeapMemory();
+ if (heapMemory != null) {
+ ((VarCharVector) fieldVector)
+ .setSafe(
+ rowIndex,
+ heapMemory,
+ binaryString.getOffset(),
+ binaryString.getSizeInBytes());
+ return;
+ }
+ }
+
+ // Else copy new byte array
+ ((VarCharVector) fieldVector).setSafe(rowIndex, binaryString.toBytes());
}
}
diff --git a/paimon-benchmark/paimon-micro-benchmarks/pom.xml b/paimon-benchmark/paimon-micro-benchmarks/pom.xml
index 319433f41ffe..ddac785a1e41 100644
--- a/paimon-benchmark/paimon-micro-benchmarks/pom.xml
+++ b/paimon-benchmark/paimon-micro-benchmarks/pom.xml
@@ -146,6 +146,12 @@ under the License.
${project.version}
+
+ org.apache.paimon
+ paimon-arrow
+ ${project.version}
+
+
diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/arrow/ArrowWriteBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/arrow/ArrowWriteBenchmark.java
new file mode 100644
index 000000000000..8bdedbc0fdd8
--- /dev/null
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/arrow/ArrowWriteBenchmark.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.benchmark.arrow;
+
+import org.apache.paimon.arrow.vector.ArrowFormatWriter;
+import org.apache.paimon.benchmark.Benchmark;
+import org.apache.paimon.data.BinaryString;
+import org.apache.paimon.data.GenericRow;
+import org.apache.paimon.types.DataTypes;
+import org.apache.paimon.types.RowType;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.concurrent.ThreadLocalRandom;
+
+/** Benchmark for arrow write. */
+public class ArrowWriteBenchmark {
+
+ @Test
+ public void testWrite() {
+ int batch = 1024 * 20;
+ Benchmark benchmark =
+ new Benchmark("read", batch * batch)
+ .setNumWarmupIters(1)
+ .setOutputPerIteration(true);
+ RowType rowType =
+ RowType.of(
+ DataTypes.STRING(),
+ DataTypes.STRING(),
+ DataTypes.STRING(),
+ DataTypes.STRING());
+ ThreadLocalRandom rnd = ThreadLocalRandom.current();
+ byte[] bytes = new byte[100];
+ rnd.nextBytes(bytes);
+ BinaryString binaryString = BinaryString.fromBytes(bytes, 1, 99);
+ GenericRow row = GenericRow.of(binaryString, binaryString, binaryString, binaryString);
+ benchmark.addCase(
+ "write",
+ 1,
+ () -> {
+ try (ArrowFormatWriter writer = new ArrowFormatWriter(rowType, batch, true)) {
+ for (int i = 0; i < batch; i++) {
+ writer.reset();
+ for (int j = 0; j < batch; j++) {
+ writer.write(row);
+ }
+ }
+ }
+ });
+ benchmark.run();
+ }
+}