diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java
new file mode 100644
index 00000000000..db20bef2907
--- /dev/null
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Types;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class represents the information about a JDBC ResultSet Field that is
+ * needed to construct an {@link org.apache.arrow.vector.types.pojo.ArrowType}.
+ * Currently, this is:
+ *
+ * - The JDBC {@link java.sql.Types} type.
+ * - The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)
+ * - The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)
+ *
+ */
+public class JdbcFieldInfo {
+ private final int jdbcType;
+ private final int precision;
+ private final int scale;
+
+ /**
+ * Builds a JdbcFieldInfo using only the {@link java.sql.Types} type. Do not use this constructor
+ * if the field type is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}; the precision and
+ * scale will be set to 0.
+ *
+ * @param jdbcType The {@link java.sql.Types} type.
+ * @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}.
+ */
+ public JdbcFieldInfo(int jdbcType) {
+ Preconditions.checkArgument(
+ (jdbcType != Types.DECIMAL && jdbcType != Types.NUMERIC),
+ "DECIMAL and NUMERIC types require a precision and scale; please use another constructor.");
+
+ this.jdbcType = jdbcType;
+ this.precision = 0;
+ this.scale = 0;
+ }
+
+ /**
+ * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, precision, and scale.
+ * Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types.
+ *
+ * @param jdbcType The {@link java.sql.Types} type.
+ * @param precision The field's numeric precision.
+ * @param scale The field's numeric scale.
+ */
+ public JdbcFieldInfo(int jdbcType, int precision, int scale) {
+ this.jdbcType = jdbcType;
+ this.precision = precision;
+ this.scale = scale;
+ }
+
+ /**
+ * Builds a JdbcFieldInfo from the corresponding {@link java.sql.ResultSetMetaData} column.
+ *
+ * @param rsmd The {@link java.sql.ResultSetMetaData} to get the field information from.
+ * @param column The column to get the field information for (on a 1-based index).
+ * @throws SQLException If the column information cannot be retrieved.
+ * @throws NullPointerException if rsmd is null.
+ * @throws IllegalArgumentException if column is out of bounds.
+ */
+ public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException {
+ Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null.");
+ Preconditions.checkArgument(column > 0, "ResultSetMetaData columns have indices starting at 1.");
+ Preconditions.checkArgument(
+ column <= rsmd.getColumnCount(),
+ "The index must be within the number of columns (1 to %s, inclusive)", rsmd.getColumnCount());
+
+ this.jdbcType = rsmd.getColumnType(column);
+ this.precision = rsmd.getPrecision(column);
+ this.scale = rsmd.getScale(column);
+ }
+
+ /**
+ * The {@link java.sql.Types} type.
+ */
+ public int getJdbcType() {
+ return jdbcType;
+ }
+
+ /**
+ * The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types.
+ */
+ public int getPrecision() {
+ return precision;
+ }
+
+ /**
+ * The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types.
+ */
+ public int getScale() {
+ return scale;
+ }
+}
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java
index 79102043a0f..d5be486c843 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java
@@ -88,7 +88,7 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
JdbcToArrowConfig config =
- new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false);
+ new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar());
return sqlToArrow(connection, query, config);
}
@@ -116,7 +116,7 @@ public static VectorSchemaRoot sqlToArrow(
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
- return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar, false));
+ return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar));
}
/**
@@ -170,7 +170,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
JdbcToArrowConfig config =
- new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false);
+ new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar());
return sqlToArrow(resultSet, config);
}
@@ -184,8 +184,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
*/
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
-
- return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar, false));
+ return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar));
}
/**
@@ -205,7 +204,7 @@ public static VectorSchemaRoot sqlToArrow(
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
- return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar, false));
+ return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar));
}
/**
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
index 8f2a8ef54f8..c5fcceec282 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
@@ -18,6 +18,7 @@
package org.apache.arrow.adapter.jdbc;
import java.util.Calendar;
+import java.util.Map;
import org.apache.arrow.memory.BaseAllocator;
@@ -28,16 +29,29 @@
*
* The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot},
* and the calendar is used to define the time zone of any {@link org.apahe.arrow.vector.pojo.ArrowType.Timestamp}
- * fields that are created during the conversion.
+ * fields that are created during the conversion. Neither field may be null.
*
*
- * Neither field may be null.
+ * If the includeMetadata flag is set, the Arrow field metadata will contain information
+ * from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the
+ * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding
+ * {@link org.apache.arrow.vector.FieldVector}.
+ *
+ *
+ * If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding
+ * {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type
+ * information cannot be retrieved from all JDBC implementations (H2 for example, returns
+ * {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index
+ * or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion.
*
*/
public final class JdbcToArrowConfig {
+
private Calendar calendar;
private BaseAllocator allocator;
private boolean includeMetadata;
+ private Map arraySubTypesByColumnIndex;
+ private Map arraySubTypesByColumnName;
/**
* Constructs a new configuration from the provided allocator and calendar. The allocator
@@ -46,20 +60,47 @@ public final class JdbcToArrowConfig {
*
* @param allocator The memory allocator to construct the Arrow vectors with.
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
- * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
*/
- JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) {
+ JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) {
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
this.allocator = allocator;
this.calendar = calendar;
+ this.includeMetadata = false;
+ this.arraySubTypesByColumnIndex = null;
+ this.arraySubTypesByColumnName = null;
+ }
+
+ /**
+ * Constructs a new configuration from the provided allocator and calendar. The allocator
+ * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define
+ * Arrow Timestamp fields, and to read time-based fields from the JDBC ResultSet.
+ *
+ * @param allocator The memory allocator to construct the Arrow vectors with.
+ * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
+ * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
+ * @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based).
+ * @param arraySubTypesByColumnName The type of the JDBC array at the column name.
+ */
+ JdbcToArrowConfig(
+ BaseAllocator allocator,
+ Calendar calendar,
+ boolean includeMetadata,
+ Map arraySubTypesByColumnIndex,
+ Map arraySubTypesByColumnName) {
+
+ this(allocator, calendar);
+
this.includeMetadata = includeMetadata;
+ this.arraySubTypesByColumnIndex = arraySubTypesByColumnIndex;
+ this.arraySubTypesByColumnName = arraySubTypesByColumnName;
}
/**
* The calendar to use when defining Arrow Timestamp fields
* and retrieving {@link Date}, {@link Time}, or {@link Timestamp}
* data types from the {@link ResultSet}, or null if not converting.
+ *
* @return the calendar.
*/
public Calendar getCalendar() {
@@ -82,4 +123,32 @@ public BaseAllocator getAllocator() {
public boolean shouldIncludeMetadata() {
return includeMetadata;
}
+
+ /**
+ * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index.
+ *
+ * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type.
+ * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not defined.
+ */
+ public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) {
+ if (arraySubTypesByColumnIndex == null) {
+ return null;
+ } else {
+ return arraySubTypesByColumnIndex.get(index);
+ }
+ }
+
+ /**
+ * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name.
+ *
+ * @param index The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type.
+ * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not defined.
+ */
+ public JdbcFieldInfo getArraySubTypeByColumnName(String name) {
+ if (arraySubTypesByColumnName == null) {
+ return null;
+ } else {
+ return arraySubTypesByColumnName.get(name);
+ }
+ }
}
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
index 51327aa2d0f..ea351d8279e 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
@@ -18,6 +18,7 @@
package org.apache.arrow.adapter.jdbc;
import java.util.Calendar;
+import java.util.Map;
import org.apache.arrow.memory.BaseAllocator;
@@ -30,6 +31,8 @@ public class JdbcToArrowConfigBuilder {
private Calendar calendar;
private BaseAllocator allocator;
private boolean includeMetadata;
+ private Map arraySubTypesByColumnIndex;
+ private Map arraySubTypesByColumnName;
/**
* Default constructor for the JdbcToArrowConfigBuilder}.
@@ -40,6 +43,8 @@ public JdbcToArrowConfigBuilder() {
this.allocator = null;
this.calendar = null;
this.includeMetadata = false;
+ this.arraySubTypesByColumnIndex = null;
+ this.arraySubTypesByColumnName = null;
}
/**
@@ -126,6 +131,29 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) {
return this;
}
+ /**
+ * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
+ * The column index is 1-based, to match the JDBC column index.
+ *
+ * @param map The mapping.
+ * @return This instance of the JdbcToArrowConfig, for chaining.
+ */
+ public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(Map map) {
+ this.arraySubTypesByColumnIndex = map;
+ return this;
+ }
+
+ /**
+ * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
+ *
+ * @param map The mapping.
+ * @return This instance of the JdbcToArrowConfig, for chaining.
+ */
+ public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map map) {
+ this.arraySubTypesByColumnName = map;
+ return this;
+ }
+
/**
* This builds the {@link JdbcToArrowConfig} from the provided
* {@link BaseAllocator} and {@link Calendar}.
@@ -134,6 +162,11 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) {
* @throws NullPointerException if either the allocator or calendar was not set.
*/
public JdbcToArrowConfig build() {
- return new JdbcToArrowConfig(allocator, calendar, includeMetadata);
+ return new JdbcToArrowConfig(
+ allocator,
+ calendar,
+ includeMetadata,
+ arraySubTypesByColumnIndex,
+ arraySubTypesByColumnName);
}
}
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
index 833ca8410a9..f54363f5bef 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
@@ -25,6 +25,7 @@
import java.io.InputStream;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
+import java.sql.Array;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.Date;
@@ -59,6 +60,7 @@
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.holders.NullableBigIntHolder;
import org.apache.arrow.vector.holders.NullableBitHolder;
import org.apache.arrow.vector.holders.NullableDateMilliHolder;
@@ -95,6 +97,15 @@ public class JdbcToArrowUtils {
private static final int DEFAULT_STREAM_BUFFER_SIZE = 1024;
private static final int DEFAULT_CLOB_SUBSTRING_READ_SIZE = 256;
+ private static final int JDBC_ARRAY_VALUE_COLUMN = 2;
+
+ /**
+ * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale.
+ */
+ public static Calendar getUtcCalendar() {
+ return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
+ }
+
/**
* Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
*
@@ -107,67 +118,46 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
Preconditions.checkNotNull(calendar, "Calendar object can't be null");
- return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar, false));
- }
-
- /**
- * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale.
- */
- public static Calendar getUtcCalendar() {
- return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
+ return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar));
}
/**
- * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
- *
- * This method currently performs following type mapping for JDBC SQL data types to corresponding Arrow data types.
- *
- *
CHAR --> ArrowType.Utf8
- * NCHAR --> ArrowType.Utf8
- * VARCHAR --> ArrowType.Utf8
- * NVARCHAR --> ArrowType.Utf8
- * LONGVARCHAR --> ArrowType.Utf8
- * LONGNVARCHAR --> ArrowType.Utf8
- * NUMERIC --> ArrowType.Decimal(precision, scale)
- * DECIMAL --> ArrowType.Decimal(precision, scale)
- * BIT --> ArrowType.Bool
- * TINYINT --> ArrowType.Int(8, signed)
- * SMALLINT --> ArrowType.Int(16, signed)
- * INTEGER --> ArrowType.Int(32, signed)
- * BIGINT --> ArrowType.Int(64, signed)
- * REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
- * FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
- * DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
- * BINARY --> ArrowType.Binary
- * VARBINARY --> ArrowType.Binary
- * LONGVARBINARY --> ArrowType.Binary
- * DATE --> ArrowType.Date(DateUnit.MILLISECOND)
- * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
- * TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null)
- * CLOB --> ArrowType.Utf8
- * BLOB --> ArrowType.Binary
+ * Create Arrow {@link Schema} object for the given JDBC {@link java.sql.ResultSetMetaData}.
+ *
+ * The {@link JdbcToArrowUtils#getArrowTypeForJdbcField(JdbcFieldInfo, Calendar)} method is used to construct a
+ * {@link org.apache.arrow.vector.types.pojo.ArrowType} for each field in the {@link java.sql.ResultSetMetaData}.
+ *
+ *
+ * If {@link JdbcToArrowConfig#getIncludeMetadata()} returns true, the following fields
+ * will be added to the {@link FieldType#getMetadata()}:
+ *
+ * - {@link Constants#SQL_CATALOG_NAME_KEY} representing {@link ResultSetMetaData#getCatalogName(int)}
+ * - {@link Constants#SQL_TABLE_NAME_KEY} representing {@link ResultSetMetaData#getTableName(int)}
+ * - {@link Constants#SQL_COLUMN_NAME_KEY} representing {@link ResultSetMetaData#getColumnName(int)}
+ * - {@link Constants#SQL_TYPE_KEY} representing {@link ResultSetMetaData#getColumnTypeName(int)}
+ *
+ *
+ *
+ * If any columns are of type {@link java.sql.Types#ARRAY}, the configuration object will be used to look up
+ * the array sub-type field. The {@link JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be
+ * checked first, followed by the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method.
+ *
*
* @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from.
* @param config The configuration to use when constructing the schema.
* @return {@link Schema}
* @throws SQLException on error
+ * @throws IllegalArgumentException if rsmd contains an {@link java.sql.Types#ARRAY} but the
+ * config does not have a sub-type definition for it.
*/
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException {
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
Preconditions.checkNotNull(config, "The configuration object must not be null");
- final String timezone;
- if (config.getCalendar() != null) {
- timezone = config.getCalendar().getTimeZone().getID();
- } else {
- timezone = null;
- }
-
List fields = new ArrayList<>();
int columnCount = rsmd.getColumnCount();
for (int i = 1; i <= columnCount; i++) {
final String columnName = rsmd.getColumnName(i);
- final FieldType fieldType;
final Map metadata;
if (config.shouldIncludeMetadata()) {
@@ -181,83 +171,172 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig
metadata = null;
}
- switch (rsmd.getColumnType(i)) {
- case Types.BOOLEAN:
- case Types.BIT:
- fieldType = new FieldType(true, new ArrowType.Bool(), null, metadata);
- break;
- case Types.TINYINT:
- fieldType = new FieldType(true, new ArrowType.Int(8, true), null, metadata);
- break;
- case Types.SMALLINT:
- fieldType = new FieldType(true, new ArrowType.Int(16, true), null, metadata);
- break;
- case Types.INTEGER:
- fieldType = new FieldType(true, new ArrowType.Int(32, true), null, metadata);
- break;
- case Types.BIGINT:
- fieldType = new FieldType(true, new ArrowType.Int(64, true), null, metadata);
- break;
- case Types.NUMERIC:
- case Types.DECIMAL:
- int precision = rsmd.getPrecision(i);
- int scale = rsmd.getScale(i);
- fieldType = new FieldType(true, new ArrowType.Decimal(precision, scale), null, metadata);
- break;
- case Types.REAL:
- case Types.FLOAT:
- fieldType = new FieldType(true, new ArrowType.FloatingPoint(SINGLE), null, metadata);
- break;
- case Types.DOUBLE:
- fieldType = new FieldType(true, new ArrowType.FloatingPoint(DOUBLE), null, metadata);
- break;
- case Types.CHAR:
- case Types.NCHAR:
- case Types.VARCHAR:
- case Types.NVARCHAR:
- case Types.LONGVARCHAR:
- case Types.LONGNVARCHAR:
- case Types.CLOB:
- fieldType = new FieldType(true, new ArrowType.Utf8(), null, metadata);
- break;
- case Types.DATE:
- fieldType = new FieldType(true, new ArrowType.Date(DateUnit.MILLISECOND), null, metadata);
- break;
- case Types.TIME:
- fieldType = new FieldType(true, new ArrowType.Time(TimeUnit.MILLISECOND, 32), null, metadata);
- break;
- case Types.TIMESTAMP:
- fieldType =
- new FieldType(
- true,
- new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone),
- null,
- metadata);
- break;
- case Types.BINARY:
- case Types.VARBINARY:
- case Types.LONGVARBINARY:
- case Types.BLOB:
- fieldType = new FieldType(true, new ArrowType.Binary(), null, metadata);
- break;
-
- case Types.ARRAY:
- // TODO Need to handle this type
- // fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null));
- default:
- // no-op, shouldn't get here
- fieldType = null;
- break;
- }
+ final ArrowType arrowType = getArrowTypeForJdbcField(new JdbcFieldInfo(rsmd, i), config.getCalendar());
+ if (arrowType != null) {
+ final FieldType fieldType = new FieldType(true, arrowType, /* dictionary encoding */ null, metadata);
+
+ List children = null;
+ if (arrowType.getTypeID() == ArrowType.List.TYPE_TYPE) {
+ final JdbcFieldInfo arrayFieldInfo = getJdbcFieldInfoForArraySubType(rsmd, i, config);
+ if (arrayFieldInfo == null) {
+ throw new IllegalArgumentException("Configuration does not provide a mapping for array column " + i);
+ }
+ children = new ArrayList();
+ final ArrowType childType =
+ getArrowTypeForJdbcField(arrayFieldInfo, config.getCalendar());
+ children.add(new Field("child", FieldType.nullable(childType), null));
+ }
- if (fieldType != null) {
- fields.add(new Field(columnName, fieldType, null));
+ fields.add(new Field(columnName, fieldType, children));
}
}
return new Schema(fields, null);
}
+ /**
+ * Creates an {@link org.apache.arrow.vector.types.pojo.ArrowType}
+ * from the {@link JdbcFieldInfo} and {@link java.util.Calendar}.
+ *
+ * This method currently performs following type mapping for JDBC SQL data types to corresponding Arrow data types.
+ *
+ *
+ * - CHAR --> ArrowType.Utf8
+ * - NCHAR --> ArrowType.Utf8
+ * - VARCHAR --> ArrowType.Utf8
+ * - NVARCHAR --> ArrowType.Utf8
+ * - LONGVARCHAR --> ArrowType.Utf8
+ * - LONGNVARCHAR --> ArrowType.Utf8
+ * - NUMERIC --> ArrowType.Decimal(precision, scale)
+ * - DECIMAL --> ArrowType.Decimal(precision, scale)
+ * - BIT --> ArrowType.Bool
+ * - TINYINT --> ArrowType.Int(8, signed)
+ * - SMALLINT --> ArrowType.Int(16, signed)
+ * - INTEGER --> ArrowType.Int(32, signed)
+ * - BIGINT --> ArrowType.Int(64, signed)
+ * - REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ * - FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ * - DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
+ * - BINARY --> ArrowType.Binary
+ * - VARBINARY --> ArrowType.Binary
+ * - LONGVARBINARY --> ArrowType.Binary
+ * - DATE --> ArrowType.Date(DateUnit.MILLISECOND)
+ * - TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
+ * - TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)
+ * - CLOB --> ArrowType.Utf8
+ * - BLOB --> ArrowType.Binary
+ *
+ *
+ * @param fieldInfo The field information to construct the ArrowType from.
+ * @param calendar The calendar to use when constructing the ArrowType.Timestamp
+ * for {@link java.sql.Types#TIMESTAMP} types.
+ * @return The corresponding ArrowType.
+ * @throws NullPointerException if either fieldInfo or calendar are null.
+ */
+ public static ArrowType getArrowTypeForJdbcField(JdbcFieldInfo fieldInfo, Calendar calendar) {
+ Preconditions.checkNotNull(fieldInfo, "JdbcFieldInfo object cannot be null");
+
+ final String timezone;
+ if (calendar != null) {
+ timezone = calendar.getTimeZone().getID();
+ } else {
+ timezone = null;
+ }
+
+
+ final ArrowType arrowType;
+
+ switch (fieldInfo.getJdbcType()) {
+ case Types.BOOLEAN:
+ case Types.BIT:
+ arrowType = new ArrowType.Bool();
+ break;
+ case Types.TINYINT:
+ arrowType = new ArrowType.Int(8, true);
+ break;
+ case Types.SMALLINT:
+ arrowType = new ArrowType.Int(16, true);
+ break;
+ case Types.INTEGER:
+ arrowType = new ArrowType.Int(32, true);
+ break;
+ case Types.BIGINT:
+ arrowType = new ArrowType.Int(64, true);
+ break;
+ case Types.NUMERIC:
+ case Types.DECIMAL:
+ int precision = fieldInfo.getPrecision();
+ int scale = fieldInfo.getScale();
+ arrowType = new ArrowType.Decimal(precision, scale);
+ break;
+ case Types.REAL:
+ case Types.FLOAT:
+ arrowType = new ArrowType.FloatingPoint(SINGLE);
+ break;
+ case Types.DOUBLE:
+ arrowType = new ArrowType.FloatingPoint(DOUBLE);
+ break;
+ case Types.CHAR:
+ case Types.NCHAR:
+ case Types.VARCHAR:
+ case Types.NVARCHAR:
+ case Types.LONGVARCHAR:
+ case Types.LONGNVARCHAR:
+ case Types.CLOB:
+ arrowType = new ArrowType.Utf8();
+ break;
+ case Types.DATE:
+ arrowType = new ArrowType.Date(DateUnit.MILLISECOND);
+ break;
+ case Types.TIME:
+ arrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32);
+ break;
+ case Types.TIMESTAMP:
+ arrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone);
+ break;
+ case Types.BINARY:
+ case Types.VARBINARY:
+ case Types.LONGVARBINARY:
+ case Types.BLOB:
+ arrowType = new ArrowType.Binary();
+ break;
+ case Types.ARRAY:
+ arrowType = new ArrowType.List();
+ break;
+ default:
+ // no-op, shouldn't get here
+ arrowType = null;
+ break;
+ }
+
+ return arrowType;
+ }
+
+ /* Uses the configuration to determine what the array sub-type JdbcFieldInfo is.
+ * If no sub-type can be found, returns null.
+ */
+ private static JdbcFieldInfo getJdbcFieldInfoForArraySubType(
+ ResultSetMetaData rsmd,
+ int arrayColumn,
+ JdbcToArrowConfig config)
+ throws SQLException {
+
+ Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null");
+ Preconditions.checkNotNull(config, "Configuration must not be null");
+ Preconditions.checkArgument(
+ arrayColumn > 0,
+ "ResultSetMetaData columns start with 1; column cannot be less than 1");
+ Preconditions.checkArgument(
+ arrayColumn <= rsmd.getColumnCount(),
+ "Column number cannot be more than the number of columns");
+
+ JdbcFieldInfo fieldInfo = config.getArraySubTypeByColumnIndex(arrayColumn);
+ if (fieldInfo == null) {
+ fieldInfo = config.getArraySubTypeByColumnName(rsmd.getColumnName(arrayColumn));
+ }
+ return fieldInfo;
+ }
+
private static void allocateVectors(VectorSchemaRoot root, int size) {
List vectors = root.getFieldVectors();
for (FieldVector fieldVector : vectors) {
@@ -284,9 +363,10 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen
throws SQLException, IOException {
Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
- Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");
+ Preconditions.checkNotNull(root, "Vector Schema cannot be null");
+ Preconditions.checkNotNull(calendar, "Calendar object can't be null");
- jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar, false));
+ jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar));
}
/**
@@ -310,117 +390,133 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT
allocateVectors(root, DEFAULT_BUFFER_SIZE);
- final Calendar calendar = config.getCalendar();
-
int rowCount = 0;
while (rs.next()) {
for (int i = 1; i <= columnCount; i++) {
- String columnName = rsmd.getColumnName(i);
- switch (rsmd.getColumnType(i)) {
- case Types.BOOLEAN:
- case Types.BIT:
- updateVector((BitVector) root.getVector(columnName),
- rs.getBoolean(i), !rs.wasNull(), rowCount);
- break;
- case Types.TINYINT:
- updateVector((TinyIntVector) root.getVector(columnName),
- rs.getInt(i), !rs.wasNull(), rowCount);
- break;
- case Types.SMALLINT:
- updateVector((SmallIntVector) root.getVector(columnName),
- rs.getInt(i), !rs.wasNull(), rowCount);
- break;
- case Types.INTEGER:
- updateVector((IntVector) root.getVector(columnName),
- rs.getInt(i), !rs.wasNull(), rowCount);
- break;
- case Types.BIGINT:
- updateVector((BigIntVector) root.getVector(columnName),
- rs.getLong(i), !rs.wasNull(), rowCount);
- break;
- case Types.NUMERIC:
- case Types.DECIMAL:
- updateVector((DecimalVector) root.getVector(columnName),
- rs.getBigDecimal(i), !rs.wasNull(), rowCount);
- break;
- case Types.REAL:
- case Types.FLOAT:
- updateVector((Float4Vector) root.getVector(columnName),
- rs.getFloat(i), !rs.wasNull(), rowCount);
- break;
- case Types.DOUBLE:
- updateVector((Float8Vector) root.getVector(columnName),
- rs.getDouble(i), !rs.wasNull(), rowCount);
- break;
- case Types.CHAR:
- case Types.NCHAR:
- case Types.VARCHAR:
- case Types.NVARCHAR:
- case Types.LONGVARCHAR:
- case Types.LONGNVARCHAR:
- updateVector((VarCharVector) root.getVector(columnName),
- rs.getString(i), !rs.wasNull(), rowCount);
- break;
- case Types.DATE:
- final Date date;
- if (calendar != null) {
- date = rs.getDate(i, calendar);
- } else {
- date = rs.getDate(i);
- }
-
- updateVector((DateMilliVector) root.getVector(columnName), date, !rs.wasNull(), rowCount);
- break;
- case Types.TIME:
- final Time time;
- if (calendar != null) {
- time = rs.getTime(i, calendar);
- } else {
- time = rs.getTime(i);
- }
-
- updateVector((TimeMilliVector) root.getVector(columnName), time, !rs.wasNull(), rowCount);
- break;
- case Types.TIMESTAMP:
- final Timestamp ts;
- if (calendar != null) {
- ts = rs.getTimestamp(i, calendar);
- } else {
- ts = rs.getTimestamp(i);
- }
-
- // TODO: Need to handle precision such as milli, micro, nano
- updateVector((TimeStampVector) root.getVector(columnName), ts, !rs.wasNull(), rowCount);
- break;
- case Types.BINARY:
- case Types.VARBINARY:
- case Types.LONGVARBINARY:
- updateVector((VarBinaryVector) root.getVector(columnName),
- rs.getBinaryStream(i), !rs.wasNull(), rowCount);
- break;
- case Types.ARRAY:
- // TODO Need to handle this type
- // fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null));
- break;
- case Types.CLOB:
- updateVector((VarCharVector) root.getVector(columnName),
- rs.getClob(i), !rs.wasNull(), rowCount);
- break;
- case Types.BLOB:
- updateVector((VarBinaryVector) root.getVector(columnName),
- rs.getBlob(i), !rs.wasNull(), rowCount);
- break;
-
- default:
- // no-op, shouldn't get here
- break;
- }
+ jdbcToFieldVector(
+ rs,
+ i,
+ rs.getMetaData().getColumnType(i),
+ rowCount,
+ root.getVector(rsmd.getColumnName(i)),
+ config);
}
rowCount++;
}
root.setRowCount(rowCount);
}
+ private static void jdbcToFieldVector(
+ ResultSet rs,
+ int columnIndex,
+ int jdbcColType,
+ int rowCount,
+ FieldVector vector,
+ JdbcToArrowConfig config)
+ throws SQLException, IOException {
+
+ final Calendar calendar = config.getCalendar();
+
+ switch (jdbcColType) {
+ case Types.BOOLEAN:
+ case Types.BIT:
+ updateVector((BitVector) vector,
+ rs.getBoolean(columnIndex), !rs.wasNull(), rowCount);
+ break;
+ case Types.TINYINT:
+ updateVector((TinyIntVector) vector,
+ rs.getInt(columnIndex), !rs.wasNull(), rowCount);
+ break;
+ case Types.SMALLINT:
+ updateVector((SmallIntVector) vector,
+ rs.getInt(columnIndex), !rs.wasNull(), rowCount);
+ break;
+ case Types.INTEGER:
+ updateVector((IntVector) vector,
+ rs.getInt(columnIndex), !rs.wasNull(), rowCount);
+ break;
+ case Types.BIGINT:
+ updateVector((BigIntVector) vector,
+ rs.getLong(columnIndex), !rs.wasNull(), rowCount);
+ break;
+ case Types.NUMERIC:
+ case Types.DECIMAL:
+ updateVector((DecimalVector) vector,
+ rs.getBigDecimal(columnIndex), !rs.wasNull(), rowCount);
+ break;
+ case Types.REAL:
+ case Types.FLOAT:
+ updateVector((Float4Vector) vector,
+ rs.getFloat(columnIndex), !rs.wasNull(), rowCount);
+ break;
+ case Types.DOUBLE:
+ updateVector((Float8Vector) vector,
+ rs.getDouble(columnIndex), !rs.wasNull(), rowCount);
+ break;
+ case Types.CHAR:
+ case Types.NCHAR:
+ case Types.VARCHAR:
+ case Types.NVARCHAR:
+ case Types.LONGVARCHAR:
+ case Types.LONGNVARCHAR:
+ updateVector((VarCharVector) vector,
+ rs.getString(columnIndex), !rs.wasNull(), rowCount);
+ break;
+ case Types.DATE:
+ final Date date;
+ if (calendar != null) {
+ date = rs.getDate(columnIndex, calendar);
+ } else {
+ date = rs.getDate(columnIndex);
+ }
+
+ updateVector((DateMilliVector) vector, date, !rs.wasNull(), rowCount);
+ break;
+ case Types.TIME:
+ final Time time;
+ if (calendar != null) {
+ time = rs.getTime(columnIndex, calendar);
+ } else {
+ time = rs.getTime(columnIndex);
+ }
+
+ updateVector((TimeMilliVector) vector, time, !rs.wasNull(), rowCount);
+ break;
+ case Types.TIMESTAMP:
+ final Timestamp ts;
+ if (calendar != null) {
+ ts = rs.getTimestamp(columnIndex, calendar);
+ } else {
+ ts = rs.getTimestamp(columnIndex);
+ }
+
+ // TODO: Need to handle precision such as milli, micro, nano
+ updateVector((TimeStampVector) vector, ts, !rs.wasNull(), rowCount);
+ break;
+ case Types.BINARY:
+ case Types.VARBINARY:
+ case Types.LONGVARBINARY:
+ updateVector((VarBinaryVector) vector,
+ rs.getBinaryStream(columnIndex), !rs.wasNull(), rowCount);
+ break;
+ case Types.ARRAY:
+ updateVector((ListVector) vector, rs, columnIndex, rowCount, config);
+ break;
+ case Types.CLOB:
+ updateVector((VarCharVector) vector,
+ rs.getClob(columnIndex), !rs.wasNull(), rowCount);
+ break;
+ case Types.BLOB:
+ updateVector((VarBinaryVector) vector,
+ rs.getBlob(columnIndex), !rs.wasNull(), rowCount);
+ break;
+
+ default:
+ // no-op, shouldn't get here
+ break;
+ }
+ }
+
private static void updateVector(BitVector bitVector, boolean value, boolean isNonNull, int rowCount) {
NullableBitHolder holder = new NullableBitHolder();
holder.isSet = isNonNull ? 1 : 0;
@@ -620,4 +716,45 @@ private static void updateVector(VarBinaryVector varBinaryVector, Blob blob, boo
updateVector(varBinaryVector, blob != null ? blob.getBinaryStream() : null, isNonNull, rowCount);
}
+ private static void updateVector(
+ ListVector listVector,
+ ResultSet resultSet,
+ int arrayIndex,
+ int rowCount,
+ JdbcToArrowConfig config)
+ throws SQLException, IOException {
+
+ final JdbcFieldInfo fieldInfo = getJdbcFieldInfoForArraySubType(resultSet.getMetaData(), arrayIndex, config);
+ if (fieldInfo == null) {
+ throw new IllegalArgumentException("Column " + arrayIndex + " is an array of unknown type.");
+ }
+
+ final int valueCount = listVector.getValueCount();
+ final Array array = resultSet.getArray(arrayIndex);
+
+ FieldVector fieldVector = listVector.getDataVector();
+ int arrayRowCount = 0;
+
+ if (!resultSet.wasNull()) {
+ listVector.startNewValue(rowCount);
+
+ try (ResultSet rs = array.getResultSet()) {
+
+ while (rs.next()) {
+ jdbcToFieldVector(
+ rs,
+ JDBC_ARRAY_VALUE_COLUMN,
+ fieldInfo.getJdbcType(),
+ valueCount + arrayRowCount,
+ fieldVector,
+ config);
+ arrayRowCount++;
+ }
+ }
+
+ listVector.endValue(rowCount, arrayRowCount);
+ }
+
+ listVector.setValueCount(valueCount + arrayRowCount);
+ }
}
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java
new file mode 100644
index 00000000000..3d6074be613
--- /dev/null
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import static org.junit.Assert.*;
+
+import java.sql.Types;
+
+import org.junit.Test;
+
+public class JdbcFieldInfoTest {
+
+ @Test
+ public void testCreateJdbcFieldInfoWithJdbcType() {
+ JdbcFieldInfo fieldInfo = new JdbcFieldInfo(Types.BLOB);
+
+ assertEquals(Types.BLOB, fieldInfo.getJdbcType());
+ assertEquals(0, fieldInfo.getPrecision());
+ assertEquals(0, fieldInfo.getScale());
+ }
+
+ public void testCreateJdbcFieldInfoWithJdbcTypePrecisionAndScale() {
+ JdbcFieldInfo fieldInfo = new JdbcFieldInfo(Types.BLOB, 1, 2);
+
+ assertEquals(Types.BLOB, fieldInfo.getJdbcType());
+ assertEquals(1, fieldInfo.getPrecision());
+ assertEquals(2, fieldInfo.getScale());
+ }
+}
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java
index bafb2dcdcc3..46d8b044c90 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java
@@ -19,7 +19,9 @@
import static org.junit.Assert.*;
+import java.sql.Types;
import java.util.Calendar;
+import java.util.HashMap;
import java.util.Locale;
import java.util.TimeZone;
@@ -34,7 +36,7 @@ public class JdbcToArrowConfigTest {
@Test(expected = NullPointerException.class)
public void testConfigNullArguments() {
- new JdbcToArrowConfig(null, null, false);
+ new JdbcToArrowConfig(null, null);
}
@Test(expected = NullPointerException.class)
@@ -43,7 +45,7 @@ public void testBuilderNullArguments() {
}
public void testConfigNullCalendar() {
- JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, null, false);
+ JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, null);
assertNull(config.getCalendar());
}
@@ -56,7 +58,7 @@ public void testBuilderNullCalendar() {
@Test(expected = NullPointerException.class)
public void testConfigNullAllocator() {
- new JdbcToArrowConfig(null, calendar, false);
+ new JdbcToArrowConfig(null, calendar);
}
@Test(expected = NullPointerException.class)
@@ -95,7 +97,8 @@ public void testConfig() {
assertTrue(newCalendar == config.getCalendar());
}
- @Test public void testIncludeMetadata() {
+ @Test
+ public void testIncludeMetadata() {
JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar, false);
JdbcToArrowConfig config = builder.build();
@@ -108,10 +111,42 @@ public void testConfig() {
config = new JdbcToArrowConfigBuilder(allocator, calendar, true).build();
assertTrue(config.shouldIncludeMetadata());
- config = new JdbcToArrowConfig(allocator, calendar, true);
+ config = new JdbcToArrowConfig(allocator, calendar, true, null, null);
assertTrue(config.shouldIncludeMetadata());
- config = new JdbcToArrowConfig(allocator, calendar, false);
+ config = new JdbcToArrowConfig(allocator, calendar, false, null, null);
assertFalse(config.shouldIncludeMetadata());
}
+
+ @Test
+ public void testArraySubTypes() {
+ JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar, false);
+ JdbcToArrowConfig config = builder.build();
+
+ final int columnIndex = 1;
+ final String columnName = "COLUMN";
+
+ assertNull(config.getArraySubTypeByColumnIndex(columnIndex));
+ assertNull(config.getArraySubTypeByColumnName(columnName));
+
+ final HashMap indexMapping = new HashMap();
+ indexMapping.put(2, new JdbcFieldInfo(Types.BIGINT));
+
+ final HashMap fieldMapping = new HashMap();
+ fieldMapping.put("NEW_COLUMN", new JdbcFieldInfo(Types.BINARY));
+
+ builder.setArraySubTypeByColumnIndexMap(indexMapping);
+ builder.setArraySubTypeByColumnNameMap(fieldMapping);
+ config = builder.build();
+
+ assertNull(config.getArraySubTypeByColumnIndex(columnIndex));
+ assertNull(config.getArraySubTypeByColumnName(columnName));
+
+ indexMapping.put(columnIndex, new JdbcFieldInfo(Types.BIT));
+ fieldMapping.put(columnName, new JdbcFieldInfo(Types.BLOB));
+
+ assertNotNull(config.getArraySubTypeByColumnIndex(columnIndex));
+ assertEquals(Types.BIT, config.getArraySubTypeByColumnIndex(columnIndex).getJdbcType());
+ assertEquals(Types.BLOB, config.getArraySubTypeByColumnName(columnName).getJdbcType());
+ }
}
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java
index 98f799c1b5b..2137162667e 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java
@@ -204,7 +204,7 @@ public void setRowCount(int rowCount) {
this.rowCount = rowCount;
}
- private byte[][] getByteArray(String[] data) {
+ static byte[][] getByteArray(String[] data) {
byte[][] byteArr = new byte[data.length][];
for (int i = 0; i < data.length; i++) {
@@ -213,7 +213,7 @@ private byte[][] getByteArray(String[] data) {
return byteArr;
}
- private byte[][] getHexToByteArray(String[] data) {
+ static byte[][] getHexToByteArray(String[] data) {
byte[][] byteArr = new byte[data.length][];
for (int i = 0; i < data.length; i++) {
@@ -222,7 +222,7 @@ private byte[][] getHexToByteArray(String[] data) {
return byteArr;
}
- private static byte[] hexStringToByteArray(String s) {
+ static byte[] hexStringToByteArray(String s) {
int len = s.length();
byte[] data = new byte[len / 2];
for (int i = 0; i < len; i += 2) {
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java
new file mode 100644
index 00000000000..e0f8ad957d3
--- /dev/null
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java
@@ -0,0 +1,374 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.h2;
+
+import static org.junit.Assert.*;
+
+import java.sql.Array;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.sql.Types;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;
+import org.apache.arrow.adapter.jdbc.JdbcToArrow;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.ListVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import io.netty.buffer.ArrowBuf;
+
+public class JdbcToArrowArrayTest {
+ private Connection conn = null;
+
+ private static final String CREATE_STATEMENT =
+ "CREATE TABLE array_table (id INTEGER, int_array ARRAY, float_array ARRAY, string_array ARRAY);";
+ private static final String INSERT_STATEMENT =
+ "INSERT INTO array_table (id, int_array, float_array, string_array) VALUES (?, ?, ?, ?);";
+ private static final String QUERY = "SELECT int_array, float_array, string_array FROM array_table ORDER BY id;";
+ private static final String DROP_STATEMENT = "DROP TABLE array_table;";
+
+ private static Map arrayFieldMapping;
+
+ private static final String INT_ARRAY_FIELD_NAME = "INT_ARRAY";
+ private static final String FLOAT_ARRAY_FIELD_NAME = "FLOAT_ARRAY";
+ private static final String STRING_ARRAY_FIELD_NAME = "STRING_ARRAY";
+
+ @Before
+ public void setUp() throws Exception {
+ String url = "jdbc:h2:mem:JdbcToArrowTest";
+ String driver = "org.h2.Driver";
+ Class.forName(driver);
+ conn = DriverManager.getConnection(url);
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(CREATE_STATEMENT);
+ }
+
+ arrayFieldMapping = new HashMap();
+ arrayFieldMapping.put(INT_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.INTEGER));
+ arrayFieldMapping.put(FLOAT_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.REAL));
+ arrayFieldMapping.put(STRING_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.VARCHAR));
+ }
+
+ // This test verifies reading an array field from an H2 database
+ // works as expected. If this test fails, something is either wrong
+ // with the setup, or the H2 SQL behavior changed.
+ @Test
+ public void testReadH2Array() throws Exception {
+ int rowCount = 4;
+
+ Integer[][] intArrays = generateIntegerArrayField(rowCount);
+ Float[][] floatArrays = generateFloatArrayField(rowCount);
+ String[][] strArrays = generateStringArrayField(rowCount);
+
+ insertRows(rowCount, intArrays, floatArrays, strArrays);
+
+ try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) {
+ ResultSetMetaData rsmd = resultSet.getMetaData();
+ assertEquals(3, rsmd.getColumnCount());
+
+ for (int i = 1; i <= rsmd.getColumnCount(); ++i) {
+ assertEquals(Types.ARRAY, rsmd.getColumnType(i));
+ }
+
+ int rowNum = 0;
+
+ while (resultSet.next()) {
+ Array intArray = resultSet.getArray(INT_ARRAY_FIELD_NAME);
+ assertFalse(resultSet.wasNull());
+ try (ResultSet rs = intArray.getResultSet()) {
+ int arrayIndex = 0;
+ while (rs.next()) {
+ assertEquals(intArrays[rowNum][arrayIndex].intValue(), rs.getInt(2));
+ ++arrayIndex;
+ }
+ assertEquals(intArrays[rowNum].length, arrayIndex);
+ }
+
+ Array floatArray = resultSet.getArray(FLOAT_ARRAY_FIELD_NAME);
+ assertFalse(resultSet.wasNull());
+ try (ResultSet rs = floatArray.getResultSet()) {
+ int arrayIndex = 0;
+ while (rs.next()) {
+ assertEquals(floatArrays[rowNum][arrayIndex].floatValue(), rs.getFloat(2), 0.001);
+ ++arrayIndex;
+ }
+ assertEquals(floatArrays[rowNum].length, arrayIndex);
+ }
+
+ Array strArray = resultSet.getArray(STRING_ARRAY_FIELD_NAME);
+ assertFalse(resultSet.wasNull());
+ try (ResultSet rs = strArray.getResultSet()) {
+ int arrayIndex = 0;
+ while (rs.next()) {
+ assertEquals(strArrays[rowNum][arrayIndex], rs.getString(2));
+ ++arrayIndex;
+ }
+ assertEquals(strArrays[rowNum].length, arrayIndex);
+ }
+
+ ++rowNum;
+ }
+
+ assertEquals(rowCount, rowNum);
+ }
+ }
+
+ @Test
+ public void testJdbcToArrow() throws Exception {
+ int rowCount = 4;
+
+ Integer[][] intArrays = generateIntegerArrayField(rowCount);
+ Float[][] floatArrays = generateFloatArrayField(rowCount);
+ String[][] strArrays = generateStringArrayField(rowCount);
+
+ insertRows(rowCount, intArrays, floatArrays, strArrays);
+
+ final JdbcToArrowConfigBuilder builder =
+ new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false);
+ builder.setArraySubTypeByColumnNameMap(arrayFieldMapping);
+
+ final JdbcToArrowConfig config = builder.build();
+
+ try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) {
+ final VectorSchemaRoot vector = JdbcToArrow.sqlToArrow(resultSet, config);
+
+ assertEquals(rowCount, vector.getRowCount());
+
+ assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays);
+ assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays);
+ assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, strArrays);
+ }
+ }
+
+ @Test
+ public void testJdbcToArrowWithNulls() throws Exception {
+ int rowCount = 4;
+
+ Integer[][] intArrays = {
+ null,
+ {0},
+ {1},
+ {},
+ };
+
+ Float[][] floatArrays = {
+ { 2.0f },
+ null,
+ { 3.0f },
+ {},
+ };
+
+ String[][] stringArrays = {
+ {"4"},
+ null,
+ {"5"},
+ {},
+ };
+
+ insertRows(rowCount, intArrays, floatArrays, stringArrays);
+
+ final JdbcToArrowConfigBuilder builder =
+ new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false);
+ builder.setArraySubTypeByColumnNameMap(arrayFieldMapping);
+
+ final JdbcToArrowConfig config = builder.build();
+
+ try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) {
+ final VectorSchemaRoot vector = JdbcToArrow.sqlToArrow(resultSet, config);
+
+ assertEquals(rowCount, vector.getRowCount());
+
+ assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays);
+ assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays);
+ assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, stringArrays);
+ }
+ }
+
+ private void assertIntegerVectorEquals(ListVector listVector, int rowCount, Integer[][] expectedValues) {
+ IntVector vector = (IntVector) listVector.getDataVector();
+ ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ int prevOffset = 0;
+ for (int row = 0; row < rowCount; ++row) {
+ int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH);
+
+ if (expectedValues[row] == null) {
+ assertEquals(0, listVector.isSet(row));
+ assertEquals(0, offset - prevOffset);
+ continue;
+ }
+
+ assertEquals(1, listVector.isSet(row));
+ assertEquals(expectedValues[row].length, offset - prevOffset);
+
+ for (int i = prevOffset; i < offset; ++i) {
+ assertEquals(expectedValues[row][i - prevOffset].intValue(), vector.get(i));
+ }
+
+ prevOffset = offset;
+ }
+ }
+
+ private void assertFloatVectorEquals(ListVector listVector, int rowCount, Float[][] expectedValues) {
+ Float4Vector vector = (Float4Vector) listVector.getDataVector();
+ ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ int prevOffset = 0;
+ for (int row = 0; row < rowCount; ++row) {
+ int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH);
+
+ if (expectedValues[row] == null) {
+ assertEquals(0, listVector.isSet(row));
+ assertEquals(0, offset - prevOffset);
+ continue;
+ }
+
+ assertEquals(1, listVector.isSet(row));
+ assertEquals(expectedValues[row].length, offset - prevOffset);
+
+ for (int i = prevOffset; i < offset; ++i) {
+ assertEquals(expectedValues[row][i - prevOffset].floatValue(), vector.get(i), 0);
+ }
+
+ prevOffset = offset;
+ }
+ }
+
+ private void assertStringVectorEquals(ListVector listVector, int rowCount, String[][] expectedValues) {
+ VarCharVector vector = (VarCharVector) listVector.getDataVector();
+ ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ int prevOffset = 0;
+ for (int row = 0; row < rowCount; ++row) {
+ int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH);
+
+ if (expectedValues[row] == null) {
+ assertEquals(0, listVector.isSet(row));
+ assertEquals(0, offset - prevOffset);
+ continue;
+ }
+
+ assertEquals(1, listVector.isSet(row));
+ assertEquals(expectedValues[row].length, offset - prevOffset);
+ for (int i = prevOffset; i < offset; ++i) {
+ assertArrayEquals(expectedValues[row][i - prevOffset].getBytes(), vector.get(i));
+ }
+
+ prevOffset = offset;
+ }
+ }
+
+ @After
+ public void tearDown() throws SQLException {
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(DROP_STATEMENT);
+ } finally {
+ if (conn != null) {
+ conn.close();
+ conn = null;
+ }
+ }
+ }
+
+ private Integer[][] generateIntegerArrayField(int numRows) {
+ Integer[][] result = new Integer[numRows][];
+
+ for (int i = 0; i < numRows; ++i) {
+ int val = i * 4;
+ result[i] = new Integer[]{val, val + 1, val + 2, val + 3};
+ }
+
+ return result;
+ }
+
+ private Float[][] generateFloatArrayField(int numRows) {
+ Float[][] result = new Float[numRows][];
+
+ for (int i = 0; i < numRows; ++i) {
+ int val = i * 4;
+ result[i] = new Float[]{(float) val, (float) val + 1, (float) val + 2, (float) val + 3};
+ }
+
+ return result;
+ }
+
+ private String[][] generateStringArrayField(int numRows) {
+ String[][] result = new String[numRows][];
+
+ for (int i = 0; i < numRows; ++i) {
+ int val = i * 4;
+ result[i] = new String[]{
+ String.valueOf(val),
+ String.valueOf(val + 1),
+ String.valueOf(val + 2),
+ String.valueOf(val + 3) };
+ }
+
+ return result;
+ }
+
+ private void insertRows(
+ int numRows,
+ Integer[][] integerArrays,
+ Float[][] floatArrays,
+ String[][] strArrays)
+ throws SQLException {
+
+ // Insert 4 Rows
+ try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) {
+
+ for (int i = 0; i < numRows; ++i) {
+ Integer[] integerArray = integerArrays[i];
+ Float[] floatArray = floatArrays[i];
+ String[] strArray = strArrays[i];
+
+ Array intArray = conn.createArrayOf("INT", integerArray);
+ Array realArray = conn.createArrayOf("REAL", floatArray);
+ Array varcharArray = conn.createArrayOf("VARCHAR", strArray);
+
+ // Insert Arrays of 4 Values in Each Row
+ stmt.setInt(1, i);
+ stmt.setArray(2, intArray);
+ stmt.setArray(3, realArray);
+ stmt.setArray(4, varcharArray);
+
+ stmt.executeUpdate();
+
+ intArray.free();
+ realArray.free();
+ varcharArray.free();
+ }
+ }
+ }
+}