-
Notifications
You must be signed in to change notification settings - Fork 4k
ARROW-4142: [Java] JDBC Array -> Arrow ListVector #3294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,114 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.arrow.adapter.jdbc; | ||
|
|
||
| import java.sql.ResultSetMetaData; | ||
| import java.sql.SQLException; | ||
| import java.sql.Types; | ||
|
|
||
| import com.google.common.base.Preconditions; | ||
|
|
||
| /** | ||
| * This class represents the information about a JDBC ResultSet Field that is | ||
| * needed to construct an {@link org.apache.arrow.vector.types.pojo.ArrowType}. | ||
| * Currently, this is: | ||
| * <ul> | ||
| * <li>The JDBC {@link java.sql.Types} type.</li> | ||
| * <li>The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)</li> | ||
| * <li>The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)</li> | ||
| * </ul> | ||
| */ | ||
| public class JdbcFieldInfo { | ||
| private final int jdbcType; | ||
| private final int precision; | ||
| private final int scale; | ||
|
|
||
| /** | ||
| * Builds a <code>JdbcFieldInfo</code> using only the {@link java.sql.Types} type. Do not use this constructor | ||
| * if the field type is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}; the precision and | ||
| * scale will be set to <code>0</code>. | ||
| * | ||
| * @param jdbcType The {@link java.sql.Types} type. | ||
| * @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}. | ||
| */ | ||
| public JdbcFieldInfo(int jdbcType) { | ||
| Preconditions.checkArgument( | ||
| (jdbcType != Types.DECIMAL && jdbcType != Types.NUMERIC), | ||
| "DECIMAL and NUMERIC types require a precision and scale; please use another constructor."); | ||
|
|
||
| this.jdbcType = jdbcType; | ||
| this.precision = 0; | ||
| this.scale = 0; | ||
| } | ||
|
|
||
| /** | ||
| * Builds a <code>JdbcFieldInfo</code> from the {@link java.sql.Types} type, precision, and scale. | ||
| * Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types. | ||
|
||
| * | ||
| * @param jdbcType The {@link java.sql.Types} type. | ||
| * @param precision The field's numeric precision. | ||
| * @param scale The field's numeric scale. | ||
| */ | ||
| public JdbcFieldInfo(int jdbcType, int precision, int scale) { | ||
| this.jdbcType = jdbcType; | ||
|
||
| this.precision = precision; | ||
| this.scale = scale; | ||
| } | ||
|
|
||
| /** | ||
| * Builds a <code>JdbcFieldInfo</code> from the corresponding {@link java.sql.ResultSetMetaData} column. | ||
| * | ||
| * @param rsmd The {@link java.sql.ResultSetMetaData} to get the field information from. | ||
| * @param column The column to get the field information for (on a 1-based index). | ||
| * @throws SQLException If the column information cannot be retrieved. | ||
| * @throws NullPointerException if <code>rsmd</code> is <code>null</code>. | ||
| * @throws IllegalArgumentException if <code>column</code> is out of bounds. | ||
| */ | ||
| public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException { | ||
| Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null."); | ||
| Preconditions.checkArgument(column > 0, "ResultSetMetaData columns have indices starting at 1."); | ||
| Preconditions.checkArgument( | ||
| column <= rsmd.getColumnCount(), | ||
| "The index must be within the number of columns (1 to %s, inclusive)", rsmd.getColumnCount()); | ||
|
|
||
| this.jdbcType = rsmd.getColumnType(column); | ||
| this.precision = rsmd.getPrecision(column); | ||
| this.scale = rsmd.getScale(column); | ||
| } | ||
|
|
||
| /** | ||
| * The {@link java.sql.Types} type. | ||
| */ | ||
| public int getJdbcType() { | ||
| return jdbcType; | ||
| } | ||
|
|
||
| /** | ||
| * The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. | ||
| */ | ||
| public int getPrecision() { | ||
| return precision; | ||
| } | ||
|
|
||
| /** | ||
| * The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. | ||
| */ | ||
| public int getScale() { | ||
| return scale; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| package org.apache.arrow.adapter.jdbc; | ||
|
|
||
| import java.util.Calendar; | ||
| import java.util.Map; | ||
|
|
||
| import org.apache.arrow.memory.BaseAllocator; | ||
|
|
||
|
|
@@ -28,16 +29,29 @@ | |
| * <p> | ||
| * The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, | ||
| * and the calendar is used to define the time zone of any {@link org.apahe.arrow.vector.pojo.ArrowType.Timestamp} | ||
| * fields that are created during the conversion. | ||
| * fields that are created during the conversion. Neither field may be <code>null</code>. | ||
| * </p> | ||
| * <p> | ||
| * Neither field may be <code>null</code>. | ||
| * If the <code>includeMetadata</code> flag is set, the Arrow field metadata will contain information | ||
| * from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the | ||
| * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding | ||
| * {@link org.apache.arrow.vector.FieldVector}. | ||
| * </p> | ||
| * <p> | ||
| * If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding | ||
| * {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type | ||
| * information cannot be retrieved from all JDBC implementations (H2 for example, returns | ||
| * {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index | ||
| * or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion. | ||
mikepigott marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| * </p> | ||
| */ | ||
| public final class JdbcToArrowConfig { | ||
|
|
||
| private Calendar calendar; | ||
| private BaseAllocator allocator; | ||
| private boolean includeMetadata; | ||
| private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex; | ||
| private Map<String, JdbcFieldInfo> arraySubTypesByColumnName; | ||
|
|
||
| /** | ||
| * Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code> | ||
|
|
@@ -46,20 +60,47 @@ public final class JdbcToArrowConfig { | |
| * | ||
| * @param allocator The memory allocator to construct the Arrow vectors with. | ||
| * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. | ||
| * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata. | ||
| */ | ||
| JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) { | ||
| JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) { | ||
| Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); | ||
|
||
|
|
||
| this.allocator = allocator; | ||
| this.calendar = calendar; | ||
| this.includeMetadata = false; | ||
| this.arraySubTypesByColumnIndex = null; | ||
| this.arraySubTypesByColumnName = null; | ||
| } | ||
|
|
||
| /** | ||
| * Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code> | ||
| * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define | ||
| * Arrow Timestamp fields, and to read time-based fields from the JDBC <code>ResultSet</code>. | ||
| * | ||
| * @param allocator The memory allocator to construct the Arrow vectors with. | ||
| * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. | ||
| * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata. | ||
| * @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based). | ||
| * @param arraySubTypesByColumnName The type of the JDBC array at the column name. | ||
| */ | ||
| JdbcToArrowConfig( | ||
| BaseAllocator allocator, | ||
| Calendar calendar, | ||
| boolean includeMetadata, | ||
| Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex, | ||
| Map<String, JdbcFieldInfo> arraySubTypesByColumnName) { | ||
|
|
||
| this(allocator, calendar); | ||
|
|
||
| this.includeMetadata = includeMetadata; | ||
| this.arraySubTypesByColumnIndex = arraySubTypesByColumnIndex; | ||
| this.arraySubTypesByColumnName = arraySubTypesByColumnName; | ||
| } | ||
|
|
||
| /** | ||
| * The calendar to use when defining Arrow Timestamp fields | ||
| * and retrieving {@link Date}, {@link Time}, or {@link Timestamp} | ||
| * data types from the {@link ResultSet}, or <code>null</code> if not converting. | ||
| * | ||
| * @return the calendar. | ||
| */ | ||
| public Calendar getCalendar() { | ||
|
|
@@ -82,4 +123,32 @@ public BaseAllocator getAllocator() { | |
| public boolean shouldIncludeMetadata() { | ||
| return includeMetadata; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index. | ||
| * | ||
| * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type. | ||
| * @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if not defined. | ||
| */ | ||
| public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) { | ||
| if (arraySubTypesByColumnIndex == null) { | ||
| return null; | ||
| } else { | ||
| return arraySubTypesByColumnIndex.get(index); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name. | ||
| * | ||
| * @param index The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type. | ||
| * @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if not defined. | ||
| */ | ||
| public JdbcFieldInfo getArraySubTypeByColumnName(String name) { | ||
|
||
| if (arraySubTypesByColumnName == null) { | ||
|
||
| return null; | ||
| } else { | ||
| return arraySubTypesByColumnName.get(name); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| package org.apache.arrow.adapter.jdbc; | ||
|
|
||
| import java.util.Calendar; | ||
| import java.util.Map; | ||
|
|
||
| import org.apache.arrow.memory.BaseAllocator; | ||
|
|
||
|
|
@@ -30,6 +31,8 @@ public class JdbcToArrowConfigBuilder { | |
| private Calendar calendar; | ||
| private BaseAllocator allocator; | ||
| private boolean includeMetadata; | ||
| private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex; | ||
|
||
| private Map<String, JdbcFieldInfo> arraySubTypesByColumnName; | ||
|
|
||
| /** | ||
| * Default constructor for the <code>JdbcToArrowConfigBuilder}</code>. | ||
|
|
@@ -40,6 +43,8 @@ public JdbcToArrowConfigBuilder() { | |
| this.allocator = null; | ||
| this.calendar = null; | ||
| this.includeMetadata = false; | ||
| this.arraySubTypesByColumnIndex = null; | ||
| this.arraySubTypesByColumnName = null; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -126,6 +131,29 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { | |
| return this; | ||
| } | ||
|
|
||
| /** | ||
| * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}. | ||
| * The column index is 1-based, to match the JDBC column index. | ||
| * | ||
| * @param map The mapping. | ||
| * @return This instance of the <code>JdbcToArrowConfig</code>, for chaining. | ||
| */ | ||
| public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(Map<Integer, JdbcFieldInfo> map) { | ||
| this.arraySubTypesByColumnIndex = map; | ||
| return this; | ||
| } | ||
|
|
||
| /** | ||
| * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}. | ||
| * | ||
| * @param map The mapping. | ||
| * @return This instance of the <code>JdbcToArrowConfig</code>, for chaining. | ||
| */ | ||
| public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map<String, JdbcFieldInfo> map) { | ||
| this.arraySubTypesByColumnName = map; | ||
| return this; | ||
| } | ||
|
|
||
| /** | ||
| * This builds the {@link JdbcToArrowConfig} from the provided | ||
| * {@link BaseAllocator} and {@link Calendar}. | ||
|
|
@@ -134,6 +162,11 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { | |
| * @throws NullPointerException if either the allocator or calendar was not set. | ||
| */ | ||
| public JdbcToArrowConfig build() { | ||
| return new JdbcToArrowConfig(allocator, calendar, includeMetadata); | ||
| return new JdbcToArrowConfig( | ||
| allocator, | ||
| calendar, | ||
| includeMetadata, | ||
| arraySubTypesByColumnIndex, | ||
| arraySubTypesByColumnName); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe put a check for this in the constructor for this condition, in case someone doesn't read the javadoc?