Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.arrow.adapter.jdbc;

import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Types;

import com.google.common.base.Preconditions;

/**
* This class represents the information about a JDBC ResultSet Field that is
* needed to construct an {@link org.apache.arrow.vector.types.pojo.ArrowType}.
* Currently, this is:
* <ul>
* <li>The JDBC {@link java.sql.Types} type.</li>
* <li>The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)</li>
* <li>The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)</li>
* </ul>
*/
public class JdbcFieldInfo {
private final int jdbcType;
private final int precision;
private final int scale;

/**
* Builds a <code>JdbcFieldInfo</code> using only the {@link java.sql.Types} type. Do not use this constructor
* if the field type is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}; the precision and
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe put a check for this in the constructor for this condition, in case someone doesn't read the javadoc?

* scale will be set to <code>0</code>.
*
* @param jdbcType The {@link java.sql.Types} type.
* @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}.
*/
public JdbcFieldInfo(int jdbcType) {
Preconditions.checkArgument(
(jdbcType != Types.DECIMAL && jdbcType != Types.NUMERIC),
"DECIMAL and NUMERIC types require a precision and scale; please use another constructor.");

this.jdbcType = jdbcType;
this.precision = 0;
this.scale = 0;
}

/**
* Builds a <code>JdbcFieldInfo</code> from the {@link java.sql.Types} type, precision, and scale.
* Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe warn if this is used with other types?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As in, log a warning? I don't think this project logs much of anything.

*
* @param jdbcType The {@link java.sql.Types} type.
* @param precision The field's numeric precision.
* @param scale The field's numeric scale.
*/
public JdbcFieldInfo(int jdbcType, int precision, int scale) {
this.jdbcType = jdbcType;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might want to add a similar check to the one above in here (only reversed). Or comment why the check isn't there (maybe it makes reflection or something else easier?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no real reason not to use this constructor for non-numeric & non-decimal types; the precision & scale are just ignored. No harm done.

this.precision = precision;
this.scale = scale;
}

/**
* Builds a <code>JdbcFieldInfo</code> from the corresponding {@link java.sql.ResultSetMetaData} column.
*
* @param rsmd The {@link java.sql.ResultSetMetaData} to get the field information from.
* @param column The column to get the field information for (on a 1-based index).
* @throws SQLException If the column information cannot be retrieved.
* @throws NullPointerException if <code>rsmd</code> is <code>null</code>.
* @throws IllegalArgumentException if <code>column</code> is out of bounds.
*/
public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException {
Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null.");
Preconditions.checkArgument(column > 0, "ResultSetMetaData columns have indices starting at 1.");
Preconditions.checkArgument(
column <= rsmd.getColumnCount(),
"The index must be within the number of columns (1 to %s, inclusive)", rsmd.getColumnCount());

this.jdbcType = rsmd.getColumnType(column);
this.precision = rsmd.getPrecision(column);
this.scale = rsmd.getScale(column);
}

/**
* The {@link java.sql.Types} type.
*/
public int getJdbcType() {
return jdbcType;
}

/**
* The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types.
*/
public int getPrecision() {
return precision;
}

/**
* The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types.
*/
public int getScale() {
return scale;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");

JdbcToArrowConfig config =
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false);
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar());
return sqlToArrow(connection, query, config);
}

Expand Down Expand Up @@ -116,7 +116,7 @@ public static VectorSchemaRoot sqlToArrow(
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar, false));
return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar));
}

/**
Expand Down Expand Up @@ -170,7 +170,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");

JdbcToArrowConfig config =
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false);
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar());
return sqlToArrow(resultSet, config);
}

Expand All @@ -184,8 +184,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
*/
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");

return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar, false));
return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar));
}

/**
Expand All @@ -205,7 +204,7 @@ public static VectorSchemaRoot sqlToArrow(
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");

return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar, false));
return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.arrow.adapter.jdbc;

import java.util.Calendar;
import java.util.Map;

import org.apache.arrow.memory.BaseAllocator;

Expand All @@ -28,16 +29,29 @@
* <p>
* The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot},
* and the calendar is used to define the time zone of any {@link org.apahe.arrow.vector.pojo.ArrowType.Timestamp}
* fields that are created during the conversion.
* fields that are created during the conversion. Neither field may be <code>null</code>.
* </p>
* <p>
* Neither field may be <code>null</code>.
* If the <code>includeMetadata</code> flag is set, the Arrow field metadata will contain information
* from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the
* {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding
* {@link org.apache.arrow.vector.FieldVector}.
* </p>
* <p>
* If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding
* {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type
* information cannot be retrieved from all JDBC implementations (H2 for example, returns
* {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index
* or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion.
* </p>
*/
public final class JdbcToArrowConfig {

private Calendar calendar;
private BaseAllocator allocator;
private boolean includeMetadata;
private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
private Map<String, JdbcFieldInfo> arraySubTypesByColumnName;

/**
* Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
Expand All @@ -46,20 +60,47 @@ public final class JdbcToArrowConfig {
*
* @param allocator The memory allocator to construct the Arrow vectors with.
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
* @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
*/
JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) {
JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) {
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks to be the style in the code base, but this in theory could be inlined below like:

this.allocator = Preconditions.checkNotNul(...)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm just trying to follow the style in the existing code. I think stylistic changes like these should be a different PR ...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SGTM.


this.allocator = allocator;
this.calendar = calendar;
this.includeMetadata = false;
this.arraySubTypesByColumnIndex = null;
this.arraySubTypesByColumnName = null;
}

/**
* Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
* is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define
* Arrow Timestamp fields, and to read time-based fields from the JDBC <code>ResultSet</code>.
*
* @param allocator The memory allocator to construct the Arrow vectors with.
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
* @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
* @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based).
* @param arraySubTypesByColumnName The type of the JDBC array at the column name.
*/
JdbcToArrowConfig(
BaseAllocator allocator,
Calendar calendar,
boolean includeMetadata,
Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex,
Map<String, JdbcFieldInfo> arraySubTypesByColumnName) {

this(allocator, calendar);

this.includeMetadata = includeMetadata;
this.arraySubTypesByColumnIndex = arraySubTypesByColumnIndex;
this.arraySubTypesByColumnName = arraySubTypesByColumnName;
}

/**
* The calendar to use when defining Arrow Timestamp fields
* and retrieving {@link Date}, {@link Time}, or {@link Timestamp}
* data types from the {@link ResultSet}, or <code>null</code> if not converting.
*
* @return the calendar.
*/
public Calendar getCalendar() {
Expand All @@ -82,4 +123,32 @@ public BaseAllocator getAllocator() {
public boolean shouldIncludeMetadata() {
return includeMetadata;
}

/**
* Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index.
*
* @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type.
* @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if not defined.
*/
public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) {
if (arraySubTypesByColumnIndex == null) {
return null;
} else {
return arraySubTypesByColumnIndex.get(index);
}
}

/**
* Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name.
*
* @param index The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type.
* @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if not defined.
*/
public JdbcFieldInfo getArraySubTypeByColumnName(String name) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

going back to my previous comments about nulls, I wonder if Optional might make sense

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't expect client code to call this function; client code would call the setter (which was moved to the builder). Likewise, I don't think it really matters?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably not for this review. I think having a clear convention around nulls is important (and my preference but I'm sure there are others) is to assume things are non-null by default and mark things that can be null explcitly. I'll go through the mailing list archive to see if this was discussed before and if not post to the mailing list.

if (arraySubTypesByColumnName == null) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe just construct the maps to be empty instead of having a separate check?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The user could also pass in a null value for the map. A null check is gonna go somewhere ...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably a style thing also, so not relevant for this review, but in the past I've had a preference for validating up front (again I'm not familiar with conventions in this code base though).

return null;
} else {
return arraySubTypesByColumnName.get(name);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.arrow.adapter.jdbc;

import java.util.Calendar;
import java.util.Map;

import org.apache.arrow.memory.BaseAllocator;

Expand All @@ -30,6 +31,8 @@ public class JdbcToArrowConfigBuilder {
private Calendar calendar;
private BaseAllocator allocator;
private boolean includeMetadata;
private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably not worth changing for this review, but given this would be a relatively dense map, it might be slightly more performant to have this as an JdbcFieldInfo[]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Depends on the number of fields returned by the query. If a query returns 10 fields and the ninth is an array, the map wouldn't be very dense at all. I'd prefer to leave this as-is until proven otherwise?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine with leaving as is. You are right not quite dense, but still a few number of elements in general vs boxing/unboxing cost for integers on every call (like I said probably not a performance issue at all.

private Map<String, JdbcFieldInfo> arraySubTypesByColumnName;

/**
* Default constructor for the <code>JdbcToArrowConfigBuilder}</code>.
Expand All @@ -40,6 +43,8 @@ public JdbcToArrowConfigBuilder() {
this.allocator = null;
this.calendar = null;
this.includeMetadata = false;
this.arraySubTypesByColumnIndex = null;
this.arraySubTypesByColumnName = null;
}

/**
Expand Down Expand Up @@ -126,6 +131,29 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) {
return this;
}

/**
* Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
* The column index is 1-based, to match the JDBC column index.
*
* @param map The mapping.
* @return This instance of the <code>JdbcToArrowConfig</code>, for chaining.
*/
public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(Map<Integer, JdbcFieldInfo> map) {
this.arraySubTypesByColumnIndex = map;
return this;
}

/**
* Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
*
* @param map The mapping.
* @return This instance of the <code>JdbcToArrowConfig</code>, for chaining.
*/
public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map<String, JdbcFieldInfo> map) {
this.arraySubTypesByColumnName = map;
return this;
}

/**
* This builds the {@link JdbcToArrowConfig} from the provided
* {@link BaseAllocator} and {@link Calendar}.
Expand All @@ -134,6 +162,11 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) {
* @throws NullPointerException if either the allocator or calendar was not set.
*/
public JdbcToArrowConfig build() {
return new JdbcToArrowConfig(allocator, calendar, includeMetadata);
return new JdbcToArrowConfig(
allocator,
calendar,
includeMetadata,
arraySubTypesByColumnIndex,
arraySubTypesByColumnName);
}
}
Loading