Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,11 @@
package org.apache.arrow.adapter.jdbc;

import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Calendar;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.util.ValueVectorUtility;

/**
* Utility class to convert JDBC objects to columnar Arrow format objects.
Expand Down Expand Up @@ -64,170 +58,6 @@
*/
public class JdbcToArrow {

/**
 * Runs the given SQL query on the supplied connection and converts the whole result to Arrow objects.
 *
 * <p>Date, Time and Timestamp values are handled with the calendar returned by
 * {@link JdbcToArrowUtils#getUtcCalendar()}. To use a different calendar, call the overload that accepts a
 * {@link Calendar}.
 *
 * @param connection Database connection to be used. This method does not close it; the caller remains
 *                   responsible for closing it or returning it to the pool.
 * @param query The DB Query to fetch the data.
 * @param allocator Memory allocator; rejected by {@code Preconditions.checkNotNull} when null.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException Propagated to the caller after the Statement opened for the query has been closed.
 * @throws IOException Propagated from the conversion to Arrow vectors.
 */
@Deprecated
public static VectorSchemaRoot sqlToArrow(Connection connection, String query, BufferAllocator allocator)
    throws SQLException, IOException {
  Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");

  final Calendar utcCalendar = JdbcToArrowUtils.getUtcCalendar();
  return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, utcCalendar));
}

/**
 * Runs the given SQL query on the supplied connection and converts the whole result to Arrow objects,
 * using a caller-supplied calendar.
 *
 * @param connection Database connection to be used. This method does not close it; the caller remains
 *                   responsible for closing it or returning it to the pool.
 * @param query The DB Query to fetch the data.
 * @param allocator Memory allocator; rejected by {@code Preconditions.checkNotNull} when null.
 * @param calendar Calendar used for Date, Time and Timestamp datasets; rejected when null.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException Propagated to the caller after the Statement opened for the query has been closed.
 * @throws IOException Propagated from the conversion to Arrow vectors.
 */
@Deprecated
public static VectorSchemaRoot sqlToArrow(
    Connection connection, String query, BufferAllocator allocator, Calendar calendar)
    throws SQLException, IOException {
  // Validate in the same order as before: allocator first, then calendar.
  Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
  Preconditions.checkNotNull(calendar, "Calendar object can not be null");

  final JdbcToArrowConfig conversionConfig = new JdbcToArrowConfig(allocator, calendar);
  return sqlToArrow(connection, query, conversionConfig);
}

/**
 * Runs the given SQL query on the supplied connection and converts the whole result to Arrow objects.
 *
 * <p>Both the Statement and the ResultSet opened for the query are closed before this method returns,
 * whether it completes normally or throws.
 *
 * @param connection Database connection to be used. This method does not close it; the caller remains
 *                   responsible for closing it or returning it to the pool.
 * @param query The DB Query to fetch the data; must be neither null nor empty.
 * @param config Configuration of the conversion from JDBC to Arrow.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException Propagated to the caller after the ResultSet and Statement have been closed.
 * @throws IOException Propagated from the conversion to Arrow vectors.
 */
@Deprecated
public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config)
    throws SQLException, IOException {
  Preconditions.checkNotNull(connection, "JDBC connection object can not be null");
  Preconditions.checkArgument(query != null && !query.isEmpty(), "SQL query can not be null or empty");

  // Manage the ResultSet explicitly instead of relying on Statement.close() to cascade to it.
  // Resources are closed in reverse order: the ResultSet first, then the Statement.
  try (Statement stmt = connection.createStatement();
       ResultSet resultSet = stmt.executeQuery(query)) {
    return sqlToArrow(resultSet, config);
  }
}

/**
 * Converts every row of the given JDBC {@link ResultSet} to Arrow objects.
 *
 * <p>Delegates to {@link #sqlToArrow(ResultSet, Calendar)} with the calendar returned by
 * {@link JdbcToArrowUtils#getUtcCalendar()}; that overload supplies the allocator.
 *
 * @param resultSet ResultSet to use to fetch the data from underlying database; rejected when null.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException on error
 * @throws IOException on error during the conversion to Arrow vectors
 */
@Deprecated
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLException, IOException {
  Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");

  final Calendar utcCalendar = JdbcToArrowUtils.getUtcCalendar();
  return sqlToArrow(resultSet, utcCalendar);
}

/**
 * Converts every row of the given JDBC {@link ResultSet} to Arrow objects, allocating from the supplied
 * allocator and using the calendar returned by {@link JdbcToArrowUtils#getUtcCalendar()}.
 *
 * @param resultSet ResultSet to use to fetch the data from underlying database
 * @param allocator Memory allocator; rejected by {@code Preconditions.checkNotNull} when null.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException on error
 * @throws IOException on error during the conversion to Arrow vectors
 */
@Deprecated
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BufferAllocator allocator)
    throws SQLException, IOException {
  Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");

  return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()));
}

/**
 * Converts every row of the given JDBC {@link ResultSet} to Arrow objects.
 *
 * <p>A new {@code RootAllocator(Integer.MAX_VALUE)} is created for each call and handed to the conversion
 * through the {@link JdbcToArrowConfig}.
 *
 * @param resultSet ResultSet to use to fetch the data from underlying database; rejected when null.
 * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or {@code null} if none.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException on error
 * @throws IOException on error during the conversion to Arrow vectors
 */
@Deprecated
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar)
    throws SQLException, IOException {
  Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");

  final RootAllocator rootAllocator = new RootAllocator(Integer.MAX_VALUE);
  final JdbcToArrowConfig conversionConfig = new JdbcToArrowConfig(rootAllocator, calendar);
  return sqlToArrow(resultSet, conversionConfig);
}

/**
 * Converts every row of the given JDBC {@link ResultSet} to Arrow objects with a caller-supplied allocator
 * and calendar.
 *
 * @param resultSet ResultSet to use to fetch the data from underlying database
 * @param allocator Memory allocator to use; rejected by {@code Preconditions.checkNotNull} when null.
 * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or {@code null} if none.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException on error
 * @throws IOException on error during the conversion to Arrow vectors
 */
@Deprecated
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BufferAllocator allocator, Calendar calendar)
    throws SQLException, IOException {
  Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");

  final JdbcToArrowConfig conversionConfig = new JdbcToArrowConfig(allocator, calendar);
  return sqlToArrow(resultSet, conversionConfig);
}

/**
 * Converts every row of the given JDBC {@link ResultSet} to Arrow objects.
 *
 * <p>The schema is derived from the result set metadata and the root is created on the allocator held by
 * {@code config}. When the configured target batch size is not {@code JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE},
 * the vectors are pre-allocated to that size before being populated. This method does not close the
 * result set.
 *
 * <p>If pre-allocation or population fails, the partially built root is closed before the exception is
 * propagated, so its buffers are not left allocated.
 *
 * @param resultSet ResultSet to use to fetch the data from underlying database; rejected when null.
 * @param config Configuration of the conversion from JDBC to Arrow; rejected when null.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException on error
 * @throws IOException on error during the conversion to Arrow vectors
 */
@Deprecated
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig config)
    throws SQLException, IOException {
  Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
  Preconditions.checkNotNull(config, "The configuration cannot be null");

  final VectorSchemaRoot root = VectorSchemaRoot.create(
      JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), config.getAllocator());
  try {
    if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) {
      ValueVectorUtility.preAllocate(root, config.getTargetBatchSize());
    }
    JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root, config);
    return root;
  } catch (Exception e) {
    // The caller never receives the root on failure, so release it here. The original exception is
    // rethrown unchanged (precise rethrow); a failure while closing is attached as suppressed.
    try {
      root.close();
    } catch (RuntimeException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}

/*----------------------------------------------------------------*
| |
| Partial Convert API |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,16 @@
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Calendar;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.util.ValueVectorUtility;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
Expand Down Expand Up @@ -140,4 +146,160 @@ public static Object[][] prepareTestData(String[] testFiles, @SuppressWarnings("
*/
public abstract void testDataSets(VectorSchemaRoot root);

/**
 * Runs the given SQL query on the supplied connection and converts the whole result to Arrow objects.
 *
 * <p>Date, Time and Timestamp values are handled with the calendar returned by
 * {@link JdbcToArrowUtils#getUtcCalendar()}. To use a different calendar, call the overload that accepts a
 * {@link Calendar}.
 *
 * @param connection Database connection to be used. This method does not close it; the caller remains
 *                   responsible for closing it or returning it to the pool.
 * @param query The DB Query to fetch the data.
 * @param allocator Memory allocator; rejected by {@code Preconditions.checkNotNull} when null.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException Propagated to the caller after the Statement opened for the query has been closed.
 * @throws IOException Propagated from the conversion to Arrow vectors.
 */
public static VectorSchemaRoot sqlToArrow(Connection connection, String query, BufferAllocator allocator)
    throws SQLException, IOException {
  Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");

  final Calendar utcCalendar = JdbcToArrowUtils.getUtcCalendar();
  return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, utcCalendar));
}

/**
 * Runs the given SQL query on the supplied connection and converts the whole result to Arrow objects,
 * using a caller-supplied calendar.
 *
 * @param connection Database connection to be used. This method does not close it; the caller remains
 *                   responsible for closing it or returning it to the pool.
 * @param query The DB Query to fetch the data.
 * @param allocator Memory allocator; rejected by {@code Preconditions.checkNotNull} when null.
 * @param calendar Calendar used for Date, Time and Timestamp datasets; rejected when null.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException Propagated to the caller after the Statement opened for the query has been closed.
 * @throws IOException Propagated from the conversion to Arrow vectors.
 */
public static VectorSchemaRoot sqlToArrow(
    Connection connection, String query, BufferAllocator allocator, Calendar calendar)
    throws SQLException, IOException {
  // Validate in the same order as before: allocator first, then calendar.
  Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
  Preconditions.checkNotNull(calendar, "Calendar object can not be null");

  final JdbcToArrowConfig conversionConfig = new JdbcToArrowConfig(allocator, calendar);
  return sqlToArrow(connection, query, conversionConfig);
}

/**
 * Runs the given SQL query on the supplied connection and converts the whole result to Arrow objects.
 *
 * <p>Both the Statement and the ResultSet opened for the query are closed before this method returns,
 * whether it completes normally or throws.
 *
 * @param connection Database connection to be used. This method does not close it; the caller remains
 *                   responsible for closing it or returning it to the pool.
 * @param query The DB Query to fetch the data; must be neither null nor empty.
 * @param config Configuration of the conversion from JDBC to Arrow.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException Propagated to the caller after the ResultSet and Statement have been closed.
 * @throws IOException Propagated from the conversion to Arrow vectors.
 */
public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config)
    throws SQLException, IOException {
  Preconditions.checkNotNull(connection, "JDBC connection object can not be null");
  Preconditions.checkArgument(query != null && !query.isEmpty(), "SQL query can not be null or empty");

  // Manage the ResultSet explicitly instead of relying on Statement.close() to cascade to it.
  // Resources are closed in reverse order: the ResultSet first, then the Statement.
  try (Statement stmt = connection.createStatement();
       ResultSet resultSet = stmt.executeQuery(query)) {
    return sqlToArrow(resultSet, config);
  }
}

/**
 * Converts every row of the given JDBC {@link ResultSet} to Arrow objects.
 *
 * <p>Delegates to {@link #sqlToArrow(ResultSet, Calendar)} with the calendar returned by
 * {@link JdbcToArrowUtils#getUtcCalendar()}; that overload supplies the allocator.
 *
 * @param resultSet ResultSet to use to fetch the data from underlying database; rejected when null.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException on error
 * @throws IOException on error during the conversion to Arrow vectors
 */
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLException, IOException {
  Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");

  final Calendar utcCalendar = JdbcToArrowUtils.getUtcCalendar();
  return sqlToArrow(resultSet, utcCalendar);
}

/**
 * Converts every row of the given JDBC {@link ResultSet} to Arrow objects, allocating from the supplied
 * allocator and using the calendar returned by {@link JdbcToArrowUtils#getUtcCalendar()}.
 *
 * @param resultSet ResultSet to use to fetch the data from underlying database
 * @param allocator Memory allocator; rejected by {@code Preconditions.checkNotNull} when null.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException on error
 * @throws IOException on error during the conversion to Arrow vectors
 */
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BufferAllocator allocator)
    throws SQLException, IOException {
  Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");

  return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()));
}

/**
 * Converts every row of the given JDBC {@link ResultSet} to Arrow objects.
 *
 * <p>A new {@code RootAllocator(Integer.MAX_VALUE)} is created for each call and handed to the conversion
 * through the {@link JdbcToArrowConfig}.
 *
 * @param resultSet ResultSet to use to fetch the data from underlying database; rejected when null.
 * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or {@code null} if none.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException on error
 * @throws IOException on error during the conversion to Arrow vectors
 */
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar)
    throws SQLException, IOException {
  Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");

  final RootAllocator rootAllocator = new RootAllocator(Integer.MAX_VALUE);
  final JdbcToArrowConfig conversionConfig = new JdbcToArrowConfig(rootAllocator, calendar);
  return sqlToArrow(resultSet, conversionConfig);
}

/**
 * Converts every row of the given JDBC {@link ResultSet} to Arrow objects with a caller-supplied allocator
 * and calendar.
 *
 * @param resultSet ResultSet to use to fetch the data from underlying database
 * @param allocator Memory allocator to use; rejected by {@code Preconditions.checkNotNull} when null.
 * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or {@code null} if none.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException on error
 * @throws IOException on error during the conversion to Arrow vectors
 */
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BufferAllocator allocator, Calendar calendar)
    throws SQLException, IOException {
  Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");

  final JdbcToArrowConfig conversionConfig = new JdbcToArrowConfig(allocator, calendar);
  return sqlToArrow(resultSet, conversionConfig);
}

/**
 * Converts every row of the given JDBC {@link ResultSet} to Arrow objects.
 *
 * <p>The schema is derived from the result set metadata and the root is created on the allocator held by
 * {@code config}. When the configured target batch size is not {@code JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE},
 * the vectors are pre-allocated to that size before being populated. This method does not close the
 * result set.
 *
 * <p>If pre-allocation or population fails, the partially built root is closed before the exception is
 * propagated, so its buffers are not left allocated.
 *
 * @param resultSet ResultSet to use to fetch the data from underlying database; rejected when null.
 * @param config Configuration of the conversion from JDBC to Arrow; rejected when null.
 * @return Arrow Data Objects {@link VectorSchemaRoot}
 * @throws SQLException on error
 * @throws IOException on error during the conversion to Arrow vectors
 */
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig config)
    throws SQLException, IOException {
  Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
  Preconditions.checkNotNull(config, "The configuration cannot be null");

  final VectorSchemaRoot root = VectorSchemaRoot.create(
      JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), config.getAllocator());
  try {
    if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) {
      ValueVectorUtility.preAllocate(root, config.getTargetBatchSize());
    }
    JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root, config);
    return root;
  } catch (Exception e) {
    // The caller never receives the root on failure, so release it here. The original exception is
    // rethrown unchanged (precise rethrow); a failure while closing is attached as suppressed.
    try {
      root.close();
    } catch (RuntimeException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.arrow.adapter.jdbc.h2;

import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow;
import static org.junit.Assert.assertEquals;

import java.sql.Connection;
Expand All @@ -28,7 +29,6 @@
import java.sql.Statement;
import java.util.List;

import org.apache.arrow.adapter.jdbc.JdbcToArrow;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.types.pojo.Field;
Expand Down Expand Up @@ -106,7 +106,7 @@ public void testJdbcAliasToArrow() throws Exception {

try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) {
final VectorSchemaRoot vector =
JdbcToArrow.sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE));
sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE));

assertEquals(rowCount, vector.getRowCount());
Schema vectorSchema = vector.getSchema();
Expand Down
Loading