From 3c4751137e93306e779eb69cdff57637ed6b5fed Mon Sep 17 00:00:00 2001 From: Li Jin Date: Tue, 7 Nov 2017 11:12:57 -0500 Subject: [PATCH 1/3] UnitTest passing. Integration test broke. --- integration/integration_test.py | 1 + .../BaseNullableVariableWidthVector.java | 46 -- .../arrow/vector/NullableBigIntVector.java | 37 +- .../arrow/vector/NullableDateDayVector.java | 34 -- .../arrow/vector/NullableDateMilliVector.java | 34 -- .../arrow/vector/NullableDecimalVector.java | 37 +- .../arrow/vector/NullableFloat4Vector.java | 37 +- .../arrow/vector/NullableFloat8Vector.java | 37 +- .../arrow/vector/NullableIntVector.java | 36 +- .../arrow/vector/NullableSmallIntVector.java | 37 +- .../arrow/vector/NullableTimeMicroVector.java | 34 -- .../arrow/vector/NullableTimeMilliVector.java | 34 -- .../arrow/vector/NullableTimeNanoVector.java | 34 -- .../arrow/vector/NullableTimeSecVector.java | 34 -- .../arrow/vector/NullableTimeStampVector.java | 36 -- .../arrow/vector/NullableTinyIntVector.java | 36 -- .../vector/file/json/JsonFileReader.java | 443 ++++++++++++------ .../arrow/vector/file/json/TestJSONFile.java | 83 ++++ 18 files changed, 394 insertions(+), 676 deletions(-) diff --git a/integration/integration_test.py b/integration/integration_test.py index 59a1de5a463..ea9bfa1f4d5 100644 --- a/integration/integration_test.py +++ b/integration/integration_test.py @@ -1025,6 +1025,7 @@ def get_static_json_files(): def run_all_tests(debug=False): testers = [CPPTester(debug=debug), JavaTester(debug=debug)] + #testers = [JavaTester(debug=debug)] static_json_files = get_static_json_files() generated_json_files = get_generated_json_files() json_files = static_json_files + generated_json_files diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java index 6587cde633e..47daeee7f20 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java @@ -909,52 +909,6 @@ protected final void handleSafe(int index, int dataLength) { } } - - /****************************************************************** - * * - * helper methods currently * - * used by JsonFileReader and * - * JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Method used by Json Reader to explicitly set the data of the variable - * width vector elements. The method takes care of allocating the memory - * for the vector if caller hasn't done so. - * - * This method should not be used externally. - * - * @param data ArrowBuf for storing variable width elements in the vector - * @param offset offset of the element - * @param allocator memory allocator - * @param index position of the element in the vector - * @param value array of bytes for the element - * @param valueCount number of elements in the vector - * @return buffer holding the variable width data. - */ - public static ArrowBuf set(ArrowBuf data, ArrowBuf offset, - BufferAllocator allocator, int index, byte[] value, - int valueCount) { - if (data == null) { - data = allocator.buffer(INITIAL_BYTE_COUNT); - } - final int currentBufferCapacity = data.capacity(); - final int currentStartOffset = offset.getInt(index * OFFSET_WIDTH); - while (currentBufferCapacity < currentStartOffset + value.length) { - final ArrowBuf newBuf = allocator.buffer(currentBufferCapacity * 2); - newBuf.setBytes(0, data, 0, currentBufferCapacity); - data.release(); - data = newBuf; - } - data.setBytes(currentStartOffset, value, 0, value.length); - if (index == (valueCount - 1)) { - data.writerIndex(offset.getInt(valueCount * OFFSET_WIDTH)); - } - return data; - } - /** * Method used by Json Writer to read a variable width element from * the variable width vector and write to Json. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableBigIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableBigIntVector.java index ee40d708cc1..253427333a2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableBigIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableBigIntVector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableBigIntVector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 8; + public static final byte TYPE_WIDTH = 8; private final FieldReader reader; /** @@ -290,41 +290,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableDateDayVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableDateDayVector.java index 949287ecafe..e6b5b590f3f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableDateDayVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableDateDayVector.java @@ -292,40 +292,6 @@ public void setSafe(int index, int isSet, int value) { set(index, isSet, value); } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setInt(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableDateMilliVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableDateMilliVector.java index a0bdccedea4..8e15100f708 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableDateMilliVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableDateMilliVector.java @@ -296,40 +296,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java index 8320f90830a..2a611c6a2d5 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java @@ -37,7 +37,7 @@ * maintained to track which elements in the vector are null. */ public class NullableDecimalVector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 16; + public static final byte TYPE_WIDTH = 16; private final FieldReader reader; private final int precision; @@ -355,41 +355,6 @@ public void setSafe(int index, int isSet, int start, ArrowBuf buffer) { } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value as array of bytes - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, byte[] value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - DecimalUtility.writeByteArrayToArrowBuf(value, buffer, index); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - - /****************************************************************** * * * vector transfer * diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat4Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat4Vector.java index 5b28065c402..3ba5cfcea52 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat4Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat4Vector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableFloat4Vector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 4; + public static final byte TYPE_WIDTH = 4; private final FieldReader reader; /** @@ -291,41 +291,6 @@ public void setSafe(int index, int isSet, float value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, float value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setFloat(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat8Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat8Vector.java index 624abf2f272..2fb96a44fe7 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat8Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat8Vector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableFloat8Vector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 8; + public static final byte TYPE_WIDTH = 8; private final FieldReader reader; /** @@ -291,41 +291,6 @@ public void setSafe(int index, int isSet, double value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, double value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setDouble(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableIntVector.java index 6311daf4f5c..93deacbdfdc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableIntVector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableIntVector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 4; + public static final byte TYPE_WIDTH = 4; private final FieldReader reader; /** @@ -291,40 +291,6 @@ public void setSafe(int index, int isSet, int value) { set(index, isSet, value); } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setInt(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableSmallIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableSmallIntVector.java index c45a8d5f5bc..ed337188b5b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableSmallIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableSmallIntVector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableSmallIntVector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 2; + public static final byte TYPE_WIDTH = 2; private final FieldReader reader; /** @@ -319,41 +319,6 @@ public void setSafe(int index, int isSet, short value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, short value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setShort(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMicroVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMicroVector.java index 454a4ac41a6..d7ea3222397 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMicroVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMicroVector.java @@ -292,40 +292,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMilliVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMilliVector.java index 8540d169242..07d8abba4e5 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMilliVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMilliVector.java @@ -294,40 +294,6 @@ public void setSafe(int index, int isSet, int value) { } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setInt(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeNanoVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeNanoVector.java index 015226da223..947b2392f7a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeNanoVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeNanoVector.java @@ -291,40 +291,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeSecVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeSecVector.java index 2b2375e92f1..0a3cfaac047 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeSecVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeSecVector.java @@ -291,40 +291,6 @@ public void setSafe(int index, int isSet, int value) { set(index, isSet, value); } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setInt(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeStampVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeStampVector.java index b2a58bd4568..1bf2abc384d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeStampVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeStampVector.java @@ -165,42 +165,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and * - * JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTinyIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTinyIntVector.java index 15100530d0e..ccbfa32b85f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTinyIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTinyIntVector.java @@ -318,42 +318,6 @@ public void setSafe(int index, int isSet, byte value) { set(index, isSet, value); } - - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, byte value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setByte(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java index c6a82510e60..0c9a5548d1d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java @@ -27,15 +27,17 @@ import java.io.File; import java.io.IOException; +import java.nio.charset.Charset; import java.util.*; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import io.netty.buffer.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.*; -import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.dictionary.Dictionary; import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.file.InvalidArrowFileException; import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.schema.ArrowVectorType; import org.apache.arrow.vector.types.Types; @@ -183,6 +185,278 @@ public VectorSchemaRoot read() throws IOException { } } + private abstract class BufferReader { + abstract protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException; + + final ArrowBuf readBuffer(BufferAllocator allocator, int count) throws IOException { + readToken(START_ARRAY); + ArrowBuf buf = read(allocator, count); + readToken(END_ARRAY); + return buf; + } + } + + private class BufferHelper { + BufferReader BIT = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(BitVectorHelper.getValidityBufferSize(count)); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + BitVectorHelper.setValidityBit(buf, i, parser.readValueAs(Boolean.class) ? 1 : 0); + } + + return buf; + } + }; + + BufferReader INT1 = new BufferReader() { + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableTinyIntVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeByte(parser.getByteValue()); + } + + return buf; + } + }; + + BufferReader INT2 = new BufferReader() { + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableSmallIntVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeShort(parser.getShortValue()); + } + + return buf; + } + }; + + BufferReader INT4 = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableIntVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeInt(parser.getIntValue()); + } + + return buf; + } + }; + + BufferReader INT8 = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableBigIntVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeLong(parser.getLongValue()); + } + + return buf; + } + }; + + BufferReader FLOAT4 = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableFloat4Vector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeFloat(parser.getFloatValue()); + } + + return buf; + } + }; + + BufferReader FLOAT8 = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableFloat8Vector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeDouble(parser.getDoubleValue()); + } + + return buf; + } + }; + + BufferReader DECIMAL = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableDecimalVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + final byte[] value = decodeHexSafe(parser.getValueAsString()); + DecimalUtility.writeByteArrayToArrowBuf(value, buf, i); + } + + return buf; + } + }; + + BufferReader VARCHAR = new BufferReader() { + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrayList values = Lists.newArrayList(); + int bufferSize = 0; + for (int i = 0; i < count; i++) { + parser.nextToken(); + final byte[] value = parser.getValueAsString().getBytes(UTF_8); + values.add(value); + bufferSize += value.length; + + } + + ArrowBuf buf = allocator.buffer(bufferSize); + + for (byte[] value : values) { + buf.writeBytes(value); + } + + return buf; + } + }; + + BufferReader VARBINARY = new BufferReader() { + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrayList values = Lists.newArrayList(); + int bufferSize = 0; + for (int i = 0; i < count; i++) { + parser.nextToken(); + final byte[] value = decodeHexSafe(parser.readValueAs(String.class)); + values.add(value); + bufferSize += value.length; + + } + + ArrowBuf buf = allocator.buffer(bufferSize); + + for (byte[] value : values) { + buf.writeBytes(value); + } + + return buf; + } + }; + + } + + private ArrowBuf readBuffer(BufferAllocator allocator, ArrowVectorType bufferType, Types.MinorType type, int count) throws IOException { + ArrowBuf buf; + + BufferHelper helper = new BufferHelper(); + + BufferReader reader = null; + + if (bufferType.equals(VALIDITY)) { + reader = helper.BIT; + } else if (bufferType.equals(OFFSET)) { + reader = helper.INT4; + } else if (bufferType.equals(TYPE)) { + reader = helper.INT1; + } else if (bufferType.equals(DATA)) { + switch (type) { + case BIT: + reader = helper.BIT; + break; + case TINYINT: + reader = helper.INT1; + break; + case SMALLINT: + reader = helper.INT2; + break; + case INT: + reader = helper.INT4; + break; + case BIGINT: + reader = helper.INT8; + break; + case UINT1: + reader = helper.INT1; + break; + case UINT2: + reader = helper.INT2; + break; + case UINT4: + reader = helper.INT4; + break; + case UINT8: + reader = helper.INT8; + break; + case FLOAT4: + reader = helper.FLOAT4; + break; + case FLOAT8: + reader = helper.FLOAT8; + break; + case DECIMAL: + reader = helper.DECIMAL; + break; + case VARCHAR: + reader = helper.VARCHAR; + break; + case VARBINARY: + reader = helper.VARBINARY; + break; + case DATEDAY: + reader = helper.INT4; + break; + case DATEMILLI: + reader = helper.INT8; + break; + case TIMESEC: + case TIMEMILLI: + reader = helper.INT4; + break; + case TIMEMICRO: + case TIMENANO: + reader = helper.INT8; + break; + case TIMESTAMPNANO: + case TIMESTAMPMICRO: + case TIMESTAMPMILLI: + case TIMESTAMPSEC: + case TIMESTAMPNANOTZ: + case TIMESTAMPMICROTZ: + case TIMESTAMPMILLITZ: + case TIMESTAMPSECTZ: + reader = helper.INT8; + break; + default: + throw new UnsupportedOperationException("Cannot read array of type " + type); + } + } else { + throw new InvalidArrowFileException("Unrecognized buffer type " + bufferType); + } + + buf = reader.readBuffer(allocator, count); + assert buf != null; + return buf; + } + private void readFromJsonIntoVector(Field field, FieldVector vector) throws JsonParseException, IOException { List vectorTypes = field.getTypeLayout().getVectorTypes(); ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()]; @@ -209,7 +483,7 @@ private void readFromJsonIntoVector(Field field, FieldVector vector) throws Json throw new IllegalArgumentException("Expected field " + field.getName() + " but got " + name); } - /* Initialize the vector with required capacity but don't allocate since we would + /* Initialize the vector with required capacity but don't allocateNew since we would * be doing loadFieldBuffers. */ int valueCount = readNextField("count", Integer.class); @@ -218,29 +492,39 @@ private void readFromJsonIntoVector(Field field, FieldVector vector) throws Json for (int v = 0; v < vectorTypes.size(); v++) { ArrowVectorType vectorType = vectorTypes.get(v); nextFieldIs(vectorType.getName()); - readToken(START_ARRAY); int innerBufferValueCount = valueCount; if (vectorType.equals(OFFSET)) { /* offset buffer has 1 additional value capacity */ innerBufferValueCount = valueCount + 1; } - for (int i = 0; i < innerBufferValueCount; i++) { - /* write data to the buffer */ - parser.nextToken(); - /* for variable width vectors, value count doesn't help pre-determining the capacity of - * the underlying data buffer. So we need to pass down the offset buffer (which was already - * populated in the previous iteration of this loop). - */ - if (vectorType.equals(DATA) && (vector.getMinorType() == Types.MinorType.VARCHAR - || vector.getMinorType() == Types.MinorType.VARBINARY)) { - vectorBuffers[v] = setValueFromParser(vectorType, vector, vectorBuffers[v], - vectorBuffers[v-1], i, innerBufferValueCount); - } else { - vectorBuffers[v] = setValueFromParser(vectorType, vector, vectorBuffers[v], - null, i, innerBufferValueCount); - } - } - readToken(END_ARRAY); + + vectorBuffers[v] = readBuffer(allocator, vectorType, vector.getMinorType(), innerBufferValueCount); + + + // ArrayList values = Lists.newArrayList(parser.readValuesAs(Integer.class)); + // System.out.println(values); + + // writeBuf(vectorBuffers[v], vector.getMinorType(), innerBufferValueCount); + +// +// for (int i = 0; i < innerBufferValueCount; i++) { +// /* write data to the buffer */ +// parser.nextToken(); +// +// /* for variable width vectors, value count doesn't help pre-determining the capacity of +// * the underlying data buffer. So we need to pass down the offset buffer (which was already +// * populated in the previous iteration of this loop). +// */ +// if (vectorType.equals(DATA) && (vector.getMinorType() == Types.MinorType.VARCHAR +// || vector.getMinorType() == Types.MinorType.VARBINARY)) { +// vectorBuffers[v] = setValueFromParser(vectorType, vector, vectorBuffers[v], +// vectorBuffers[v-1], i, innerBufferValueCount); +// } else { +// vectorBuffers[v] = setValueFromParser(vectorType, vector, vectorBuffers[v], +// null, i, innerBufferValueCount); +// } +// } + } vector.loadFieldBuffers(new ArrowFieldNode(valueCount, 0), Arrays.asList(vectorBuffers)); @@ -277,125 +561,6 @@ private byte[] decodeHexSafe(String hexString) throws IOException { } } - private ArrowBuf setValueFromParser(ArrowVectorType bufferType, FieldVector vector, - ArrowBuf buffer, ArrowBuf offsetBuffer, int index, int valueCount) throws IOException { - if (bufferType.equals(TYPE)) { - buffer = NullableTinyIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Byte.class)); - } else if (bufferType.equals(OFFSET)) { - buffer = BaseNullableVariableWidthVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - } else if (bufferType.equals(VALIDITY)) { - buffer = BitVectorHelper.setValidityBit(buffer, allocator, - valueCount, index, parser.readValueAs(Boolean.class) ? 1 : 0); - } else if (bufferType.equals(DATA)) { - switch (vector.getMinorType()) { - case BIT: - buffer = BitVectorHelper.setValidityBit(buffer, allocator, - valueCount, index, parser.readValueAs(Boolean.class) ? 1 : 0); - break; - case TINYINT: - buffer = NullableTinyIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Byte.class)); - break; - case SMALLINT: - buffer = NullableSmallIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Short.class)); - break; - case INT: - buffer = NullableIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - break; - case BIGINT: - buffer = NullableBigIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case FLOAT4: - buffer = NullableFloat4Vector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Float.class)); - break; - case FLOAT8: - buffer = NullableFloat8Vector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Double.class)); - break; - case DECIMAL: - buffer = NullableDecimalVector.set(buffer, allocator, - valueCount, index, decodeHexSafe(parser.readValueAs(String.class))); - break; - case VARBINARY: - assert (offsetBuffer != null); - buffer = BaseNullableVariableWidthVector.set(buffer, offsetBuffer, allocator, index, - decodeHexSafe(parser.readValueAs(String.class)), valueCount); - break; - case VARCHAR: - assert (offsetBuffer != null); - buffer = BaseNullableVariableWidthVector.set(buffer, offsetBuffer, allocator, index, - parser.readValueAs(String.class).getBytes(UTF_8), valueCount); - break; - case DATEDAY: - buffer = NullableDateDayVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - break; - case DATEMILLI: - buffer = NullableDateMilliVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESEC: - buffer = NullableTimeSecVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - break; - case TIMEMILLI: - buffer = NullableTimeMilliVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - break; - case TIMEMICRO: - buffer = NullableTimeMicroVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMENANO: - buffer = NullableTimeNanoVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPSEC: - buffer = NullableTimeStampSecVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPMILLI: - buffer = NullableTimeStampMilliVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPMICRO: - buffer = NullableTimeStampMicroVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPNANO: - buffer = NullableTimeStampNanoVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPSECTZ: - buffer = NullableTimeStampSecTZVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPMILLITZ: - buffer = NullableTimeStampMilliTZVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPMICROTZ: - buffer = NullableTimeStampMicroTZVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPNANOTZ: - buffer = NullableTimeStampNanoTZVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - default: - throw new UnsupportedOperationException("minor type: " + vector.getMinorType()); - } - } - - return buffer; - } - @Override public void close() throws IOException { parser.close(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java index 5c4c48cd26b..3f2d5712a7e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java @@ -19,6 +19,7 @@ package org.apache.arrow.vector.file.json; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import org.apache.arrow.memory.BufferAllocator; @@ -28,6 +29,7 @@ import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.dictionary.DictionaryProvider; import org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider; +import org.apache.arrow.vector.file.ArrowFileReader; import org.apache.arrow.vector.file.BaseFileTest; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.Validator; @@ -39,6 +41,87 @@ public class TestJSONFile extends BaseFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(TestJSONFile.class); + @Test + public void testJSON() throws IOException { + File file = new File("/var/folders/0p/1z45pgjs6tz1rq093ty327h80000gn/T/tmp7bpg2aef/generated_nested.json"); + + // read + try ( + BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); + ) { + JsonFileReader reader = new JsonFileReader(file, readerAllocator); + Schema schema = reader.start(); + LOGGER.debug("reading schema: " + schema); + + // initialize vectors + try (VectorSchemaRoot root = reader.read();) { + NullableMapVector vector = (NullableMapVector) root.getVector("struct_nullable"); + System.out.println(vector.isNull(0)); + System.out.println(vector.isNull(1)); + System.out.println(vector.isNull(2)); + System.out.println(vector.isNull(3)); + System.out.println(vector.isNull(4)); + + System.out.println(vector.getValidityBuffer()); + } + + try (VectorSchemaRoot root = reader.read();) { + NullableMapVector vector = (NullableMapVector) root.getVector("struct_nullable"); + System.out.println(vector.isNull(0)); + System.out.println(vector.isNull(1)); + System.out.println(vector.isNull(2)); + System.out.println(vector.isNull(3)); + System.out.println(vector.isNull(4)); + + System.out.println(vector.getValidityBuffer()); + } + reader.close(); + } + } + + @Test + public void testFile() throws IOException { + File file = new File("/var/folders/0p/1z45pgjs6tz1rq093ty327h80000gn/T/tmpdda8l04u/2540f848006040769495368df23a8859_generated_nested.json_to_arrow"); + + try ( + BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); + ) { + FileInputStream stream = new FileInputStream(file); + ArrowFileReader reader = new ArrowFileReader(stream.getChannel(), readerAllocator); + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + LOGGER.debug("reading schema: " + root.getSchema()); + + // initialize vectors + reader.loadNextBatch(); + NullableMapVector vector = (NullableMapVector) root.getVector("struct_nullable"); + System.out.println(vector.isNull(0)); + System.out.println(vector.isNull(1)); + System.out.println(vector.isNull(2)); + System.out.println(vector.isNull(3)); + System.out.println(vector.isNull(4)); + + System.out.println(vector.getValidityBuffer()); + + reader.loadNextBatch(); + vector = (NullableMapVector) root.getVector("struct_nullable"); + System.out.println(vector.isNull(0)); + System.out.println(vector.isNull(1)); + System.out.println(vector.isNull(2)); + System.out.println(vector.isNull(3)); + System.out.println(vector.isNull(4)); + System.out.println(vector.isNull(5)); + System.out.println(vector.isNull(6)); + System.out.println(vector.isNull(7)); + System.out.println(vector.isNull(8)); + System.out.println(vector.isNull(9)); + + + reader.close(); + root.close(); + } + + } + @Test public void testWriteReadComplexJSON() throws IOException { File file = new File("target/mytest_complex.json"); From 050c0df23ee01c77475b9d72f7033c67521e9cef Mon Sep 17 00:00:00 2001 From: Li Jin Date: Tue, 7 Nov 2017 13:09:05 -0500 Subject: [PATCH 2/3] Fix integration test --- .../org/apache/arrow/vector/file/json/JsonFileReader.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java index 0c9a5548d1d..d5c59184cab 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java @@ -201,7 +201,11 @@ private class BufferHelper { @Override protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - ArrowBuf buf = allocator.buffer(BitVectorHelper.getValidityBufferSize(count)); + final int bufferSize = BitVectorHelper.getValidityBufferSize(count); + ArrowBuf buf = allocator.buffer(bufferSize); + + // C++ integration test fails without this. + buf.setZero(0, bufferSize); for (int i = 0; i < count; i++) { parser.nextToken(); From c3381b30a9a5165ecc4db30003ef3dc7bb9b744c Mon Sep 17 00:00:00 2001 From: Li Jin Date: Tue, 7 Nov 2017 13:18:09 -0500 Subject: [PATCH 3/3] Remove temp changes --- integration/integration_test.py | 1 - .../vector/file/json/JsonFileReader.java | 29 +------ .../arrow/vector/file/json/TestJSONFile.java | 83 ------------------- 3 files changed, 2 insertions(+), 111 deletions(-) diff --git a/integration/integration_test.py b/integration/integration_test.py index ea9bfa1f4d5..59a1de5a463 100644 --- a/integration/integration_test.py +++ b/integration/integration_test.py @@ -1025,7 +1025,6 @@ def get_static_json_files(): def run_all_tests(debug=False): testers = [CPPTester(debug=debug), JavaTester(debug=debug)] - #testers = [JavaTester(debug=debug)] static_json_files = get_static_json_files() generated_json_files = get_generated_json_files() json_files = static_json_files + generated_json_files diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java index d5c59184cab..560b0b9c58a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java @@ -503,32 +503,6 @@ private void readFromJsonIntoVector(Field field, FieldVector vector) throws Json } vectorBuffers[v] = readBuffer(allocator, vectorType, vector.getMinorType(), innerBufferValueCount); - - - // ArrayList values = Lists.newArrayList(parser.readValuesAs(Integer.class)); - // System.out.println(values); - - // writeBuf(vectorBuffers[v], vector.getMinorType(), innerBufferValueCount); - -// -// for (int i = 0; i < innerBufferValueCount; i++) { -// /* write data to the buffer */ -// parser.nextToken(); -// -// /* for variable width vectors, value count doesn't help pre-determining the capacity of -// * the underlying data buffer. So we need to pass down the offset buffer (which was already -// * populated in the previous iteration of this loop). -// */ -// if (vectorType.equals(DATA) && (vector.getMinorType() == Types.MinorType.VARCHAR -// || vector.getMinorType() == Types.MinorType.VARBINARY)) { -// vectorBuffers[v] = setValueFromParser(vectorType, vector, vectorBuffers[v], -// vectorBuffers[v-1], i, innerBufferValueCount); -// } else { -// vectorBuffers[v] = setValueFromParser(vectorType, vector, vectorBuffers[v], -// null, i, innerBufferValueCount); -// } -// } - } vector.loadFieldBuffers(new ArrowFieldNode(valueCount, 0), Arrays.asList(vectorBuffers)); @@ -538,7 +512,8 @@ private void readFromJsonIntoVector(Field field, FieldVector vector) throws Json if (!fields.isEmpty()) { List vectorChildren = vector.getChildrenFromFields(); if (fields.size() != vectorChildren.size()) { - throw new IllegalArgumentException("fields and children are not the same size: " + fields.size() + " != " + vectorChildren.size()); + throw new IllegalArgumentException( + "fields and children are not the same size: " + fields.size() + " != " + vectorChildren.size()); } nextFieldIs("children"); readToken(START_ARRAY); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java index 3f2d5712a7e..5c4c48cd26b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java @@ -19,7 +19,6 @@ package org.apache.arrow.vector.file.json; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import org.apache.arrow.memory.BufferAllocator; @@ -29,7 +28,6 @@ import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.dictionary.DictionaryProvider; import org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider; -import org.apache.arrow.vector.file.ArrowFileReader; import org.apache.arrow.vector.file.BaseFileTest; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.Validator; @@ -41,87 +39,6 @@ public class TestJSONFile extends BaseFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(TestJSONFile.class); - @Test - public void testJSON() throws IOException { - File file = new File("/var/folders/0p/1z45pgjs6tz1rq093ty327h80000gn/T/tmp7bpg2aef/generated_nested.json"); - - // read - try ( - BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - ) { - JsonFileReader reader = new JsonFileReader(file, readerAllocator); - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - - // initialize vectors - try (VectorSchemaRoot root = reader.read();) { - NullableMapVector vector = (NullableMapVector) root.getVector("struct_nullable"); - System.out.println(vector.isNull(0)); - System.out.println(vector.isNull(1)); - System.out.println(vector.isNull(2)); - System.out.println(vector.isNull(3)); - System.out.println(vector.isNull(4)); - - System.out.println(vector.getValidityBuffer()); - } - - try (VectorSchemaRoot root = reader.read();) { - NullableMapVector vector = (NullableMapVector) root.getVector("struct_nullable"); - System.out.println(vector.isNull(0)); - System.out.println(vector.isNull(1)); - System.out.println(vector.isNull(2)); - System.out.println(vector.isNull(3)); - System.out.println(vector.isNull(4)); - - System.out.println(vector.getValidityBuffer()); - } - reader.close(); - } - } - - @Test - public void testFile() throws IOException { - File file = new File("/var/folders/0p/1z45pgjs6tz1rq093ty327h80000gn/T/tmpdda8l04u/2540f848006040769495368df23a8859_generated_nested.json_to_arrow"); - - try ( - BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - ) { - FileInputStream stream = new FileInputStream(file); - ArrowFileReader reader = new ArrowFileReader(stream.getChannel(), readerAllocator); - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - LOGGER.debug("reading schema: " + root.getSchema()); - - // initialize vectors - reader.loadNextBatch(); - NullableMapVector vector = (NullableMapVector) root.getVector("struct_nullable"); - System.out.println(vector.isNull(0)); - System.out.println(vector.isNull(1)); - System.out.println(vector.isNull(2)); - System.out.println(vector.isNull(3)); - System.out.println(vector.isNull(4)); - - System.out.println(vector.getValidityBuffer()); - - reader.loadNextBatch(); - vector = (NullableMapVector) root.getVector("struct_nullable"); - System.out.println(vector.isNull(0)); - System.out.println(vector.isNull(1)); - System.out.println(vector.isNull(2)); - System.out.println(vector.isNull(3)); - System.out.println(vector.isNull(4)); - System.out.println(vector.isNull(5)); - System.out.println(vector.isNull(6)); - System.out.println(vector.isNull(7)); - System.out.println(vector.isNull(8)); - System.out.println(vector.isNull(9)); - - - reader.close(); - root.close(); - } - - } - @Test public void testWriteReadComplexJSON() throws IOException { File file = new File("target/mytest_complex.json");