From a878aa641dd7ba5b3469349deb2346c71041f9da Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 8 Mar 2017 16:54:56 -0800 Subject: [PATCH 1/2] added JSON read and write support for Date, Time, and Timestamp millisecond types, added JSON test --- .../vector/file/json/JsonFileReader.java | 10 +++- .../vector/file/json/JsonFileWriter.java | 8 ++++ .../arrow/vector/file/BaseFileTest.java | 46 ++++++++++++++++++- .../arrow/vector/file/json/TestJSONFile.java | 36 +++++++++++++++ 4 files changed, 97 insertions(+), 3 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java index 24fdc184523..1bb9571d249 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java @@ -32,6 +32,7 @@ import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.BufferBacked; +import org.apache.arrow.vector.DateVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; @@ -41,6 +42,7 @@ import org.apache.arrow.vector.TimeStampMilliVector; import org.apache.arrow.vector.TimeStampMicroVector; import org.apache.arrow.vector.TimeStampNanoVector; +import org.apache.arrow.vector.TimeVector; import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.UInt1Vector; import org.apache.arrow.vector.UInt2Vector; @@ -227,7 +229,13 @@ private void setValueFromParser(ValueVector valueVector, int i) throws IOExcepti break; case TIMESTAMPNANO: ((TimeStampNanoVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class)); - break; + break; + case DATE: + ((DateVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class)); + break; + case TIME: + ((TimeVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class)); + break; default: throw new UnsupportedOperationException("minor type: " + valueVector.getMinorType()); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java index 99040b67e1c..9d74e22f7c0 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java @@ -23,11 +23,13 @@ import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.BufferBacked; +import org.apache.arrow.vector.DateVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.TimeStampSecVector; import org.apache.arrow.vector.TimeStampMilliVector; import org.apache.arrow.vector.TimeStampMicroVector; import org.apache.arrow.vector.TimeStampNanoVector; +import org.apache.arrow.vector.TimeVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ValueVector.Accessor; import org.apache.arrow.vector.VarBinaryVector; @@ -156,6 +158,12 @@ private void writeValueToGenerator(ValueVector valueVector, int i) throws IOExce case TIMESTAMPNANO: generator.writeNumber(((TimeStampNanoVector)valueVector).getAccessor().get(i)); break; + case DATE: + generator.writeNumber(((DateVector)valueVector).getAccessor().get(i)); + break; + case TIME: + generator.writeNumber(((TimeVector)valueVector).getAccessor().get(i)); + break; case BIT: generator.writeNumber(((BitVector)valueVector).getAccessor().get(i)); break; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java b/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java index 774bead3207..5b3e4c80855 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java @@ -28,12 +28,12 @@ import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.*; import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; -import org.apache.arrow.vector.complex.writer.BigIntWriter; -import org.apache.arrow.vector.complex.writer.IntWriter; import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder; +import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.junit.After; import org.junit.Assert; @@ -138,6 +138,48 @@ protected void validateComplexContent(int count, VectorSchemaRoot root) { } } + private DateTime makeDateTimeFromCount(int i) { + return new DateTime(2000 + i, 1 + i, 1 + i, i, i, i, i, DateTimeZone.UTC); + } + + protected void writeDateTimeData(int count, NullableMapVector parent) { + Assert.assertTrue(count < 100); + ComplexWriter writer = new ComplexWriterImpl("root", parent); + MapWriter rootWriter = writer.rootAsMap(); + DateWriter dateWriter = rootWriter.date("date"); + TimeWriter timeWriter = rootWriter.time("time"); + TimeStampMilliWriter timeStampMilliWriter = rootWriter.timeStampMilli("timestamp-milli"); + for (int i = 0; i < count; i++) { + DateTime dt = makeDateTimeFromCount(i); + // Number of days since epoch, stored as 64-bit integer, only date part is used + dateWriter.setPosition(i); + long dateLong = dt.minusMillis(dt.getMillisOfDay()).getMillis(); + dateWriter.writeDate(dateLong); + // Time is a value since midnight stored as 32-bit integer + timeWriter.setPosition(i); + timeWriter.writeTime(dt.getMillisOfDay()); + // Timestamp is milliseconds since the epoch, stored as 64-bit integer + timeStampMilliWriter.setPosition(i); + timeStampMilliWriter.writeTimeStampMilli(dt.getMillis()); + } + writer.setValueCount(count); + } + + protected void validateDateTimeContent(int count, VectorSchemaRoot root) { + Assert.assertEquals(count, root.getRowCount()); + printVectors(root.getFieldVectors()); + for (int i = 0; i < count; i++) { + Object dateVal = root.getVector("date").getAccessor().getObject(i); + DateTime dt = makeDateTimeFromCount(i).withZoneRetainFields(DateTimeZone.getDefault()); + DateTime dateExpected = dt.minusMillis(dt.getMillisOfDay()); + Assert.assertTrue(dateExpected.equals(dateVal)); + Object timeVal = root.getVector("time").getAccessor().getObject(i); + Assert.assertEquals(dt.getMillisOfDay(), ((DateTime) timeVal).getMillisOfDay()); + Object timestampMilliVal = root.getVector("timestamp-milli").getAccessor().getObject(i); + Assert.assertTrue(dt.equals(timestampMilliVal)); + } + } + protected void writeData(int count, MapVector parent) { ComplexWriter writer = new ComplexWriterImpl("root", parent); MapWriter rootWriter = writer.rootAsMap(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java index 3720a13b0fc..e77789ef4d4 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java @@ -119,6 +119,42 @@ public void testWriteReadUnionJSON() throws IOException { } } + @Test + public void testWriteReadDateTimeJSON() throws IOException { + File file = new File("target/mytest_datetime.json"); + int count = COUNT; + + // write + try ( + BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); + NullableMapVector parent = new NullableMapVector("parent", vectorAllocator, null)) { + + writeDateTimeData(count, parent); + + printVectors(parent.getChildrenFromFields()); + + VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root")); + validateDateTimeContent(count, root); + + writeJSON(file, new VectorSchemaRoot(parent.getChild("root"))); + } + + // read + try ( + BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); + ) { + JsonFileReader reader = new JsonFileReader(file, readerAllocator); + Schema schema = reader.start(); + LOGGER.debug("reading schema: " + schema); + + // initialize vectors + try (VectorSchemaRoot root = reader.read();) { + validateDateTimeContent(count, root); + } + reader.close(); + } + } + @Test public void testSetStructLength() throws IOException { File file = new File("../../integration/data/struct_example.json"); From ff3c1ca2de87e0c736a9686e8feb1e295de66f9c Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Fri, 10 Mar 2017 13:59:07 -0800 Subject: [PATCH 2/2] fixed import wildcard format --- .../java/org/apache/arrow/vector/file/BaseFileTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java b/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java index 5b3e4c80855..da2a950024c 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java @@ -28,10 +28,14 @@ import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.*; import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; +import org.apache.arrow.vector.complex.writer.BigIntWriter; +import org.apache.arrow.vector.complex.writer.DateWriter; +import org.apache.arrow.vector.complex.writer.IntWriter; +import org.apache.arrow.vector.complex.writer.TimeWriter; +import org.apache.arrow.vector.complex.writer.TimeStampMilliWriter; import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder; import org.joda.time.DateTime; import org.joda.time.DateTimeZone;