From e3945267962882cb1f2993929ccf67b2df8adf82 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Mon, 20 Mar 2017 14:00:08 -0700 Subject: [PATCH] ARROW-674: [Java] Support additional Timestamp timezone metadata --- .../src/main/codegen/data/ArrowTypes.tdd | 2 +- .../org/apache/arrow/vector/types/Types.java | 16 ++-- .../apache/arrow/vector/pojo/TestConvert.java | 2 +- .../arrow/vector/types/pojo/TestSchema.java | 90 ++++++++++++------- 4 files changed, 66 insertions(+), 44 deletions(-) diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 8f997524fcc..94fe31e8dc0 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -62,7 +62,7 @@ }, { name: "Timestamp", - fields: [{name: "unit", type: short, valueType: TimeUnit}] + fields: [{name: "unit", type: short, valueType: TimeUnit}, {name: "timezone", type: String}] }, { name: "Interval", diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 7cbf3c5bb5e..81743b51917 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -109,10 +109,10 @@ public class Types { private static final Field UINT8_FIELD = new Field("", true, new Int(64, false), null); private static final Field DATE_FIELD = new Field("", true, Date.INSTANCE, null); private static final Field TIME_FIELD = new Field("", true, new Time(TimeUnit.MILLISECOND, 32), null); - private static final Field TIMESTAMPSEC_FIELD = new Field("", true, new Timestamp(TimeUnit.SECOND), null); - private static final Field TIMESTAMPMILLI_FIELD = new Field("", true, new Timestamp(TimeUnit.MILLISECOND), null); - private static final Field TIMESTAMPMICRO_FIELD = new Field("", true, new Timestamp(TimeUnit.MICROSECOND), null); - private static final Field 
TIMESTAMPNANO_FIELD = new Field("", true, new Timestamp(TimeUnit.NANOSECOND), null); + private static final Field TIMESTAMPSEC_FIELD = new Field("", true, new Timestamp(TimeUnit.SECOND, "UTC"), null); + private static final Field TIMESTAMPMILLI_FIELD = new Field("", true, new Timestamp(TimeUnit.MILLISECOND, "UTC"), null); + private static final Field TIMESTAMPMICRO_FIELD = new Field("", true, new Timestamp(TimeUnit.MICROSECOND, "UTC"), null); + private static final Field TIMESTAMPNANO_FIELD = new Field("", true, new Timestamp(TimeUnit.NANOSECOND, "UTC"), null); private static final Field INTERVALDAY_FIELD = new Field("", true, new Interval(IntervalUnit.DAY_TIME), null); private static final Field INTERVALYEAR_FIELD = new Field("", true, new Interval(IntervalUnit.YEAR_MONTH), null); private static final Field FLOAT4_FIELD = new Field("", true, new FloatingPoint(FloatingPointPrecision.SINGLE), null); @@ -252,7 +252,7 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { } }, // time in second from the Unix epoch, 00:00:00.000000 on 1 January 1970, UTC. - TIMESTAMPSEC(new Timestamp(org.apache.arrow.vector.types.TimeUnit.SECOND)) { + TIMESTAMPSEC(new Timestamp(org.apache.arrow.vector.types.TimeUnit.SECOND, "UTC")) { @Override public Field getField() { return TIMESTAMPSEC_FIELD; @@ -269,7 +269,7 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { } }, // time in millis from the Unix epoch, 00:00:00.000 on 1 January 1970, UTC. - TIMESTAMPMILLI(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND)) { + TIMESTAMPMILLI(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC")) { @Override public Field getField() { return TIMESTAMPMILLI_FIELD; @@ -286,7 +286,7 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { } }, // time in microsecond from the Unix epoch, 00:00:00.000000 on 1 January 1970, UTC. 
- TIMESTAMPMICRO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND)) { + TIMESTAMPMICRO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, "UTC")) { @Override public Field getField() { return TIMESTAMPMICRO_FIELD; @@ -303,7 +303,7 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { } }, // time in nanosecond from the Unix epoch, 00:00:00.000000000 on 1 January 1970, UTC. - TIMESTAMPNANO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.NANOSECOND)) { + TIMESTAMPNANO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.NANOSECOND, "UTC")) { @Override public Field getField() { return TIMESTAMPNANO_FIELD; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java index 65823e2a821..824c62aa5fb 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java @@ -81,7 +81,7 @@ public void nestedSchema() { new Field("child4.1", true, Utf8.INSTANCE, null) ))); childrenBuilder.add(new Field("child5", true, new Union(UnionMode.Sparse, new int[] { MinorType.TIMESTAMPMILLI.ordinal(), MinorType.FLOAT8.ordinal() } ), ImmutableList.of( - new Field("child5.1", true, new Timestamp(TimeUnit.MILLISECOND), null), + new Field("child5.1", true, new Timestamp(TimeUnit.MILLISECOND, "UTC"), null), new Field("child5.2", true, new FloatingPoint(DOUBLE), ImmutableList.of()) ))); Schema initialSchema = new Schema(childrenBuilder.build()); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java index 5b74c54c915..96128c53fcf 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java @@ -27,6 +27,20 @@ import 
org.apache.arrow.vector.types.IntervalUnit; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.UnionMode; +import org.apache.arrow.vector.types.pojo.ArrowType.Binary; +import org.apache.arrow.vector.types.pojo.ArrowType.Bool; +import org.apache.arrow.vector.types.pojo.ArrowType.Date; +import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; +import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; +import org.apache.arrow.vector.types.pojo.ArrowType.Int; +import org.apache.arrow.vector.types.pojo.ArrowType.Interval; +import org.apache.arrow.vector.types.pojo.ArrowType.List; +import org.apache.arrow.vector.types.pojo.ArrowType.Null; +import org.apache.arrow.vector.types.pojo.ArrowType.Struct; +import org.apache.arrow.vector.types.pojo.ArrowType.Time; +import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; +import org.apache.arrow.vector.types.pojo.ArrowType.Union; +import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; import org.junit.Test; public class TestSchema { @@ -42,38 +56,40 @@ private static Field field(String name, ArrowType type, Field... 
children) { @Test public void testComplex() throws IOException { Schema schema = new Schema(asList( - field("a", false, new ArrowType.Int(8, true)), - field("b", new ArrowType.Struct(), - field("c", new ArrowType.Int(16, true)), - field("d", new ArrowType.Utf8())), - field("e", new ArrowType.List(), field(null, new ArrowType.Date())), - field("f", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), - field("g", new ArrowType.Timestamp(TimeUnit.MILLISECOND)), - field("h", new ArrowType.Interval(IntervalUnit.DAY_TIME)) + field("a", false, new Int(8, true)), + field("b", new Struct(), + field("c", new Int(16, true)), + field("d", new Utf8())), + field("e", new List(), field(null, new Date())), + field("f", new FloatingPoint(FloatingPointPrecision.SINGLE)), + field("g", new Timestamp(TimeUnit.MILLISECOND, "UTC")), + field("h", new Timestamp(TimeUnit.MICROSECOND, null)), + field("i", new Interval(IntervalUnit.DAY_TIME)) )); roundTrip(schema); assertEquals( - "Schema<a: Int(8, true), b: Struct<c: Int(16, true), d: Utf8>, e: List<Date>, f: FloatingPoint(SINGLE), g: Timestamp(MILLISECOND), h: Interval(DAY_TIME)>", + "Schema<a: Int(8, true), b: Struct<c: Int(16, true), d: Utf8>, e: List<Date>, f: FloatingPoint(SINGLE), g: Timestamp(MILLISECOND, UTC), h: Timestamp(MICROSECOND, null), i: Interval(DAY_TIME)>", schema.toString()); } @Test public void testAll() throws IOException { Schema schema = new Schema(asList( - field("a", false, new ArrowType.Null()), - field("b", new ArrowType.Struct(), field("ba", new ArrowType.Null())), - field("c", new ArrowType.List(), field("ca", new ArrowType.Null())), - field("d", new ArrowType.Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new ArrowType.Null())), - field("e", new ArrowType.Int(8, true)), - field("f", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), - field("g", new ArrowType.Utf8()), - field("h", new ArrowType.Binary()), - field("i", new ArrowType.Bool()), - field("j", new ArrowType.Decimal(5, 5)), - field("k", new ArrowType.Date()), - field("l", new ArrowType.Time(TimeUnit.MILLISECOND, 32)), - field("m", new 
ArrowType.Timestamp(TimeUnit.MILLISECOND)), - field("n", new ArrowType.Interval(IntervalUnit.DAY_TIME)) + field("a", false, new Null()), + field("b", new Struct(), field("ba", new Null())), + field("c", new List(), field("ca", new Null())), + field("d", new Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new Null())), + field("e", new Int(8, true)), + field("f", new FloatingPoint(FloatingPointPrecision.SINGLE)), + field("g", new Utf8()), + field("h", new Binary()), + field("i", new Bool()), + field("j", new Decimal(5, 5)), + field("k", new Date()), + field("l", new Time(TimeUnit.MILLISECOND, 32)), + field("m", new Timestamp(TimeUnit.MILLISECOND, "UTC")), + field("n", new Timestamp(TimeUnit.MICROSECOND, null)), + field("o", new Interval(IntervalUnit.DAY_TIME)) )); roundTrip(schema); } @@ -81,7 +97,7 @@ public void testAll() throws IOException { @Test public void testUnion() throws IOException { Schema schema = new Schema(asList( - field("d", new ArrowType.Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new ArrowType.Null())) + field("d", new Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new Null())) )); roundTrip(schema); contains(schema, "Sparse"); @@ -90,20 +106,26 @@ public void testUnion() throws IOException { @Test public void testTS() throws IOException { Schema schema = new Schema(asList( - field("a", new ArrowType.Timestamp(TimeUnit.SECOND)), - field("b", new ArrowType.Timestamp(TimeUnit.MILLISECOND)), - field("c", new ArrowType.Timestamp(TimeUnit.MICROSECOND)), - field("d", new ArrowType.Timestamp(TimeUnit.NANOSECOND)) + field("a", new Timestamp(TimeUnit.SECOND, "UTC")), + field("b", new Timestamp(TimeUnit.MILLISECOND, "UTC")), + field("c", new Timestamp(TimeUnit.MICROSECOND, "UTC")), + field("d", new Timestamp(TimeUnit.NANOSECOND, "UTC")), + field("e", new Timestamp(TimeUnit.SECOND, null)), + field("f", new Timestamp(TimeUnit.MILLISECOND, null)), + field("g", new Timestamp(TimeUnit.MICROSECOND, null)), + field("h", new 
Timestamp(TimeUnit.NANOSECOND, null)) )); roundTrip(schema); - contains(schema, "SECOND", "MILLISECOND", "MICROSECOND", "NANOSECOND"); + assertEquals( + "Schema<a: Timestamp(SECOND, UTC), b: Timestamp(MILLISECOND, UTC), c: Timestamp(MICROSECOND, UTC), d: Timestamp(NANOSECOND, UTC), e: Timestamp(SECOND, null), f: Timestamp(MILLISECOND, null), g: Timestamp(MICROSECOND, null), h: Timestamp(NANOSECOND, null)>", + schema.toString()); } @Test public void testInterval() throws IOException { Schema schema = new Schema(asList( - field("a", new ArrowType.Interval(IntervalUnit.YEAR_MONTH)), - field("b", new ArrowType.Interval(IntervalUnit.DAY_TIME)) + field("a", new Interval(IntervalUnit.YEAR_MONTH)), + field("b", new Interval(IntervalUnit.DAY_TIME)) )); roundTrip(schema); contains(schema, "YEAR_MONTH", "DAY_TIME"); @@ -112,9 +134,9 @@ public void testInterval() throws IOException { @Test public void testFP() throws IOException { Schema schema = new Schema(asList( - field("a", new ArrowType.FloatingPoint(FloatingPointPrecision.HALF)), - field("b", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), - field("c", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)) + field("a", new FloatingPoint(FloatingPointPrecision.HALF)), + field("b", new FloatingPoint(FloatingPointPrecision.SINGLE)), + field("c", new FloatingPoint(FloatingPointPrecision.DOUBLE)) )); roundTrip(schema); contains(schema, "HALF", "SINGLE", "DOUBLE");