From dd333469dcb086e2469bfd9150eeff39e3cde616 Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Thu, 9 Nov 2023 09:59:50 -0500 Subject: [PATCH] GH-38648: [Java] Regenerate Flatbuffers --- .../org/apache/arrow/flatbuf/BinaryView.java | 57 +++++++ .../apache/arrow/flatbuf/BodyCompression.java | 3 +- .../java/org/apache/arrow/flatbuf/Date.java | 4 +- .../apache/arrow/flatbuf/LargeListView.java | 52 ++++++ .../org/apache/arrow/flatbuf/ListView.java | 53 +++++++ .../org/apache/arrow/flatbuf/RecordBatch.java | 33 +++- .../apache/arrow/flatbuf/RunEndEncoded.java | 55 +++++++ .../java/org/apache/arrow/flatbuf/Time.java | 17 +- .../org/apache/arrow/flatbuf/Timestamp.java | 148 +++++++++++++----- .../java/org/apache/arrow/flatbuf/Type.java | 7 +- .../org/apache/arrow/flatbuf/Utf8View.java | 57 +++++++ 11 files changed, 435 insertions(+), 51 deletions(-) create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java b/java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java new file mode 100644 index 000000000000..56a8d329532c --- /dev/null +++ b/java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// automatically generated by the FlatBuffers compiler, do not modify + +package org.apache.arrow.flatbuf; + +import java.nio.*; +import java.lang.*; +import java.util.*; +import com.google.flatbuffers.*; + +@SuppressWarnings("unused") +/** + * Logically the same as Binary, but the internal representation uses a view + * struct that contains the string length and either the string's entire data + * inline (for small strings) or an inlined prefix, an index of another buffer, + * and an offset pointing to a slice in that buffer (for non-small strings). + * + * Since it uses a variable number of data buffers, each Field with this type + * must have a corresponding entry in `variadicBufferCounts`. 
+ */ +public final class BinaryView extends Table { + public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); } + public static BinaryView getRootAsBinaryView(ByteBuffer _bb) { return getRootAsBinaryView(_bb, new BinaryView()); } + public static BinaryView getRootAsBinaryView(ByteBuffer _bb, BinaryView obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } + public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } + public BinaryView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void startBinaryView(FlatBufferBuilder builder) { builder.startTable(0); } + public static int endBinaryView(FlatBufferBuilder builder) { + int o = builder.endTable(); + return o; + } + + public static final class Vector extends BaseVector { + public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } + + public BinaryView get(int j) { return get(new BinaryView(), j); } + public BinaryView get(BinaryView obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } + } +} + diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java b/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java index 650454eb154b..ed8ce0939a04 100644 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java +++ b/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java @@ -37,7 +37,8 @@ public final class BodyCompression extends Table { public BodyCompression __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } /** - * Compressor library + * Compressor library. + * For LZ4_FRAME, each compressed buffer must consist of a single frame. */ public byte codec() { int o = __offset(4); return o != 0 ? 
bb.get(o + bb_pos) : 0; } /** diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java index b2fcc9e39e38..ac6e389835a4 100644 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java +++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java @@ -25,8 +25,8 @@ @SuppressWarnings("unused") /** - * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX - * epoch (1970-01-01), stored in either of two units: + * Date is either a 32-bit or 64-bit signed integer type representing an + * elapsed time since UNIX epoch (1970-01-01), stored in either of two units: * * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no * leap seconds), where the values are evenly divisible by 86400000 diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java new file mode 100644 index 000000000000..08c31c23a943 --- /dev/null +++ b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// automatically generated by the FlatBuffers compiler, do not modify + +package org.apache.arrow.flatbuf; + +import java.nio.*; +import java.lang.*; +import java.util.*; +import com.google.flatbuffers.*; + +@SuppressWarnings("unused") +/** + * Same as ListView, but with 64-bit offsets and sizes, allowing to represent + * extremely large data values. + */ +public final class LargeListView extends Table { + public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); } + public static LargeListView getRootAsLargeListView(ByteBuffer _bb) { return getRootAsLargeListView(_bb, new LargeListView()); } + public static LargeListView getRootAsLargeListView(ByteBuffer _bb, LargeListView obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } + public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } + public LargeListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void startLargeListView(FlatBufferBuilder builder) { builder.startTable(0); } + public static int endLargeListView(FlatBufferBuilder builder) { + int o = builder.endTable(); + return o; + } + + public static final class Vector extends BaseVector { + public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } + + public LargeListView get(int j) { return get(new LargeListView(), j); } + public LargeListView get(LargeListView obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } + } +} + diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java b/java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java new file mode 100644 index 000000000000..2c9ad4c13d88 --- /dev/null +++ b/java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// automatically generated by the FlatBuffers compiler, do not modify + +package org.apache.arrow.flatbuf; + +import java.nio.*; +import java.lang.*; +import java.util.*; +import com.google.flatbuffers.*; + +@SuppressWarnings("unused") +/** + * Represents the same logical types that List can, but contains offsets and + * sizes allowing for writes in any order and sharing of child values among + * list values. 
+ */ +public final class ListView extends Table { + public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); } + public static ListView getRootAsListView(ByteBuffer _bb) { return getRootAsListView(_bb, new ListView()); } + public static ListView getRootAsListView(ByteBuffer _bb, ListView obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } + public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } + public ListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void startListView(FlatBufferBuilder builder) { builder.startTable(0); } + public static int endListView(FlatBufferBuilder builder) { + int o = builder.endTable(); + return o; + } + + public static final class Vector extends BaseVector { + public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } + + public ListView get(int j) { return get(new ListView(), j); } + public ListView get(ListView obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } + } +} + diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java b/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java index eb814e07dcce..ce907ee0fdcd 100644 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java +++ b/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java @@ -67,27 +67,54 @@ public final class RecordBatch extends Table { */ public org.apache.arrow.flatbuf.BodyCompression compression() { return compression(new org.apache.arrow.flatbuf.BodyCompression()); } public org.apache.arrow.flatbuf.BodyCompression compression(org.apache.arrow.flatbuf.BodyCompression obj) { int o = __offset(10); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; } + /** + * Some types such as Utf8View are represented using a variable number of buffers. 
+ * For each such Field in the pre-ordered flattened logical schema, there will be + * an entry in variadicBufferCounts to indicate the number of variadic + * buffers which belong to that Field in the current RecordBatch. + * + * For example, the schema + * col1: Struct<alpha: Int32, beta: BinaryView, gamma: Float64> + * col2: Utf8View + * contains two Fields with variadic buffers so variadicBufferCounts will have + * two entries, the first counting the variadic buffers of `col1.beta` and the + * second counting `col2`'s. + * + * This field may be omitted if and only if the schema contains no Fields with + * a variable number of buffers, such as BinaryView and Utf8View. + */ + public long variadicBufferCounts(int j) { int o = __offset(12); return o != 0 ? bb.getLong(__vector(o) + j * 8) : 0; } + public int variadicBufferCountsLength() { int o = __offset(12); return o != 0 ? __vector_len(o) : 0; } + public LongVector variadicBufferCountsVector() { return variadicBufferCountsVector(new LongVector()); } + public LongVector variadicBufferCountsVector(LongVector obj) { int o = __offset(12); return o != 0 ? 
obj.__assign(__vector(o), bb) : null; } + public ByteBuffer variadicBufferCountsAsByteBuffer() { return __vector_as_bytebuffer(12, 8); } + public ByteBuffer variadicBufferCountsInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 12, 8); } public static int createRecordBatch(FlatBufferBuilder builder, long length, int nodesOffset, int buffersOffset, - int compressionOffset) { - builder.startTable(4); + int compressionOffset, + int variadicBufferCountsOffset) { + builder.startTable(5); RecordBatch.addLength(builder, length); + RecordBatch.addVariadicBufferCounts(builder, variadicBufferCountsOffset); RecordBatch.addCompression(builder, compressionOffset); RecordBatch.addBuffers(builder, buffersOffset); RecordBatch.addNodes(builder, nodesOffset); return RecordBatch.endRecordBatch(builder); } - public static void startRecordBatch(FlatBufferBuilder builder) { builder.startTable(4); } + public static void startRecordBatch(FlatBufferBuilder builder) { builder.startTable(5); } public static void addLength(FlatBufferBuilder builder, long length) { builder.addLong(0, length, 0L); } public static void addNodes(FlatBufferBuilder builder, int nodesOffset) { builder.addOffset(1, nodesOffset, 0); } public static void startNodesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); } public static void addBuffers(FlatBufferBuilder builder, int buffersOffset) { builder.addOffset(2, buffersOffset, 0); } public static void startBuffersVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); } public static void addCompression(FlatBufferBuilder builder, int compressionOffset) { builder.addOffset(3, compressionOffset, 0); } + public static void addVariadicBufferCounts(FlatBufferBuilder builder, int variadicBufferCountsOffset) { builder.addOffset(4, variadicBufferCountsOffset, 0); } + public static int createVariadicBufferCountsVector(FlatBufferBuilder builder, long[] data) { builder.startVector(8, data.length, 
8); for (int i = data.length - 1; i >= 0; i--) builder.addLong(data[i]); return builder.endVector(); } + public static void startVariadicBufferCountsVector(FlatBufferBuilder builder, int numElems) { builder.startVector(8, numElems, 8); } public static int endRecordBatch(FlatBufferBuilder builder) { int o = builder.endTable(); return o; diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java b/java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java new file mode 100644 index 000000000000..d48733ef0c82 --- /dev/null +++ b/java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// automatically generated by the FlatBuffers compiler, do not modify + +package org.apache.arrow.flatbuf; + +import java.nio.*; +import java.lang.*; +import java.util.*; +import com.google.flatbuffers.*; + +@SuppressWarnings("unused") +/** + * Contains two child arrays, run_ends and values. + * The run_ends child array must be a 16/32/64-bit integer array + * which encodes the indices at which the run with the value in + * each corresponding index in the values child array ends. 
+ * Like list/struct types, the value array can be of any type. + */ +public final class RunEndEncoded extends Table { + public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); } + public static RunEndEncoded getRootAsRunEndEncoded(ByteBuffer _bb) { return getRootAsRunEndEncoded(_bb, new RunEndEncoded()); } + public static RunEndEncoded getRootAsRunEndEncoded(ByteBuffer _bb, RunEndEncoded obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } + public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } + public RunEndEncoded __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void startRunEndEncoded(FlatBufferBuilder builder) { builder.startTable(0); } + public static int endRunEndEncoded(FlatBufferBuilder builder) { + int o = builder.endTable(); + return o; + } + + public static final class Vector extends BaseVector { + public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } + + public RunEndEncoded get(int j) { return get(new RunEndEncoded(), j); } + public RunEndEncoded get(RunEndEncoded obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } + } +} + diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java index 596d403a3eae..9acc3fc7a5ea 100644 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java +++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java @@ -25,9 +25,20 @@ @SuppressWarnings("unused") /** - * Time type. The physical storage type depends on the unit - * - SECOND and MILLISECOND: 32 bits - * - MICROSECOND and NANOSECOND: 64 bits + * Time is either a 32-bit or 64-bit signed integer type representing an + * elapsed time since midnight, stored in either of four units: seconds, + * milliseconds, microseconds or nanoseconds. 
+ * + * The integer `bitWidth` depends on the `unit` and must be one of the following: + * * SECOND and MILLISECOND: 32 bits + * * MICROSECOND and NANOSECOND: 64 bits + * + * The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds + * (exclusive), adjusted for the time unit (for example, up to 86400000 + * exclusive for the MILLISECOND unit). + * This definition doesn't allow for leap seconds. Time values from + * measurements with leap seconds will need to be corrected when ingesting + * into Arrow (for example by replacing the value 86400 with 86399). */ public final class Time extends Table { public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); } diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java index 041452607c9e..fe0c6aaea24f 100644 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java +++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java @@ -25,37 +25,111 @@ @SuppressWarnings("unused") /** - * Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding - * leap seconds, as a 64-bit integer. Note that UNIX time does not include - * leap seconds. + * Timestamp is a 64-bit signed integer representing an elapsed time since a + * fixed epoch, stored in either of four units: seconds, milliseconds, + * microseconds or nanoseconds, and is optionally annotated with a timezone. + * + * Timestamp values do not include any leap seconds (in other words, all + * days are considered 86400 seconds long). + * + * Timestamps with a non-empty timezone + * ------------------------------------ + * + * If a Timestamp column has a non-empty timezone value, its epoch is + * 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone + * (the Unix epoch), regardless of the Timestamp's own timezone. 
+ * + * Therefore, timestamp values with a non-empty timezone correspond to + * physical points in time together with some additional information about + * how the data was obtained and/or how to display it (the timezone). + * + * For example, the timestamp value 0 with the timezone string "Europe/Paris" + * corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the + * application may prefer to display it as "January 1st 1970, 01h00" in + * the Europe/Paris timezone (which is the same physical point in time). + * + * One consequence is that timestamp values with a non-empty timezone + * can be compared and ordered directly, since they all share the same + * well-known point of reference (the Unix epoch). + * + * Timestamps with an unset / empty timezone + * ----------------------------------------- + * + * If a Timestamp column has no timezone value, its epoch is + * 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. + * + * Therefore, timestamp values without a timezone cannot be meaningfully + * interpreted as physical points in time, but only as calendar / clock + * indications ("wall clock time") in an unspecified timezone. + * + * For example, the timestamp value 0 with an empty timezone string + * corresponds to "January 1st 1970, 00h00" in an unknown timezone: there + * is not enough information to interpret it as a well-defined physical + * point in time. + * + * One consequence is that timestamp values without a timezone cannot + * be reliably compared or ordered, since they may have different points of + * reference. In particular, it is *not* possible to interpret an unset + * or empty timezone as the same as "UTC". 
+ * + * Conversion between timezones + * ---------------------------- + * + * If a Timestamp column has a non-empty timezone, changing the timezone + * to a different non-empty value is a metadata-only operation: + * the timestamp values need not change as their point of reference remains + * the same (the Unix epoch). + * + * However, if a Timestamp column has no timezone value, changing it to a + * non-empty value requires thinking about the desired semantics. + * One possibility is to assume that the original timestamp values are + * relative to the epoch of the timezone being set; timestamp values should + * then be adjusted to the Unix epoch (for example, changing the timezone from + * empty to "Europe/Paris" would require converting the timestamp values + * from "Europe/Paris" to "UTC", which seems counter-intuitive but is + * nevertheless correct). + * + * Guidelines for encoding data from external libraries + * ---------------------------------------------------- * * Date & time libraries often have multiple different data types for temporal - * data. In order to ease interoperability between different implementations the + * data. In order to ease interoperability between different implementations the * Arrow project has some recommendations for encoding these types into a Timestamp * column. * - * An "instant" represents a single moment in time that has no meaningful time zone - * or the time zone is unknown. A column of instants can also contain values from - * multiple time zones. To encode an instant set the timezone string to "UTC". - * - * A "zoned date-time" represents a single moment in time that has a meaningful - * reference time zone. To encode a zoned date-time as a Timestamp set the timezone - * string to the name of the timezone. There is some ambiguity between an instant - * and a zoned date-time with the UTC time zone. Both of these are stored the same. - * Typically, this distinction does not matter. 
If it does, then an application should - * use custom metadata or an extension type to distinguish between the two cases. - * - * An "offset date-time" represents a single moment in time combined with a meaningful - * offset from UTC. To encode an offset date-time as a Timestamp set the timezone string - * to the numeric time zone offset string (e.g. "+03:00"). - * - * A "local date-time" does not represent a single moment in time. It represents a wall - * clock time combined with a date. Because of daylight savings time there may multiple - * instants that correspond to a single local date-time in any given time zone. A - * local date-time is often stored as a struct or a Date32/Time64 pair. However, it can - * also be encoded into a Timestamp column. To do so the value should be the the time - * elapsed from the Unix epoch so that a wall clock in UTC would display the desired time. - * The timezone string should be set to null or the empty string. + * An "instant" represents a physical point in time that has no relevant timezone + * (for example, astronomical data). To encode an instant, use a Timestamp with + * the timezone string set to "UTC", and make sure the Timestamp values + * are relative to the UTC epoch (January 1st 1970, midnight). + * + * A "zoned date-time" represents a physical point in time annotated with an + * informative timezone (for example, the timezone in which the data was + * recorded). To encode a zoned date-time, use a Timestamp with the timezone + * string set to the name of the timezone, and make sure the Timestamp values + * are relative to the UTC epoch (January 1st 1970, midnight). + * + * (There is some ambiguity between an instant and a zoned date-time with the + * UTC timezone. Both of these are stored the same in Arrow. Typically, + * this distinction does not matter. If it does, then an application should + * use custom metadata or an extension type to distinguish between the two cases.) 
+ * + * An "offset date-time" represents a physical point in time combined with an + * explicit offset from UTC. To encode an offset date-time, use a Timestamp + * with the timezone string set to the numeric timezone offset string + * (e.g. "+03:00"), and make sure the Timestamp values are relative to + * the UTC epoch (January 1st 1970, midnight). + * + * A "naive date-time" (also called "local date-time" in some libraries) + * represents a wall clock time combined with a calendar date, but with + * no indication of how to map this information to a physical point in time. + * Naive date-times must be handled with care because of this missing + * information, and also because daylight saving time (DST) may make + * some values ambiguous or non-existent. A naive date-time may be + * stored as a struct with Date and Time fields. However, it may also be + * encoded into a Timestamp column with an empty timezone. The timestamp + * values should be computed "as if" the timezone of the date-time values + * was UTC; for example, the naive date-time "January 1st 1970, 00h00" would + * be encoded as timestamp value 0. */ public final class Timestamp extends Table { public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); } @@ -66,24 +140,16 @@ public final class Timestamp extends Table { public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; } /** - * The time zone is a string indicating the name of a time zone, one of: + * The timezone is an optional string indicating the name of a timezone, + * one of: * - * * As used in the Olson time zone database (the "tz database" or - * "tzdata"), such as "America/New_York" - * * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 + * * As used in the Olson timezone database (the "tz database" or + * "tzdata"), such as "America/New_York". + * * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", + * such as "+07:30". 
* * Whether a timezone string is present indicates different semantics about - * the data: - * - * * If the time zone is null or an empty string, the data is a local date-time - * and does not represent a single moment in time. Instead it represents a wall clock - * time and care should be taken to avoid interpreting it semantically as an instant. - * - * * If the time zone is set to a valid value, values can be displayed as - * "localized" to that time zone, even though the underlying 64-bit - * integers are identical to the same data stored in UTC. Converting - * between time zones is a metadata-only operation and does not change the - * underlying values + * the data (see above). */ public String timezone() { int o = __offset(6); return o != 0 ? __string(o + bb_pos) : null; } public ByteBuffer timezoneAsByteBuffer() { return __vector_as_bytebuffer(6, 1); } diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java index 5f1a550cffff..29248bb23c30 100644 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java +++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java @@ -47,8 +47,13 @@ private Type() { } public static final byte LargeBinary = 19; public static final byte LargeUtf8 = 20; public static final byte LargeList = 21; + public static final byte RunEndEncoded = 22; + public static final byte BinaryView = 23; + public static final byte Utf8View = 24; + public static final byte ListView = 25; + public static final byte LargeListView = 26; - public static final String[] names = { "NONE", "Null", "Int", "FloatingPoint", "Binary", "Utf8", "Bool", "Decimal", "Date", "Time", "Timestamp", "Interval", "List", "Struct_", "Union", "FixedSizeBinary", "FixedSizeList", "Map", "Duration", "LargeBinary", "LargeUtf8", "LargeList", }; + public static final String[] names = { "NONE", "Null", "Int", "FloatingPoint", "Binary", "Utf8", "Bool", "Decimal", "Date", "Time", 
"Timestamp", "Interval", "List", "Struct_", "Union", "FixedSizeBinary", "FixedSizeList", "Map", "Duration", "LargeBinary", "LargeUtf8", "LargeList", "RunEndEncoded", "BinaryView", "Utf8View", "ListView", "LargeListView", }; public static String name(int e) { return names[e]; } } diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java new file mode 100644 index 000000000000..035c977576e4 --- /dev/null +++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// automatically generated by the FlatBuffers compiler, do not modify + +package org.apache.arrow.flatbuf; + +import java.nio.*; +import java.lang.*; +import java.util.*; +import com.google.flatbuffers.*; + +@SuppressWarnings("unused") +/** + * Logically the same as Utf8, but the internal representation uses a view + * struct that contains the string length and either the string's entire data + * inline (for small strings) or an inlined prefix, an index of another buffer, + * and an offset pointing to a slice in that buffer (for non-small strings). 
+ * + * Since it uses a variable number of data buffers, each Field with this type + * must have a corresponding entry in `variadicBufferCounts`. + */ +public final class Utf8View extends Table { + public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); } + public static Utf8View getRootAsUtf8View(ByteBuffer _bb) { return getRootAsUtf8View(_bb, new Utf8View()); } + public static Utf8View getRootAsUtf8View(ByteBuffer _bb, Utf8View obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } + public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } + public Utf8View __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void startUtf8View(FlatBufferBuilder builder) { builder.startTable(0); } + public static int endUtf8View(FlatBufferBuilder builder) { + int o = builder.endTable(); + return o; + } + + public static final class Vector extends BaseVector { + public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } + + public Utf8View get(int j) { return get(new Utf8View(), j); } + public Utf8View get(Utf8View obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } + } +} +