From 46a3f89ec7e13645d87dcef8a76a3a81cf569e89 Mon Sep 17 00:00:00 2001 From: leoyy0316 <571684903@qq.com> Date: Fri, 19 Jan 2024 10:45:11 +0800 Subject: [PATCH 1/2] Fix ORC and Parquet writers so timestamp types carry [local timezone] and [is_adjust_to_utc] --- .../paimon/format/orc/reader/OrcSplitReaderUtil.java | 3 ++- .../format/parquet/ParquetSchemaConverter.java | 12 +++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcSplitReaderUtil.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcSplitReaderUtil.java index ae00821f3a2a..882f1c753991 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcSplitReaderUtil.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcSplitReaderUtil.java @@ -74,8 +74,9 @@ public static TypeDescription toOrcType(DataType type) { case DATE: return TypeDescription.createDate(); case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: return TypeDescription.createTimestamp(); + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return TypeDescription.createTimestampInstant(); case ARRAY: ArrayType arrayType = (ArrayType) type; return TypeDescription.createList(toOrcType(arrayType.getElementType())); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java index 24160df56915..61bc660589ef 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java @@ -115,11 +115,11 @@ private static Type convertToParquetType( case TIMESTAMP_WITHOUT_TIME_ZONE: TimestampType timestampType = (TimestampType) type; return createTimestampWithLogicalType( - name, timestampType.getPrecision(), repetition); + name, 
timestampType.getPrecision(), repetition, false); case TIMESTAMP_WITH_LOCAL_TIME_ZONE: LocalZonedTimestampType localZonedTimestampType = (LocalZonedTimestampType) type; return createTimestampWithLogicalType( - name, localZonedTimestampType.getPrecision(), repetition); + name, localZonedTimestampType.getPrecision(), repetition, true); case ARRAY: ArrayType arrayType = (ArrayType) type; return ConversionPatterns.listOfElements( @@ -151,13 +151,15 @@ private static Type convertToParquetType( } private static Type createTimestampWithLogicalType( - String name, int precision, Type.Repetition repetition) { + String name, int precision, Type.Repetition repetition, boolean isAdjustToUTC) { if (precision <= 3) { - return Types.primitive(INT64, repetition).as(OriginalType.TIMESTAMP_MILLIS).named(name); + return Types.primitive(INT64, repetition) + .as(LogicalTypeAnnotation.timestampType(isAdjustToUTC, LogicalTypeAnnotation.TimeUnit.MILLIS)).named(name); } else if (precision > 6) { return Types.primitive(PrimitiveType.PrimitiveTypeName.INT96, repetition).named(name); } else { - return Types.primitive(INT64, repetition).as(OriginalType.TIMESTAMP_MICROS).named(name); + return Types.primitive(INT64, repetition) + .as(LogicalTypeAnnotation.timestampType(isAdjustToUTC, LogicalTypeAnnotation.TimeUnit.MICROS)).named(name); } } From 83ab14461b78bc2ba78b8b3b586569da01fd1f6c Mon Sep 17 00:00:00 2001 From: leoyy0316 <571684903@qq.com> Date: Fri, 19 Jan 2024 14:22:52 +0800 Subject: [PATCH 2/2] Fix ORC and Parquet writers so timestamp types carry [local timezone] and [is_adjust_to_utc] --- .../paimon/format/parquet/ParquetSchemaConverter.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java index 61bc660589ef..0e445f0b5286 100644 --- 
a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java @@ -154,12 +154,18 @@ private static Type createTimestampWithLogicalType( String name, int precision, Type.Repetition repetition, boolean isAdjustToUTC) { if (precision <= 3) { return Types.primitive(INT64, repetition) - .as(LogicalTypeAnnotation.timestampType(isAdjustToUTC, LogicalTypeAnnotation.TimeUnit.MILLIS)).named(name); + .as( + LogicalTypeAnnotation.timestampType( + isAdjustToUTC, LogicalTypeAnnotation.TimeUnit.MILLIS)) + .named(name); } else if (precision > 6) { return Types.primitive(PrimitiveType.PrimitiveTypeName.INT96, repetition).named(name); } else { return Types.primitive(INT64, repetition) - .as(LogicalTypeAnnotation.timestampType(isAdjustToUTC, LogicalTypeAnnotation.TimeUnit.MICROS)).named(name); + .as( + LogicalTypeAnnotation.timestampType( + isAdjustToUTC, LogicalTypeAnnotation.TimeUnit.MICROS)) + .named(name); } }