From 96880ef17dd661ff301b1b055b929e69acb8916b Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Wed, 28 Oct 2020 15:31:17 -0700 Subject: [PATCH 1/5] HIVE_24324: Remove deprecated API usage from Avro --- .../apache/hadoop/hive/serde2/avro/AvroDeserializer.java | 6 +++--- .../apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java index 128cfa96f0a3..659d4d34d0e3 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java @@ -278,7 +278,7 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco int scale = 0; try { - scale = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_SCALE).asInt(); + scale = (int) fileSchema.getObjectProp(AvroSerDe.AVRO_PROP_SCALE); } catch(Exception ex) { throw new AvroSerdeException("Failed to obtain scale value from file schema: " + fileSchema, ex); } @@ -294,7 +294,7 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco int maxLength = 0; try { - maxLength = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt(); + maxLength = (int) fileSchema.getObjectProp(AvroSerDe.AVRO_PROP_MAX_LENGTH); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain maxLength value for char field from file schema: " + fileSchema, ex); } @@ -309,7 +309,7 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco maxLength = 0; try { - maxLength = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt(); + maxLength = (int) fileSchema.getObjectProp(AvroSerDe.AVRO_PROP_MAX_LENGTH); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain maxLength value for varchar field from file schema: " + fileSchema, ex); } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java index 35d83bdb1af0..03f07bb8482d 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java @@ -136,8 +136,8 @@ public static TypeInfo generateTypeInfo(Schema schema, int precision = 0; int scale = 0; try { - precision = schema.getJsonProp(AvroSerDe.AVRO_PROP_PRECISION).getIntValue(); - scale = schema.getJsonProp(AvroSerDe.AVRO_PROP_SCALE).getIntValue(); + precision = (int) schema.getObjectProp(AvroSerDe.AVRO_PROP_PRECISION); + scale = (int) schema.getObjectProp(AvroSerDe.AVRO_PROP_SCALE); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain scale value from file schema: " + schema, ex); } @@ -155,7 +155,7 @@ public static TypeInfo generateTypeInfo(Schema schema, AvroSerDe.CHAR_TYPE_NAME.equalsIgnoreCase(schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE))) { int maxLength = 0; try { - maxLength = schema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt(); + maxLength = (int) schema.getObjectProp(AvroSerDe.AVRO_PROP_MAX_LENGTH); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex); } @@ -166,7 +166,7 @@ public static TypeInfo generateTypeInfo(Schema schema, .equalsIgnoreCase(schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE))) { int maxLength = 0; try { - maxLength = schema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt(); + maxLength = (int) schema.getObjectProp(AvroSerDe.AVRO_PROP_MAX_LENGTH); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex); } From e9dcc63ded56321231fe77b59498e199f84eb672 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 29 Oct 2020 10:57:51 -0700 Subject: [PATCH 2/5] Convert string to int for scale/precision --- .../org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java | 2 +- .../org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java index 659d4d34d0e3..7576afce8af9 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java @@ -278,7 +278,7 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco int scale = 0; try { - scale = (int) fileSchema.getObjectProp(AvroSerDe.AVRO_PROP_SCALE); + scale = Integer.parseInt((String) fileSchema.getObjectProp(AvroSerDe.AVRO_PROP_SCALE)); } catch(Exception ex) { throw new AvroSerdeException("Failed to obtain scale value from file schema: " + fileSchema, ex); } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java index 03f07bb8482d..d05192853ef7 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java @@ -136,8 +136,8 @@ public static TypeInfo generateTypeInfo(Schema schema, int precision = 0; int scale = 0; try { - precision = (int) schema.getObjectProp(AvroSerDe.AVRO_PROP_PRECISION); - scale = (int) schema.getObjectProp(AvroSerDe.AVRO_PROP_SCALE); + precision = Integer.parseInt((String) schema.getObjectProp(AvroSerDe.AVRO_PROP_PRECISION)); + scale = Integer.parseInt((String) schema.getObjectProp(AvroSerDe.AVRO_PROP_SCALE)); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain scale value from file schema: " + schema, ex); } From d90c9a5c31bd215231a27e9dcf7d272ec29f80ed Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 31 Oct 2020 11:20:01 -0700 Subject: [PATCH 3/5] Parse Integer or String from Avro schema --- .../hadoop/hive/serde2/avro/AvroDeserializer.java | 6 +++--- .../hadoop/hive/serde2/avro/AvroSerdeUtils.java | 12 ++++++++++++ .../hadoop/hive/serde2/avro/SchemaToTypeInfo.java | 8 ++++---- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java index 7576afce8af9..9c8f03530478 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java @@ -278,7 +278,7 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco int scale = 0; try { - scale = Integer.parseInt((String) fileSchema.getObjectProp(AvroSerDe.AVRO_PROP_SCALE)); + scale = AvroSerdeUtils.getIntFromSchema(fileSchema, AvroSerDe.AVRO_PROP_SCALE); } catch(Exception ex) { throw new AvroSerdeException("Failed to obtain scale value from file schema: " + fileSchema, ex); } @@ -294,7 +294,7 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco int maxLength = 0; try { - maxLength = (int) fileSchema.getObjectProp(AvroSerDe.AVRO_PROP_MAX_LENGTH); + maxLength = AvroSerdeUtils.getIntFromSchema(fileSchema, AvroSerDe.AVRO_PROP_MAX_LENGTH); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain maxLength value for char field from file schema: " + fileSchema, ex); } @@ -309,7 +309,7 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco maxLength = 0; try { - maxLength = (int) fileSchema.getObjectProp(AvroSerDe.AVRO_PROP_MAX_LENGTH); + maxLength = AvroSerdeUtils.getIntFromSchema(fileSchema, AvroSerDe.AVRO_PROP_MAX_LENGTH); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain maxLength value for varchar field from file schema: " + fileSchema, ex); } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java index b54007367730..3ba3824381bf 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java @@ -324,6 +324,18 @@ public static Schema getSchemaFor(URL url) { } } + public static int getIntFromSchema(Schema schema, String name) { + Object obj = schema.getObjectProp(name); + if (obj instanceof String) { + return Integer.parseInt((String) obj); + } else if (obj instanceof Integer) { + return (int) obj; + } else { + throw new IllegalArgumentException("Expect integer or string value from property " + name + + " but found type " + obj.getClass().getName()); + } + } + /** * Called on specific alter table events, removes schema url and schema literal from given tblproperties * After the change, HMS solely will be responsible for handling the schema diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java index d05192853ef7..ba0942819e88 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java @@ -136,8 +136,8 @@ public static TypeInfo generateTypeInfo(Schema schema, int precision = 0; int scale = 0; try { - precision = Integer.parseInt((String) schema.getObjectProp(AvroSerDe.AVRO_PROP_PRECISION)); - scale = Integer.parseInt((String) schema.getObjectProp(AvroSerDe.AVRO_PROP_SCALE)); + precision = AvroSerdeUtils.getIntFromSchema(schema, AvroSerDe.AVRO_PROP_PRECISION); + scale = AvroSerdeUtils.getIntFromSchema(schema, AvroSerDe.AVRO_PROP_SCALE); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain scale value from file schema: " + schema, ex); } @@ -155,7 +155,7 @@ public static TypeInfo generateTypeInfo(Schema schema, AvroSerDe.CHAR_TYPE_NAME.equalsIgnoreCase(schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE))) { int maxLength = 0; try { - maxLength = (int) schema.getObjectProp(AvroSerDe.AVRO_PROP_MAX_LENGTH); + maxLength = AvroSerdeUtils.getIntFromSchema(schema, AvroSerDe.AVRO_PROP_MAX_LENGTH); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex); } @@ -166,7 +166,7 @@ public static TypeInfo generateTypeInfo(Schema schema, .equalsIgnoreCase(schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE))) { int maxLength = 0; try { - maxLength = (int) schema.getObjectProp(AvroSerDe.AVRO_PROP_MAX_LENGTH); + maxLength = AvroSerdeUtils.getIntFromSchema(schema, AvroSerDe.AVRO_PROP_MAX_LENGTH); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex); } From 16ad50005fd11dcbc63aa1732fa003f92433e7c5 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 31 Oct 2020 15:10:13 -0700 Subject: [PATCH 4/5] Shouldn't accept string in AvroDeserializer --- ql/src/test/results/clientnegative/avro_decimal.q.out | 2 +- .../org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ql/src/test/results/clientnegative/avro_decimal.q.out b/ql/src/test/results/clientnegative/avro_decimal.q.out index 3b46c7b63ad6..c2bc4f4948f3 100644 --- a/ql/src/test/results/clientnegative/avro_decimal.q.out +++ b/ql/src/test/results/clientnegative/avro_decimal.q.out @@ -19,4 +19,4 @@ TBLPROPERTIES ( PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@avro_dec -FAILED: Execution Error, return code 40000 from org.apache.hadoop.hive.ql.ddl.DDLTask. java.lang.RuntimeException: MetaException(message:org.apache.hadoop.hive.serde2.avro.AvroSerdeException Invalid precision or scale for decimal type) +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.ddl.DDLTask. java.lang.RuntimeException: MetaException(message:org.apache.hadoop.hive.serde2.avro.AvroSerdeException Invalid precision or scale for decimal type) diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java index 9c8f03530478..afc3f84fe565 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java @@ -278,7 +278,7 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco int scale = 0; try { - scale = AvroSerdeUtils.getIntFromSchema(fileSchema, AvroSerDe.AVRO_PROP_SCALE); + scale = (int) fileSchema.getObjectProp(AvroSerDe.AVRO_PROP_SCALE); } catch(Exception ex) { throw new AvroSerdeException("Failed to obtain scale value from file schema: " + fileSchema, ex); } From a6f4d86a007e26e968df68ad9c2befb35f69864d Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 2 Nov 2020 13:54:20 -0800 Subject: [PATCH 5/5] Fix parsing precision & scale --- ql/src/test/results/clientnegative/avro_decimal.q.out | 2 +- .../hadoop/hive/serde2/avro/AvroDeserializer.java | 2 +- .../hadoop/hive/serde2/avro/SchemaToTypeInfo.java | 10 ++++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ql/src/test/results/clientnegative/avro_decimal.q.out b/ql/src/test/results/clientnegative/avro_decimal.q.out index c2bc4f4948f3..3b46c7b63ad6 100644 --- a/ql/src/test/results/clientnegative/avro_decimal.q.out +++ b/ql/src/test/results/clientnegative/avro_decimal.q.out @@ -19,4 +19,4 @@ TBLPROPERTIES ( PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@avro_dec -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.ddl.DDLTask. java.lang.RuntimeException: MetaException(message:org.apache.hadoop.hive.serde2.avro.AvroSerdeException Invalid precision or scale for decimal type) +FAILED: Execution Error, return code 40000 from org.apache.hadoop.hive.ql.ddl.DDLTask. java.lang.RuntimeException: MetaException(message:org.apache.hadoop.hive.serde2.avro.AvroSerdeException Invalid precision or scale for decimal type) diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java index afc3f84fe565..9c8f03530478 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java @@ -278,7 +278,7 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco int scale = 0; try { - scale = (int) fileSchema.getObjectProp(AvroSerDe.AVRO_PROP_SCALE); + scale = AvroSerdeUtils.getIntFromSchema(fileSchema, AvroSerDe.AVRO_PROP_SCALE); } catch(Exception ex) { throw new AvroSerdeException("Failed to obtain scale value from file schema: " + fileSchema, ex); } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java index ba0942819e88..5557a6a185f0 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java @@ -136,8 +136,14 @@ public static TypeInfo generateTypeInfo(Schema schema, int precision = 0; int scale = 0; try { - precision = AvroSerdeUtils.getIntFromSchema(schema, AvroSerDe.AVRO_PROP_PRECISION); - scale = AvroSerdeUtils.getIntFromSchema(schema, AvroSerDe.AVRO_PROP_SCALE); + Object o = schema.getObjectProp(AvroSerDe.AVRO_PROP_PRECISION); + if (o instanceof Integer) { + precision = (int) o; + } + o = schema.getObjectProp(AvroSerDe.AVRO_PROP_SCALE); + if (o instanceof Integer) { + scale = (int) o; + } } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain scale value from file schema: " + schema, ex); }