From 31090cfea53b63e1651c823a8899cc32799aadcd Mon Sep 17 00:00:00 2001
From: Max Gekk
Date: Tue, 17 May 2022 13:37:03 +0300
Subject: [PATCH 1/6] Fix QueryExecutionAnsiErrorsSuite

---
 .../main/resources/error/error-classes.json  | 16 +++++------
 .../sql/errors/QueryExecutionErrors.scala    | 28 +++++++++++++------
 .../QueryExecutionAnsiErrorsSuite.scala      | 22 +++++++--------
 3 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index f401ea8d29a5e..dea15cfec8898 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -12,7 +12,7 @@
     "sqlState" : "22005"
   },
   "CANNOT_CHANGE_DECIMAL_PRECISION" : {
-    "message" : [ "<value> cannot be represented as Decimal(<precision>, <scale>). If necessary set <config> to false to bypass this error.<details>" ],
+    "message" : [ "<value> cannot be represented as Decimal(<precision>, <scale>). If necessary set <config> to \"false\" to bypass this error.<details>" ],
     "sqlState" : "22005"
   },
   "CANNOT_PARSE_DECIMAL" : {
@@ -23,7 +23,7 @@
     "message" : [ "Cannot up cast <value> from <sourceType> to <targetType>.\n<details>" ]
   },
   "CAST_CAUSES_OVERFLOW" : {
-    "message" : [ "Casting <value> to <type> causes overflow. To return NULL instead, use 'try_cast'. If necessary set <config> to false to bypass this error." ],
+    "message" : [ "Casting <value> to <type> causes overflow. To return NULL instead, use `try_cast`. If necessary set <config> to \"false\" to bypass this error." ],
     "sqlState" : "22005"
   },
   "CONCURRENT_QUERY" : {
@@ -34,7 +34,7 @@
     "sqlState" : "22008"
   },
   "DIVIDE_BY_ZERO" : {
-    "message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set <config> to false (except for ANSI interval type) to bypass this error.<details>" ],
+    "message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set <config> to \"false\" (except for ANSI interval type) to bypass this error.<details>" ],
" ], "sqlState" : "22012" }, "DUPLICATE_KEY" : { @@ -106,17 +106,17 @@ "message" : [ "" ] }, "INVALID_ARRAY_INDEX" : { - "message" : [ "The index is out of bounds. The array has elements. If necessary set to false to bypass this error." ] + "message" : [ "The index is out of bounds. The array has elements. If necessary set to \"false\" to bypass this error." ] }, "INVALID_ARRAY_INDEX_IN_ELEMENT_AT" : { - "message" : [ "The index is out of bounds. The array has elements. To return NULL instead, use `try_element_at`. If necessary set to false to bypass this error." ] + "message" : [ "The index is out of bounds. The array has elements. To return NULL instead, use `try_element_at`. If necessary set to \"false\" to bypass this error." ] }, "INVALID_FIELD_NAME" : { "message" : [ "Field name is invalid: is not a struct." ], "sqlState" : "42000" }, "INVALID_FRACTION_OF_SECOND" : { - "message" : [ "The fraction of sec must be zero. Valid range is [0, 60]. If necessary set to false to bypass this error. " ], + "message" : [ "The fraction of sec must be zero. Valid range is [0, 60]. If necessary set to \"false\" to bypass this error. " ], "sqlState" : "22023" }, "INVALID_JSON_SCHEMA_MAP_TYPE" : { @@ -134,11 +134,11 @@ "sqlState" : "42000" }, "INVALID_SYNTAX_FOR_CAST" : { - "message" : [ "Invalid input syntax for type : . To return NULL instead, use 'try_cast'. If necessary set to false to bypass this error.
" ], + "message" : [ "Invalid input syntax for type : . To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error.
" ], "sqlState" : "42000" }, "MAP_KEY_DOES_NOT_EXIST" : { - "message" : [ "Key does not exist. To return NULL instead, use 'try_element_at'. If necessary set to false to bypass this error.
" ] + "message" : [ "Key does not exist. To return NULL instead, use `try_element_at`. If necessary set to \"false\" to bypass this error.
" ] }, "MISSING_COLUMN" : { "message" : [ "Column '' does not exist. Did you mean one of the following? []" ], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index b7239d3ff6064..e6dc172f14eaf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -108,9 +108,13 @@ object QueryExecutionErrors extends QueryErrorsBase { to: DataType, s: UTF8String, errorContext: String): SparkNumberFormatException = { - new SparkNumberFormatException(errorClass = "INVALID_SYNTAX_FOR_CAST", - messageParameters = Array(toSQLType(to), toSQLValue(s, StringType), - SQLConf.ANSI_ENABLED.key, errorContext)) + new SparkNumberFormatException( + errorClass = "INVALID_SYNTAX_FOR_CAST", + messageParameters = Array( + toSQLType(to), + toSQLValue(s, StringType), + toSQLConf(SQLConf.ANSI_ENABLED.key), + errorContext)) } def cannotCastFromNullTypeError(to: DataType): Throwable = { @@ -177,8 +181,12 @@ object QueryExecutionErrors extends QueryErrorsBase { } def mapKeyNotExistError(key: Any, dataType: DataType, context: String): NoSuchElementException = { - new SparkNoSuchElementException(errorClass = "MAP_KEY_DOES_NOT_EXIST", - messageParameters = Array(toSQLValue(key, dataType), SQLConf.ANSI_ENABLED.key, context)) + new SparkNoSuchElementException( + errorClass = "MAP_KEY_DOES_NOT_EXIST", + messageParameters = Array( + toSQLValue(key, dataType), + toSQLConf(SQLConf.ANSI_ENABLED.key), + context)) } def invalidFractionOfSecondError(): DateTimeException = { @@ -1005,9 +1013,13 @@ object QueryExecutionErrors extends QueryErrorsBase { def cannotCastToDateTimeError( value: Any, from: DataType, to: DataType, errorContext: String): Throwable = { - val valueString = toSQLValue(value, from) - new SparkDateTimeException("INVALID_SYNTAX_FOR_CAST", - Array(toSQLType(to), valueString, SQLConf.ANSI_ENABLED.key, errorContext)) + new SparkDateTimeException( + errorClass = "INVALID_SYNTAX_FOR_CAST", + messageParameters = Array( + toSQLType(to), + toSQLValue(value, from), + toSQLConf(SQLConf.ANSI_ENABLED.key), + errorContext)) } def registeringStreamingQueryListenerError(e: Exception): Throwable = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala index 8aef4c6f34574..89782973a5419 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala @@ -34,8 +34,8 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase errorClass = "CAST_CAUSES_OVERFLOW", msg = "Casting TIMESTAMP '9999-12-.*.56789' to \"INT\" causes overflow. " + - "To return NULL instead, use 'try_cast'. " + - s"If necessary set $ansiConf to false to bypass this error.", + "To return NULL instead, use `try_cast`. " + + s"""If necessary set $ansiConf to "false" to bypass this error.""", sqlState = Some("22005"), matchMsg = true) } @@ -48,7 +48,7 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase errorClass = "DIVIDE_BY_ZERO", msg = "Division by zero. To return NULL instead, use `try_divide`. If necessary set " + - s"$ansiConf to false (except for ANSI interval type) to bypass this error." 
+ + s"""$ansiConf to "false" (except for ANSI interval type) to bypass this error.""" + """ |== SQL(line 1, position 7) == |select 6/0 @@ -64,7 +64,7 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase }, errorClass = "INVALID_FRACTION_OF_SECOND", msg = "The fraction of sec must be zero. Valid range is [0, 60]. " + - s"If necessary set $ansiConf to false to bypass this error. ", + s"""If necessary set $ansiConf to "false" to bypass this error. """, sqlState = Some("22023")) } @@ -76,7 +76,7 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase errorClass = "CANNOT_CHANGE_DECIMAL_PRECISION", msg = "Decimal(expanded, 66666666666666.666, 17, 3) cannot be represented as Decimal(8, 1). " + - s"If necessary set $ansiConf to false to bypass this error." + + s"""If necessary set $ansiConf to "false" to bypass this error.""" + """ |== SQL(line 1, position 7) == |select CAST('66666666666666.666' AS DECIMAL(8, 1)) @@ -92,7 +92,7 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase }, errorClass = "INVALID_ARRAY_INDEX", msg = "The index 8 is out of bounds. The array has 5 elements. " + - s"If necessary set $ansiConf to false to bypass this error." + s"""If necessary set $ansiConf to "false" to bypass this error.""" ) } @@ -104,7 +104,7 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase errorClass = "INVALID_ARRAY_INDEX_IN_ELEMENT_AT", msg = "The index 8 is out of bounds. The array has 5 elements. " + "To return NULL instead, use `try_element_at`. " + - s"If necessary set $ansiConf to false to bypass this error." + s"""If necessary set $ansiConf to "false" to bypass this error.""" ) } @@ -115,8 +115,8 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase checkErrorClass( exception = e, errorClass = "MAP_KEY_DOES_NOT_EXIST", - msg = "Key 3 does not exist. To return NULL instead, use 'try_element_at'. " + - "If necessary set spark.sql.ansi.enabled to false to bypass this error." + + msg = "Key 3 does not exist. To return NULL instead, use `try_element_at`. " + + s"""If necessary set $ansiConf to "false" to bypass this error.""" + """ |== SQL(line 1, position 7) == |select element_at(map(1, 'a', 2, 'b'), 3) @@ -132,8 +132,8 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase }, errorClass = "INVALID_SYNTAX_FOR_CAST", msg = """Invalid input syntax for type "DOUBLE": '111111111111xe23'. """ + - """To return NULL instead, use 'try_cast'. If necessary set """ + - """spark.sql.ansi.enabled to false to bypass this error. + """To return NULL instead, use `try_cast`. If necessary set """ + + s"""$ansiConf to \"false\" to bypass this error. 
           |== SQL(line 1, position 7) ==
           |select CAST('111111111111xe23' AS DOUBLE)
           |       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

From 6da019193d630d18429a73529122ec0c3d51db3b Mon Sep 17 00:00:00 2001
From: Max Gekk
Date: Tue, 17 May 2022 13:56:38 +0300
Subject: [PATCH 2/6] Fix QueryExecutionErrorsSuite

---
 core/src/main/resources/error/error-classes.json     | 10 +++++-----
 .../apache/spark/sql/errors/QueryErrorsBase.scala    |  4 ++++
 .../spark/sql/errors/QueryExecutionErrors.scala      |  4 ++--
 .../spark/sql/errors/QueryExecutionErrorsSuite.scala | 12 ++++++------
 4 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index dea15cfec8898..c6aa18b072cc9 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -82,19 +82,19 @@
     "message" : [ "You may get a different result due to the upgrading to" ],
     "subClass" : {
       "DATETIME_PATTERN_RECOGNITION" : {
-        "message" : [ " Spark >= 3.0: \nFail to recognize <pattern> pattern in the DateTimeFormatter. 1) You can set <config> to 'LEGACY' to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html" ]
+        "message" : [ " Spark >= 3.0: \nFail to recognize <pattern> pattern in the DateTimeFormatter. 1) You can set <config> to \"LEGACY\" to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html" ]
       },
       "FORMAT_DATETIME_BY_NEW_PARSER" : {
-        "message" : [ " Spark >= 3.0: \nFail to format it to <value> in the new formatter. You can set\n<config> to 'LEGACY' to restore the behavior before\nSpark 3.0, or set to 'CORRECTED' and treat it as an invalid datetime string.\n" ]
+        "message" : [ " Spark >= 3.0: \nFail to format it to <value> in the new formatter. You can set\n<config> to \"LEGACY\" to restore the behavior before\nSpark 3.0, or set to \"CORRECTED\" and treat it as an invalid datetime string.\n" ]
       },
       "PARSE_DATETIME_BY_NEW_PARSER" : {
-        "message" : [ " Spark >= 3.0: \nFail to parse <datetime> in the new parser. You can set <config> to 'LEGACY' to restore the behavior before Spark 3.0, or set to 'CORRECTED' and treat it as an invalid datetime string." ]
+        "message" : [ " Spark >= 3.0: \nFail to parse <datetime> in the new parser. You can set <config> to \"LEGACY\" to restore the behavior before Spark 3.0, or set to \"CORRECTED\" and treat it as an invalid datetime string." ]
       },
       "READ_ANCIENT_DATETIME" : {
        "message" : [ " Spark >= 3.0: \nreading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z\nfrom <format> files can be ambiguous, as the files may be written by\nSpark 2.x or legacy versions of Hive, which uses a legacy hybrid calendar\nthat is different from Spark 3.0+'s Proleptic Gregorian calendar.\nSee more details in SPARK-31404. You can set the SQL config <config> or\nthe datasource option '
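
Note: the hunks above replace bare config keys such as SQLConf.ANSI_ENABLED.key with toSQLConf(...), and the stat block of PATCH 2/6 records a 4-line addition to QueryErrorsBase.scala that is not included in this excerpt. A minimal sketch of what that helper presumably looks like, assuming it only wraps the key in the double quotes used by the updated messages (the name toSQLConf and its quoting behavior are inferred from the call sites above; the body and quoteByDefault are assumptions):

    package org.apache.spark.sql.errors

    // Hedged sketch only: the real QueryErrorsBase.scala hunk is not shown
    // in this excerpt of the patch series.
    trait QueryErrorsBase {
      // Wrap an element in double quotes, the quoting style the updated
      // error messages use for values such as "false" and "LEGACY".
      private def quoteByDefault(elem: String): String = "\"" + elem + "\""

      // Quote a SQL config key for embedding into an error message, e.g.
      // toSQLConf("spark.sql.ansi.enabled") yields "\"spark.sql.ansi.enabled\"".
      def toSQLConf(conf: String): String = quoteByDefault(conf)
    }

Under this reading, the ansiConf value interpolated in the test suites already carries its double quotes, which is why the expected messages are written as s"""... $ansiConf to "false" ...""" instead of quoting the key inline.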