From c36447038f4a8bff260f5d1ffb94bdf8126b55fa Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 4 Sep 2023 14:57:15 +0300 Subject: [PATCH 1/6] Update Scala/Java APIs --- .../scala/org/apache/spark/sql/functions.scala | 18 ++++++++++++++++++ .../scala/org/apache/spark/sql/functions.scala | 16 ++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index 8ea5f07c528f7..e7ff8f01b373d 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -4263,6 +4263,7 @@ object functions { */ def to_binary(e: Column): Column = Column.fn("to_binary", e) + // scalastyle:off line.size.limit /** * Convert `e` to a string based on the `format`. Throws an exception if the conversion fails. * @@ -4284,6 +4285,14 @@ object functions { * prints '+' for positive values but 'MI' prints a space.
  • 'PR': Only allowed at the * end of the format string; specifies that the result string will be wrapped by angle * brackets if the input value is negative.
  • + * If `e` is a datetime, `format` shall be a valid datetime pattern, see + * Datetime Patterns. + * If `e` is a binary, it is converted to a string in one of the formats: + * * * @group string_funcs * @since 3.5.0 @@ -4311,11 +4320,20 @@ object functions { * prints '+' for positive values but 'MI' prints a space.
  • 'PR': Only allowed at the * end of the format string; specifies that the result string will be wrapped by angle * brackets if the input value is negative.
  • + * If `e` is a datetime, `format` shall be a valid datetime pattern, see + * Datetime Patterns. + * If `e` is a binary, it is converted to a string in one of the formats: + * * * @group string_funcs * @since 3.5.0 */ def to_varchar(e: Column, format: Column): Column = Column.fn("to_varchar", e, format) + // scalastyle:on line.size.limit /** * Convert string 'e' to a number based on the string format 'format'. Throws an exception if diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 9548f424ad407..7ceaeca068a2d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -4399,6 +4399,7 @@ object functions { new ToBinary(e.expr) } + // scalastyle:off line.size.limit /** * Convert `e` to a string based on the `format`. * Throws an exception if the conversion fails. The format can consist of the following @@ -4420,6 +4421,13 @@ object functions { * 'PR': Only allowed at the end of the format string; specifies that the result string will be * wrapped by angle brackets if the input value is negative. * + * If `e` is a datetime, `format` shall be a valid datetime pattern, see + * Datetime Patterns. + * If `e` is a binary, it is converted to a string in one of the formats: + * 'base64': a base 64 string. + * 'hex': a string in the hexadecimal format. + * 'utf-8': the input binary is decoded to UTF-8 string. + * * @group string_funcs * @since 3.5.0 */ @@ -4446,10 +4454,18 @@ object functions { * 'PR': Only allowed at the end of the format string; specifies that the result string will be * wrapped by angle brackets if the input value is negative. * + * If `e` is a datetime, `format` shall be a valid datetime pattern, see + * Datetime Patterns. + * If `e` is a binary, it is converted to a string in one of the formats: + * 'base64': a base 64 string. + * 'hex': a string in the hexadecimal format. 
+ * 'utf-8': the input binary is decoded to UTF-8 string. + * * @group string_funcs * @since 3.5.0 */ def to_varchar(e: Column, format: Column): Column = call_function("to_varchar", e, format) + // scalastyle:on line.size.limit /** * Convert string 'e' to a number based on the string format 'format'. From 3385bc846819b8ac044e8eed2795a31bca192c06 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 4 Sep 2023 15:10:15 +0300 Subject: [PATCH 2/6] Update pyspark/sql/functions.py --- python/pyspark/sql/functions.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index fb02cb0cc98b4..13367773ab9d2 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -10423,6 +10423,12 @@ def to_char(col: "ColumnOrName", format: "ColumnOrName") -> Column: values but 'MI' prints a space. 'PR': Only allowed at the end of the format string; specifies that the result string will be wrapped by angle brackets if the input value is negative. + If `col` is a datetime, `format` shall be a valid datetime pattern, see + Patterns. + If `col` is a binary, it is converted to a string in one of the formats: + 'base64': a base 64 string. + 'hex': a string in the hexadecimal format. + 'utf-8': the input binary is decoded to UTF-8 string. .. versionadded:: 3.5.0 @@ -10463,6 +10469,12 @@ def to_varchar(col: "ColumnOrName", format: "ColumnOrName") -> Column: values but 'MI' prints a space. 'PR': Only allowed at the end of the format string; specifies that the result string will be wrapped by angle brackets if the input value is negative. + If `col` is a datetime, `format` shall be a valid datetime pattern, see + Patterns. + If `col` is a binary, it is converted to a string in one of the formats: + 'base64': a base 64 string. + 'hex': a string in the hexadecimal format. + 'utf-8': the input binary is decoded to UTF-8 string. .. 
versionadded:: 3.5.0 From 992499a52c55b0d50f3da702fb4d43b276a87eef Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 4 Sep 2023 16:17:30 +0300 Subject: [PATCH 3/6] Trigger build From 9d2b78276147f1fdb782a8467bbf13dd40451260 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 4 Sep 2023 21:10:00 +0300 Subject: [PATCH 4/6] Address Wenchen's comments --- .../jvm/src/main/scala/org/apache/spark/sql/functions.scala | 4 +++- sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index e7ff8f01b373d..f993e4c5d87a0 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -4297,8 +4297,10 @@ object functions { * @group string_funcs * @since 3.5.0 */ + // scalastyle:on line.size.limit def to_char(e: Column, format: Column): Column = Column.fn("to_char", e, format) + // scalastyle:off line.size.limit /** * Convert `e` to a string based on the `format`. Throws an exception if the conversion fails. * @@ -4332,8 +4334,8 @@ object functions { * @group string_funcs * @since 3.5.0 */ - def to_varchar(e: Column, format: Column): Column = Column.fn("to_varchar", e, format) // scalastyle:on line.size.limit + def to_varchar(e: Column, format: Column): Column = Column.fn("to_varchar", e, format) /** * Convert string 'e' to a number based on the string format 'format'. 
Throws an exception if diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 7ceaeca068a2d..4d32f297a986e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -4431,8 +4431,10 @@ object functions { * @group string_funcs * @since 3.5.0 */ + // scalastyle:on line.size.limit def to_char(e: Column, format: Column): Column = call_function("to_char", e, format) + // scalastyle:off line.size.limit /** * Convert `e` to a string based on the `format`. * Throws an exception if the conversion fails. The format can consist of the following @@ -4464,8 +4466,8 @@ object functions { * @group string_funcs * @since 3.5.0 */ - def to_varchar(e: Column, format: Column): Column = call_function("to_varchar", e, format) // scalastyle:on line.size.limit + def to_varchar(e: Column, format: Column): Column = call_function("to_varchar", e, format) /** * Convert string 'e' to a number based on the string format 'format'. From 92a6cacef7a4d263ddd60e126b93a4ce45fe08c9 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 5 Sep 2023 14:10:28 +0300 Subject: [PATCH 5/6] Reformat functions.scala --- .../org/apache/spark/sql/functions.scala | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index f993e4c5d87a0..722c044ab4099 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -4284,15 +4284,12 @@ object functions { * (optional, only allowed once at the beginning or end of the format string). Note that 'S' * prints '+' for positive values but 'MI' prints a space.
  • 'PR': Only allowed at the * end of the format string; specifies that the result string will be wrapped by angle - * brackets if the input value is negative.
  • - * If `e` is a datetime, `format` shall be a valid datetime pattern, see - * Datetime Patterns. - * If `e` is a binary, it is converted to a string in one of the formats: - *
      - *
    • 'base64': a base 64 string.
    • - *
    • 'hex': a string in the hexadecimal format.
    • - *
    • 'utf-8': the input binary is decoded to UTF-8 string.
    • - *
    + * brackets if the input value is negative. If `e` is a datetime, `format` shall be + * a valid datetime pattern, see Datetime + * Patterns. If `e` is a binary, it is converted to a string in one of the formats:
      + *
    • 'base64': a base64-encoded string.
    • 'hex': a string in the hexadecimal format.
    • + *
    • 'utf-8': the input binary is decoded to a UTF-8 string.
    * * @group string_funcs * @since 3.5.0 @@ -4321,15 +4318,12 @@ object functions { * (optional, only allowed once at the beginning or end of the format string). Note that 'S' * prints '+' for positive values but 'MI' prints a space.
  • 'PR': Only allowed at the * end of the format string; specifies that the result string will be wrapped by angle - * brackets if the input value is negative.
  • - * If `e` is a datetime, `format` shall be a valid datetime pattern, see - * Datetime Patterns. - * If `e` is a binary, it is converted to a string in one of the formats: - *
      - *
    • 'base64': a base 64 string.
    • - *
    • 'hex': a string in the hexadecimal format.
    • - *
    • 'utf-8': the input binary is decoded to UTF-8 string.
    • - *
    + * brackets if the input value is negative. If `e` is a datetime, `format` shall be + * a valid datetime pattern, see Datetime + * Patterns. If `e` is a binary, it is converted to a string in one of the formats:
      + *
    • 'base64': a base64-encoded string.
    • 'hex': a string in the hexadecimal format.
    • + *
    • 'utf-8': the input binary is decoded to a UTF-8 string.
    * * @group string_funcs * @since 3.5.0 From 295a50c8b944acf86cf206c8360a08e21f2b2520 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 5 Sep 2023 17:19:47 +0300 Subject: [PATCH 6/6] Fix python formatting --- python/pyspark/sql/functions.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 13367773ab9d2..f5a5b28362662 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -10426,9 +10426,9 @@ def to_char(col: "ColumnOrName", format: "ColumnOrName") -> Column: If `col` is a datetime, `format` shall be a valid datetime pattern, see Patterns. If `col` is a binary, it is converted to a string in one of the formats: - 'base64': a base 64 string. - 'hex': a string in the hexadecimal format. - 'utf-8': the input binary is decoded to UTF-8 string. + 'base64': a base 64 string. + 'hex': a string in the hexadecimal format. + 'utf-8': the input binary is decoded to UTF-8 string. .. versionadded:: 3.5.0 @@ -10472,9 +10472,9 @@ def to_varchar(col: "ColumnOrName", format: "ColumnOrName") -> Column: If `col` is a datetime, `format` shall be a valid datetime pattern, see Patterns. If `col` is a binary, it is converted to a string in one of the formats: - 'base64': a base 64 string. - 'hex': a string in the hexadecimal format. - 'utf-8': the input binary is decoded to UTF-8 string. + 'base64': a base 64 string. + 'hex': a string in the hexadecimal format. + 'utf-8': the input binary is decoded to UTF-8 string. .. versionadded:: 3.5.0