From 42009fd1db696e89ae4c1853f0ff1c3c82b5f8c4 Mon Sep 17 00:00:00 2001
From: Kent Yao
Date: Fri, 24 Apr 2020 13:43:41 +0800
Subject: [PATCH 1/4] [SPARK-31550][SQL][DOCS] Set nondeterministic configurations with general meanings in sql configuration doc

---
 docs/configuration.md                         | 13 ++++++-----
 .../spark/sql/api/python/PythonSQLUtils.scala | 22 ++++++++++++++-----
 .../sql/api/python/PythonSQLUtilsSuite.scala  |  2 +-
 sql/create-docs.sh                            |  7 ++----
 sql/gen-sql-config-docs.py                    | 22 ++++++-----------
 5 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 6faa5e749bfad..13dae042ee8ca 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -2624,6 +2624,9 @@ Spark subsystems.
 
 ### Spark SQL
 
+{% for static_file in site.static_files %}
+  {% if static_file.name == 'generated-runtime-sql-config-table.html' %}
+
 #### Runtime SQL Configuration
 
 Runtime SQL configurations are per-session, mutable Spark SQL configurations. They can be set with initial values by the config file
@@ -2631,13 +2634,13 @@ and command-line options with `--conf/-c` prefixed, or by setting `SparkConf` th
 Also, they can be set and queried by SET commands and reset to their initial values by RESET command,
 or by `SparkSession.conf`'s setter and getter methods in runtime.
 
-{% for static_file in site.static_files %}
-  {% if static_file.name == 'generated-runtime-sql-config-table.html' %}
-    {% include_relative generated-runtime-sql-config-table.html %}
+{% include_relative generated-runtime-sql-config-table.html %}
     {% break %}
   {% endif %}
 {% endfor %}
 
+{% for static_file in site.static_files %}
+  {% if static_file.name == 'generated-static-sql-config-table.html' %}
 #### Static SQL Configuration
 
 Static SQL configurations are cross-session, immutable Spark SQL configurations. They can be set with initial values by the config file
@@ -2645,9 +2648,7 @@ Static SQL configurations are cross-session, immutable Spark SQL configurations.
 and command-line options with `--conf/-c` prefixed, or by setting `SparkConf` that are used to create `SparkSession`.
 External users can query the static sql config values via `SparkSession.conf` or via set command, e.g. `SET spark.sql.extensions;`, but cannot set/unset them.
 
-{% for static_file in site.static_files %}
-  {% if static_file.name == 'generated-static-sql-config-table.html' %}
-    {% include_relative generated-static-sql-config-table.html %}
+{% include_relative generated-static-sql-config-table.html %}
     {% break %}
   {% endif %}
 {% endfor %}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
index 2e5f59edcf1da..50800c1fa2311 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
@@ -40,17 +40,27 @@ private[sql] object PythonSQLUtils {
     FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray
   }
 
-  def listSQLConfigs(): Array[(String, String, String, String)] = {
+  private def listAllSQLConfigs(): Seq[(String, String, String, String)] = {
     val conf = new SQLConf()
+    // Force to build static SQL configurations
+    StaticSQLConf
+    // set nondeterministic configurations with general meanings
+    conf.getAllDefinedConfs.map {
+      case p @ (SQLConf.SESSION_LOCAL_TIMEZONE.key, _, _, _) =>
+        p.copy(_2 = "value of local timezone")
+      case p @ (StaticSQLConf.WAREHOUSE_PATH.key, _, _, _) =>
+        p.copy(_2 = "value of $SPARK_HOME/spark-warehouse")
+      case o => o
+    }
+  }
+
+  def listRuntimeSQLConfigs(): Array[(String, String, String, String)] = {
     // Py4J doesn't seem to translate Seq well, so we convert to an Array.
-    conf.getAllDefinedConfs.filterNot(p => SQLConf.staticConfKeys.contains(p._1)).toArray
+    listAllSQLConfigs().filterNot(p => SQLConf.staticConfKeys.contains(p._1)).toArray
   }
 
   def listStaticSQLConfigs(): Array[(String, String, String, String)] = {
-    val conf = new SQLConf()
-    // Force to build static SQL configurations
-    StaticSQLConf
-    conf.getAllDefinedConfs.filter(p => SQLConf.staticConfKeys.contains(p._1)).toArray
+    listAllSQLConfigs().filter(p => SQLConf.staticConfKeys.contains(p._1)).toArray
   }
 
   /**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala
index 0d18d123e328a..524fc8a9b0dea 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
 class PythonSQLUtilsSuite extends SparkFunSuite {
 
   test("listing sql configurations contains runtime ones only") {
-    val configs = PythonSQLUtils.listSQLConfigs()
+    val configs = PythonSQLUtils.listRuntimeSQLConfigs()
 
     // static sql configurations
     assert(!configs.exists(entry => entry._1 == StaticSQLConf.SPARK_SESSION_EXTENSIONS.key),
diff --git a/sql/create-docs.sh b/sql/create-docs.sh
index 336afc4fcb9f4..6614c714e90c7 100755
--- a/sql/create-docs.sh
+++ b/sql/create-docs.sh
@@ -45,11 +45,8 @@ mkdir docs
 echo "Generating SQL API Markdown files."
 "$SPARK_HOME/bin/spark-submit" gen-sql-api-docs.py
 
-echo "Generating runtime SQL runtime configuration table HTML file."
-"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py runtime
-
-echo "Generating static SQL configuration table HTML file."
-"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py static
+echo "Generating SQL configuration table HTML file."
+"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py
 
 echo "Generating HTML files for SQL function table and examples."
"$SPARK_HOME/bin/spark-submit" gen-sql-functions-docs.py diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py index 848d2f21f1142..0245ce7e8adb0 100644 --- a/sql/gen-sql-config-docs.py +++ b/sql/gen-sql-config-docs.py @@ -17,7 +17,7 @@ import os import re -import sys + from collections import namedtuple from textwrap import dedent @@ -31,11 +31,11 @@ "SQLConfEntry", ["name", "default", "description", "version"]) -def get_public_sql_configs(jvm, group): +def get_sql_configs(jvm, group): if group == "static": config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listStaticSQLConfigs() else: - config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs() + config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listRuntimeSQLConfigs() sql_configs = [ SQLConfEntry( name=_sql_config._1(), @@ -119,17 +119,13 @@ def generate_sql_configs_table_html(sql_configs, path): if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: ./bin/spark-submit sql/gen-sql-config-docs.py ") - sys.exit(-1) - else: - group = sys.argv[1] - jvm = launch_gateway().jvm - sql_configs = get_public_sql_configs(jvm, group) + docs_root_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "docs") - spark_root_dir = os.path.dirname(os.path.dirname(__file__)) - sql_configs_table_path = os.path\ - .join(spark_root_dir, "docs", "generated-" + group + "-sql-config-table.html") + sql_configs = get_sql_configs(jvm, "runtime") + sql_configs_table_path = os.path.join(docs_root_dir, "generated-runtime-sql-config-table.html") + generate_sql_configs_table_html(sql_configs, path=sql_configs_table_path) + sql_configs = get_sql_configs(jvm, "static") + sql_configs_table_path = os.path.join(docs_root_dir, "generated-static-sql-config-table.html") generate_sql_configs_table_html(sql_configs, path=sql_configs_table_path) From d47687fcb1da0f5f6b3f3b2a412875739024f62b Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sat, 25 Apr 2020 15:32:09 +0800 Subject: [PATCH 2/4] fix warehouse dir --- .../scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala index 50800c1fa2311..6b797db15bb6a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala @@ -49,7 +49,7 @@ private[sql] object PythonSQLUtils { case p @ (SQLConf.SESSION_LOCAL_TIMEZONE.key, _, _, _) => p.copy(_2 = "value of local timezone") case p @ (StaticSQLConf.WAREHOUSE_PATH.key, _, _, _) => - p.copy(_2 = "value of $SPARK_HOME/spark-warehouse") + p.copy(_2 = "value of $PWD/spark-warehouse") case o => o } } From 3d985a4c641f9fb73635311b97c42c3302a2f2c4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sun, 26 Apr 2020 13:34:08 +0800 Subject: [PATCH 3/4] address comments --- .../org/apache/spark/sql/api/python/PythonSQLUtils.scala | 9 +-------- sql/gen-sql-config-docs.py | 6 +++++- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala index 6b797db15bb6a..89bb4f362de74 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala +++ 
@@ -44,14 +44,7 @@ private[sql] object PythonSQLUtils {
     val conf = new SQLConf()
     // Force to build static SQL configurations
     StaticSQLConf
-    // set nondeterministic configurations with general meanings
-    conf.getAllDefinedConfs.map {
-      case p @ (SQLConf.SESSION_LOCAL_TIMEZONE.key, _, _, _) =>
-        p.copy(_2 = "value of local timezone")
-      case p @ (StaticSQLConf.WAREHOUSE_PATH.key, _, _, _) =>
-        p.copy(_2 = "value of $PWD/spark-warehouse")
-      case o => o
-    }
+    conf.getAllDefinedConfs
   }
 
   def listRuntimeSQLConfigs(): Array[(String, String, String, String)] = {
diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py
index 0245ce7e8adb0..c5748def21677 100644
--- a/sql/gen-sql-config-docs.py
+++ b/sql/gen-sql-config-docs.py
@@ -81,7 +81,11 @@ def generate_sql_configs_table_html(sql_configs, path):
             """
         ))
         for config in sorted(sql_configs, key=lambda x: x.name):
-            if config.default == "<undefined>":
+            if config.name =="spark.sql.session.timeZone":
+                default = "(value of local timezone)"
+            elif config.name == "spark.sql.warehouse.dir":
+                default = "(value of <code>$PWD/spark-warehouse</code>)"
+            elif config.default == "<undefined>":
                 default = "(none)"
             elif config.default.startswith("<value of "):
                 referenced_config_name = value_reference_pattern.match(config.default).group(1)

From: Kent Yao
Date: Sun, 26 Apr 2020 18:01:05 +0800
Subject: [PATCH 4/4] style

---
 sql/gen-sql-config-docs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py
index c5748def21677..f1980bcc0d80d 100644
--- a/sql/gen-sql-config-docs.py
+++ b/sql/gen-sql-config-docs.py
@@ -81,7 +81,7 @@ def generate_sql_configs_table_html(sql_configs, path):
             """
         ))
         for config in sorted(sql_configs, key=lambda x: x.name):
-            if config.name =="spark.sql.session.timeZone":
+            if config.name == "spark.sql.session.timeZone":
                 default = "(value of local timezone)"
             elif config.name == "spark.sql.warehouse.dir":
                 default = "(value of <code>$PWD/spark-warehouse</code>)"
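
---
Notes on the series (illustrative sketches, not code from the patches):

After PATCH 1/4, listRuntimeSQLConfigs() and listStaticSQLConfigs() share a single enumeration and differ only in how they filter against SQLConf.staticConfKeys, so no config can show up in both tables or in neither. A rough Python analogue of that partitioning, using made-up stand-in data (in Spark the full list comes from SQLConf.getAllDefinedConfs and the key set from SQLConf.staticConfKeys):

    # Stand-in registry; entries are (name, default) pairs for illustration only.
    ALL_CONFIGS = [
        ("spark.sql.shuffle.partitions", "200"),    # runtime
        ("spark.sql.session.timeZone", "..."),      # runtime
        ("spark.sql.extensions", "<undefined>"),    # static
        ("spark.sql.warehouse.dir", "..."),         # static
    ]
    STATIC_CONF_KEYS = {"spark.sql.extensions", "spark.sql.warehouse.dir"}

    def list_runtime_sql_configs():
        # Everything that is not static, mirroring filterNot(staticConfKeys.contains).
        return [c for c in ALL_CONFIGS if c[0] not in STATIC_CONF_KEYS]

    def list_static_sql_configs():
        # The complement, mirroring filter(staticConfKeys.contains).
        return [c for c in ALL_CONFIGS if c[0] in STATIC_CONF_KEYS]

    # Every defined config lands in exactly one of the two listings.
    assert len(list_runtime_sql_configs()) + len(list_static_sql_configs()) == len(ALL_CONFIGS)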
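PATCH 1/4 also drops the <static|runtime> argument from gen-sql-config-docs.py, so a single spark-submit run writes both HTML tables and the docs build pays the gateway startup cost only once. A sketch of that one-pass driver shape, where generate_table is a hypothetical stub standing in for get_sql_configs plus generate_sql_configs_table_html:

    import os

    def generate_table(group, docs_root_dir):
        # Stub: the real script fetches configs over Py4J and renders an HTML table.
        path = os.path.join(
            docs_root_dir, "generated-{}-sql-config-table.html".format(group))
        print("would write the {} config table to {}".format(group, path))

    if __name__ == "__main__":
        docs_root_dir = os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "docs")
        for group in ("runtime", "static"):
            generate_table(group, docs_root_dir)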
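PATCH 2/4 corrects the substituted warehouse default from $SPARK_HOME to $PWD, and PATCH 3/4 then moves the "general meaning" substitution out of the Scala listing (reverting the map over getAllDefinedConfs) into the doc generator, so the Python API keeps reporting real values. A minimal, self-contained sketch of the substitution the generator ends up doing, with invented SQLConfEntry rows rather than real listRuntimeSQLConfigs() output:

    from collections import namedtuple

    SQLConfEntry = namedtuple(
        "SQLConfEntry", ["name", "default", "description", "version"])

    def display_default(config):
        # Nondeterministic defaults are replaced with a stable, general description.
        if config.name == "spark.sql.session.timeZone":
            return "(value of local timezone)"
        if config.name == "spark.sql.warehouse.dir":
            return "(value of <code>$PWD/spark-warehouse</code>)"
        # SQLConf reports "<undefined>" for configs that have no default at all.
        if config.default == "<undefined>":
            return "(none)"
        return config.default

    # Invented rows for illustration only.
    entries = [
        SQLConfEntry("spark.sql.session.timeZone", "Asia/Shanghai", "...", "2.2.0"),
        SQLConfEntry("spark.sql.shuffle.partitions", "200", "...", "1.1.0"),
        SQLConfEntry("spark.sql.extensions", "<undefined>", "...", "2.2.0"),
    ]
    for e in sorted(entries, key=lambda x: x.name):
        print(e.name, "->", display_default(e))

Doing the substitution at the presentation layer keeps the generated HTML reproducible across machines (the build host's timezone and working directory no longer leak into the docs), while the Scala listing keeps returning real defaults to PySpark callers.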