diff --git a/docs/configuration.md b/docs/configuration.md
index e322247ed2975..6faa5e749bfad 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -2622,11 +2622,32 @@ Please refer to the [Security](security.html) page for available options on how
 Spark subsystems.
 
-{% for static_file in site.static_files %}
-  {% if static_file.name == 'generated-sql-configuration-table.html' %}
 
 ### Spark SQL
 
-{% include_relative generated-sql-configuration-table.html %}
+#### Runtime SQL Configuration
+
+Runtime SQL configurations are per-session, mutable Spark SQL configurations. They can be given initial values by the config file
+and command-line options with the `--conf/-c` prefix, or by setting the `SparkConf` used to create the `SparkSession`.
+Also, they can be set and queried with the SET command, reset to their initial values with the RESET command,
+or read and set through `SparkSession.conf`'s getter and setter methods at runtime.
+
+{% for static_file in site.static_files %}
+  {% if static_file.name == 'generated-runtime-sql-config-table.html' %}
+    {% include_relative generated-runtime-sql-config-table.html %}
+    {% break %}
+  {% endif %}
+{% endfor %}
+
+
+#### Static SQL Configuration
+
+Static SQL configurations are cross-session, immutable Spark SQL configurations. They are given their final values by the config file
+and command-line options with the `--conf/-c` prefix, or by setting the `SparkConf` used to create the `SparkSession`.
+External users can query static SQL config values via `SparkSession.conf` or via the SET command, e.g. `SET spark.sql.extensions;`, but cannot set or unset them.
+
+{% for static_file in site.static_files %}
+  {% if static_file.name == 'generated-static-sql-config-table.html' %}
+    {% include_relative generated-static-sql-config-table.html %}
     {% break %}
   {% endif %}
 {% endfor %}
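The new doc text above describes how the two groups behave; a minimal PySpark sketch of that behavior (not part of this patch) is shown below. `spark.sql.shuffle.partitions` and `spark.sql.warehouse.dir` are used only as illustrative runtime and static keys.

```python
# Illustrative sketch, not part of the patch: runtime vs. static SQL configs.
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .master("local[*]")
         # Static SQL configs can only be set before the session is created.
         .config("spark.sql.warehouse.dir", "/tmp/spark-warehouse")
         .getOrCreate())

# Runtime SQL configs are per-session and mutable.
spark.conf.set("spark.sql.shuffle.partitions", "10")
spark.sql("SET spark.sql.shuffle.partitions").show(truncate=False)
spark.sql("RESET")  # restores runtime configs to their initial values

# Static SQL configs can be read, but not modified, once the session exists.
print(spark.conf.get("spark.sql.warehouse.dir"))
# spark.conf.set("spark.sql.warehouse.dir", "/elsewhere")  # would raise AnalysisException
```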
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
index 03f5a60aec438..2e5f59edcf1da 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
@@ -23,13 +23,13 @@ import java.nio.channels.Channels
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.api.python.PythonRDDServer
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{DataFrame, Dataset, SQLContext}
+import org.apache.spark.sql.{DataFrame, SQLContext}
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.execution.{ExplainMode, QueryExecution}
 import org.apache.spark.sql.execution.arrow.ArrowConverters
-import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
 import org.apache.spark.sql.types.DataType
 
 private[sql] object PythonSQLUtils {
@@ -43,7 +43,14 @@ private[sql] object PythonSQLUtils {
   def listSQLConfigs(): Array[(String, String, String, String)] = {
     val conf = new SQLConf()
     // Py4J doesn't seem to translate Seq well, so we convert to an Array.
-    conf.getAllDefinedConfs.toArray
+    conf.getAllDefinedConfs.filterNot(p => SQLConf.staticConfKeys.contains(p._1)).toArray
+  }
+
+  def listStaticSQLConfigs(): Array[(String, String, String, String)] = {
+    val conf = new SQLConf()
+    // Force the static SQL configurations to be registered.
+    StaticSQLConf
+    conf.getAllDefinedConfs.filter(p => SQLConf.staticConfKeys.contains(p._1)).toArray
   }
 
   /**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala
new file mode 100644
index 0000000000000..0d18d123e328a
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.api.python
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
+
+class PythonSQLUtilsSuite extends SparkFunSuite {
+
+  test("listing sql configurations contains runtime ones only") {
+    val configs = PythonSQLUtils.listSQLConfigs()
+
+    // static sql configurations
+    assert(!configs.exists(entry => entry._1 == StaticSQLConf.SPARK_SESSION_EXTENSIONS.key),
+      "listSQLConfigs should not contain public static sql configuration")
+    assert(!configs.exists(entry => entry._1 == StaticSQLConf.DEBUG_MODE.key),
+      "listSQLConfigs should not contain internal static sql configuration")
+
+    // dynamic sql configurations
+    assert(configs.exists(entry => entry._1 == SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key),
+      "listSQLConfigs should contain public dynamic sql configuration")
+    assert(!configs.exists(entry => entry._1 == SQLConf.ANALYZER_MAX_ITERATIONS.key),
+      "listSQLConfigs should not contain internal dynamic sql configuration")
+
+    // spark core configurations
+    assert(!configs.exists(entry => entry._1 == "spark.master"),
+      "listSQLConfigs should not contain core configuration")
+  }
+
+  test("listing static sql configurations contains public static ones only") {
+    val configs = PythonSQLUtils.listStaticSQLConfigs()
+
+    // static sql configurations
+    assert(configs.exists(entry => entry._1 == StaticSQLConf.SPARK_SESSION_EXTENSIONS.key),
+      "listStaticSQLConfigs should contain public static sql configuration")
+    assert(!configs.exists(entry => entry._1 == StaticSQLConf.DEBUG_MODE.key),
+      "listStaticSQLConfigs should not contain internal static sql configuration")
+
+    // dynamic sql configurations
+    assert(!configs.exists(entry => entry._1 == SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key),
+      "listStaticSQLConfigs should not contain dynamic sql configuration")
+    assert(!configs.exists(entry => entry._1 == SQLConf.ANALYZER_MAX_ITERATIONS.key),
+      "listStaticSQLConfigs should not contain internal dynamic sql configuration")
configuration") + + // spark core configurations + assert(!configs.exists(entry => entry._1 == "spark.master"), + "listStaticSQLConfigs should not contain core configuration") + } +} diff --git a/sql/create-docs.sh b/sql/create-docs.sh index 6614c714e90c7..336afc4fcb9f4 100755 --- a/sql/create-docs.sh +++ b/sql/create-docs.sh @@ -45,8 +45,11 @@ mkdir docs echo "Generating SQL API Markdown files." "$SPARK_HOME/bin/spark-submit" gen-sql-api-docs.py -echo "Generating SQL configuration table HTML file." -"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py +echo "Generating runtime SQL runtime configuration table HTML file." +"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py runtime + +echo "Generating static SQL configuration table HTML file." +"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py static echo "Generating HTML files for SQL function table and examples." "$SPARK_HOME/bin/spark-submit" gen-sql-functions-docs.py diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py index 0043c412fbc16..848d2f21f1142 100644 --- a/sql/gen-sql-config-docs.py +++ b/sql/gen-sql-config-docs.py @@ -17,6 +17,7 @@ import os import re +import sys from collections import namedtuple from textwrap import dedent @@ -30,7 +31,11 @@ "SQLConfEntry", ["name", "default", "description", "version"]) -def get_public_sql_configs(jvm): +def get_public_sql_configs(jvm, group): + if group == "static": + config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listStaticSQLConfigs() + else: + config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs() sql_configs = [ SQLConfEntry( name=_sql_config._1(), @@ -38,7 +43,7 @@ def get_public_sql_configs(jvm): description=_sql_config._3(), version=_sql_config._4() ) - for _sql_config in jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs() + for _sql_config in config_set ] return sql_configs @@ -114,11 +119,17 @@ def generate_sql_configs_table_html(sql_configs, path): if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: ./bin/spark-submit sql/gen-sql-config-docs.py ") + sys.exit(-1) + else: + group = sys.argv[1] + jvm = launch_gateway().jvm - sql_configs = get_public_sql_configs(jvm) + sql_configs = get_public_sql_configs(jvm, group) spark_root_dir = os.path.dirname(os.path.dirname(__file__)) - sql_configs_table_path = os.path.join( - spark_root_dir, "docs/generated-sql-configuration-table.html") + sql_configs_table_path = os.path\ + .join(spark_root_dir, "docs", "generated-" + group + "-sql-config-table.html") generate_sql_configs_table_html(sql_configs, path=sql_configs_table_path)