From a1346e2d481c77344af106bc505787850744e585 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 29 Jun 2018 00:59:29 +0800 Subject: [PATCH 1/8] Add a control to force Python version via environment variable --- docs/README.md | 3 ++- python/docs/Makefile | 28 ++++++++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/docs/README.md b/docs/README.md index dbea4d64c4298..f7ea8c3ac4d8c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -69,7 +69,8 @@ You can build just the Spark scaladoc and javadoc by running `build/sbt unidoc` Similarly, you can build just the PySpark docs by running `make html` from the `$SPARK_HOME/python/docs` directory. Documentation is only generated for classes that are listed as -public in `__init__.py`. The SparkR docs can be built by running `$SPARK_HOME/R/create-docs.sh`, and +public in `__init__.py`. You can also set `SPHINXPYTHON` to specify the Python executable to use with Sphinx. + The SparkR docs can be built by running `$SPARK_HOME/R/create-docs.sh`, and the SQL docs can be built by running `$SPARK_HOME/sql/create-docs.sh` after [building Spark](https://github.com/apache/spark#building-spark) first. diff --git a/python/docs/Makefile b/python/docs/Makefile index b8e079483c90c..010723b57d288 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -1,19 +1,35 @@ # Makefile for Sphinx documentation # +ifndef SPHINXPYTHON +SPHINXBUILD ?= sphinx-build +# User-friendly check for sphinx-build if explicitly specified. +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. 
If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif +else +# Note that there is an issue with Python version and Sphinx in PySpark documentation generation. +# Please remove this check below when this issue is fixed. See SPARK-24530 for more details. +PYTHON_VERSION_CHECK = $(shell $(SPHINXPYTHON) -c 'import sys; print(sys.version_info < (3, 0, 0))') +ifeq ($(PYTHON_VERSION_CHECK), True) +$(warning Note that Python 3 is required to generate PySpark documentation correctly for now. Current Python executable was less than Python 3. See SPARK-24530. To force Sphinx to use a specific Python executable, please set SPHINXPYTHON to point to the Python 3 executable.) +endif +# Check if Sphinx is installed. +ifeq ($(shell $(SPHINXPYTHON) -c 'import sphinx' >/dev/null 2>&1; echo $$?), 1) +$(error Python executable '$(SPHINXPYTHON)' did not have Sphinx installed. Make sure you have Sphinx installed, then set the SPHINXPYTHON environment variable to point to the Python executable having Sphinx installed. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif +# Use 'SPHINXPYTHON -msphinx' instead of 'sphinx-build' by default. See https://github.com/sphinx-doc/sphinx/pull/3523 for more details. +SPHINXBUILD = $(SPHINXPYTHON) -msphinx +endif + # You can set these variables from the command line. SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build PAPER ?= BUILDDIR ?= _build +# Also, you can set SPHINXBUILD to specify Sphinx build executable or SPHINXPYTHON to specify the Python executable used in Sphinx. export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.7-src.zip) -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) -$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. 
Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) -endif - # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter From dd303b60bb0f3a2d6992080d677b263a26795b47 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 29 Jun 2018 16:33:48 +0800 Subject: [PATCH 2/8] Python change to trigger the stlye check --- python/pyspark/sql/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py index c3c06c8124362..a5cd18350f062 100644 --- a/python/pyspark/sql/__init__.py +++ b/python/pyspark/sql/__init__.py @@ -16,7 +16,7 @@ # """ -Important classes of Spark SQL and DataFrames: +Important classes of Spark SQL and DataFrames - :class:`pyspark.sql.SparkSession` Main entry point for :class:`DataFrame` and SQL functionality. From 69f31a9006b3dc60bb34b3ae39967076016528ec Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 29 Jun 2018 16:41:43 +0800 Subject: [PATCH 3/8] Revert "Python change to trigger the stlye check" This reverts commit dd303b60bb0f3a2d6992080d677b263a26795b47. --- python/pyspark/sql/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py index a5cd18350f062..c3c06c8124362 100644 --- a/python/pyspark/sql/__init__.py +++ b/python/pyspark/sql/__init__.py @@ -16,7 +16,7 @@ # """ -Important classes of Spark SQL and DataFrames +Important classes of Spark SQL and DataFrames: - :class:`pyspark.sql.SparkSession` Main entry point for :class:`DataFrame` and SQL functionality. 
From 135d613fdd6b3e78d0cd717a5426c3c79c1ab815 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 2 Jul 2018 23:57:00 +0800 Subject: [PATCH 4/8] Address comments and correct its priority --- python/docs/Makefile | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/python/docs/Makefile b/python/docs/Makefile index 010723b57d288..a61eb57f905fd 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -1,8 +1,13 @@ # Makefile for Sphinx documentation # +ifndef SPHINXBUILD ifndef SPHINXPYTHON -SPHINXBUILD ?= sphinx-build +SPHINXBUILD = sphinx-build +endif +endif + +ifdef SPHINXBUILD # User-friendly check for sphinx-build if explicitly specified. ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) @@ -12,7 +17,7 @@ else # Please remove this check below when this issue is fixed. See SPARK-24530 for more details. PYTHON_VERSION_CHECK = $(shell $(SPHINXPYTHON) -c 'import sys; print(sys.version_info < (3, 0, 0))') ifeq ($(PYTHON_VERSION_CHECK), True) -$(warning Note that Python 3 is required to generate PySpark documentation correctly for now. Current Python executable was less than Python 3. See SPARK-24530. To force Sphinx to use a specific Python executable, please set SPHINXPYTHON to point to the Python 3 executable.) +$(error Note that Python 3 is required to generate PySpark documentation correctly for now. Current Python executable was less than Python 3. See SPARK-24530. To force Sphinx to use a specific Python executable, please set SPHINXPYTHON to point to the Python 3 executable.) endif # Check if Sphinx is installed. 
ifeq ($(shell $(SPHINXPYTHON) -c 'import sphinx' >/dev/null 2>&1; echo $$?), 1) @@ -26,7 +31,11 @@ endif SPHINXOPTS ?= PAPER ?= BUILDDIR ?= _build -# Also, you can set SPHINXBUILD to specify Sphinx build executable or SPHINXPYTHON to specify the Python executable used in Sphinx. +# You can set SPHINXBUILD to specify Sphinx build executable or SPHINXPYTHON to specify the Python executable used in Sphinx. +# They follow: +# 1. if SPHINXPYTHON is set, use Python. If SPHINXBUILD is set, use sphinx-build. +# 2. If both are set, SPHINXBUILD has a higher priority over SPHINXPYTHON +# 3. By default, SPHINXBUILD is used as 'sphinx-build'. export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.7-src.zip) From 71ff04080c716b32dd46e3a81fa3922e489ce30c Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Tue, 3 Jul 2018 00:01:29 +0800 Subject: [PATCH 5/8] Fix some comments --- python/docs/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/docs/Makefile b/python/docs/Makefile index a61eb57f905fd..1ed1f33af2326 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -8,7 +8,7 @@ endif endif ifdef SPHINXBUILD -# User-friendly check for sphinx-build if explicitly specified. +# User-friendly check for sphinx-build. ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) endif @@ -23,7 +23,7 @@ endif ifeq ($(shell $(SPHINXPYTHON) -c 'import sphinx' >/dev/null 2>&1; echo $$?), 1) $(error Python executable '$(SPHINXPYTHON)' did not have Sphinx installed. Make sure you have Sphinx installed, then set the SPHINXPYTHON environment variable to point to the Python executable having Sphinx installed. 
If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) endif -# Use 'SPHINXPYTHON -msphinx' instead of 'sphinx-build' by default. See https://github.com/sphinx-doc/sphinx/pull/3523 for more details. +# Use 'SPHINXPYTHON -msphinx' instead of 'sphinx-build'. See https://github.com/sphinx-doc/sphinx/pull/3523 for more details. SPHINXBUILD = $(SPHINXPYTHON) -msphinx endif From 950ead09a17ed4a413617fe4f1f34ff2ee60eb82 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 9 Jul 2018 10:04:14 +0800 Subject: [PATCH 6/8] Run pydocstyle --- python/pyspark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index 58218918693ca..0594ff728e688 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -16,7 +16,7 @@ # """ -PySpark is the Python API for Spark. +PySpark is the Python API for Spark Public classes: From d500e0d515d55c1f7c94784a5ca6ee32519b3cf0 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 9 Jul 2018 10:05:17 +0800 Subject: [PATCH 7/8] Revert "Run pydocstyle" This reverts commit 950ead09a17ed4a413617fe4f1f34ff2ee60eb82. --- python/pyspark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index 0594ff728e688..58218918693ca 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -16,7 +16,7 @@ # """ -PySpark is the Python API for Spark +PySpark is the Python API for Spark. 
Public classes: From 2e1e7da3111129907704f02e8d20a2894f22465d Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Tue, 10 Jul 2018 09:51:33 +0800 Subject: [PATCH 8/8] Remove changes in README.md which makes backporting harder --- docs/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/README.md b/docs/README.md index f7ea8c3ac4d8c..dbea4d64c4298 100644 --- a/docs/README.md +++ b/docs/README.md @@ -69,8 +69,7 @@ You can build just the Spark scaladoc and javadoc by running `build/sbt unidoc` Similarly, you can build just the PySpark docs by running `make html` from the `$SPARK_HOME/python/docs` directory. Documentation is only generated for classes that are listed as -public in `__init__.py`. You can also set `SPHINXPYTHON` to specify the Python executable to use with Sphinx. - The SparkR docs can be built by running `$SPARK_HOME/R/create-docs.sh`, and +public in `__init__.py`. The SparkR docs can be built by running `$SPARK_HOME/R/create-docs.sh`, and the SQL docs can be built by running `$SPARK_HOME/sql/create-docs.sh` after [building Spark](https://github.com/apache/spark#building-spark) first.