From 2e3b1e249ec40db51e16f35c4c0d49316dc3dc27 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 4 May 2016 17:10:01 -0700 Subject: [PATCH 1/3] Deprecate it --- python/pyspark/sql/context.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 2096236d7f36f..78ab2e81bfce2 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -17,6 +17,7 @@ from __future__ import print_function import sys +import warnings if sys.version >= '3': basestring = unicode = str @@ -434,7 +435,6 @@ def streams(self): return ContinuousQueryManager(self._ssql_ctx.streams()) -# TODO(andrew): deprecate this class HiveContext(SQLContext): """A variant of Spark SQL that integrates with data stored in Hive. @@ -444,8 +444,15 @@ class HiveContext(SQLContext): :param sparkContext: The SparkContext to wrap. :param jhiveContext: An optional JVM Scala HiveContext. If set, we do not instantiate a new :class:`HiveContext` in the JVM, instead we make all calls to this object. + + .. note:: Deprecated in 2.0.0. Use SparkSession.builder.enableHiveSupport().getOrCreate(). """ + warnings.warn( + "HiveContext is deprecated in Spark 2.0.0. Please use " + + "SparkSession.builder.enableHiveSupport().getOrCreate() instead.", + DeprecationWarning) + def __init__(self, sparkContext, jhiveContext=None): if jhiveContext is None: sparkSession = SparkSession.withHiveSupport(sparkContext) From 8515ff7326ec6280336489a82435129fa84ed7d0 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 4 May 2016 17:15:47 -0700 Subject: [PATCH 2/3] Minor followups --- python/pyspark/sql/column.py | 2 +- python/pyspark/sql/streaming.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 43e9baece2de9..24f00fc17dc64 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -419,7 +419,7 @@ def over(self, window): >>> from pyspark.sql.functions import rank, min >>> # df.select(rank().over(window), min('age').over(window)) - .. note:: Window functions is only supported with HiveContext in 1.4 + .. note:: Window functions is only supported with Hive support in 1.4 """ from pyspark.sql.window import WindowSpec if not isinstance(window, WindowSpec): diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index bf03fdca91394..8238b8e7cde6b 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -221,7 +221,7 @@ def _test(): globs['os'] = os globs['sc'] = sc globs['sqlContext'] = SQLContext(sc) - globs['hiveContext'] = HiveContext(sc) + globs['hiveContext'] = HiveContext._createForTesting(sc) globs['df'] = \ globs['sqlContext'].read.format('text').stream('python/test_support/sql/streaming') From 5025c305b5b97a427bde1704187efba536e98a4f Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 4 May 2016 17:35:49 -0700 Subject: [PATCH 3/3] Remove it --- python/pyspark/sql/column.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 24f00fc17dc64..90fb76f9b5a5b 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -418,8 +418,6 @@ def over(self, window): >>> window = Window.partitionBy("name").orderBy("age").rowsBetween(-1, 1) >>> from pyspark.sql.functions import rank, min >>> # df.select(rank().over(window), min('age').over(window)) - - .. note:: Window functions is only supported with Hive support in 1.4 """ from pyspark.sql.window import WindowSpec if not isinstance(window, WindowSpec):