From fd08f1ddc81c209c76bf9ba329743467ae1706ad Mon Sep 17 00:00:00 2001
From: Sean Zhong
Date: Sun, 12 Jun 2016 23:43:30 -0700
Subject: [PATCH 1/5] SPARK-15914

---
 .../org/apache/spark/sql/SQLContext.scala | 288 ++++++++++++++++++
 1 file changed, 288 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 6fcc9bb44775a..046b897f5d3da 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -736,6 +736,294 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   private[sql] def parseDataType(dataTypeString: String): DataType = {
     DataType.fromJson(dataTypeString)
   }
+
+  ////////////////////////////////////////////////////////////////////////////
+  ////////////////////////////////////////////////////////////////////////////
+  // Deprecated methods
+  ////////////////////////////////////////////////////////////////////////////
+  ////////////////////////////////////////////////////////////////////////////
+
+  /**
+   * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
+  def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = {
+    createDataFrame(rowRDD, schema)
+  }
+
+  /**
+   * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
+  def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
+    createDataFrame(rowRDD, schema)
+  }
+
+  /**
+   * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
+  def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
+    createDataFrame(rdd, beanClass)
+  }
+
+  /**
+   * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
+  def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
+    createDataFrame(rdd, beanClass)
+  }
+
+  /**
+   * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty
+   * [[DataFrame]] if no paths are passed in.
+   *
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().parquet()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.parquet(). This will be removed in Spark 2.0.", "1.4.0")
+  @scala.annotation.varargs
+  def parquetFile(paths: String*): DataFrame = {
+    if (paths.isEmpty) {
+      emptyDataFrame
+    } else {
+      read.parquet(paths : _*)
+    }
+  }
+
+  /**
+   * Loads a JSON file (one object per line), returning the result as a [[DataFrame]].
+   * It goes through the entire dataset once to determine the schema.
+   *
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  def jsonFile(path: String): DataFrame = {
+    read.json(path)
+  }
+
+  /**
+   * Loads a JSON file (one object per line) and applies the given schema,
+   * returning the result as a [[DataFrame]].
+   *
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  def jsonFile(path: String, schema: StructType): DataFrame = {
+    read.schema(schema).json(path)
+  }
+
+  /**
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  def jsonFile(path: String, samplingRatio: Double): DataFrame = {
+    read.option("samplingRatio", samplingRatio.toString).json(path)
+  }
+
+  /**
+   * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
+   * [[DataFrame]].
+   * It goes through the entire dataset once to determine the schema.
+   *
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  def jsonRDD(json: RDD[String]): DataFrame = read.json(json)
+
+  /**
+   * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
+   * [[DataFrame]].
+   * It goes through the entire dataset once to determine the schema.
+   *
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json)
+
+  /**
+   * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema,
+   * returning the result as a [[DataFrame]].
+   *
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  def jsonRDD(json: RDD[String], schema: StructType): DataFrame = {
+    read.schema(schema).json(json)
+  }
+
+  /**
+   * Loads a JavaRDD storing JSON objects (one object per record) and applies the given
+   * schema, returning the result as a [[DataFrame]].
+   *
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = {
+    read.schema(schema).json(json)
+  }
+
+  /**
+   * Loads an RDD[String] storing JSON objects (one object per record) inferring the
+   * schema, returning the result as a [[DataFrame]].
+   *
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = {
+    read.option("samplingRatio", samplingRatio.toString).json(json)
+  }
+
+  /**
+   * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the
+   * schema, returning the result as a [[DataFrame]].
+   *
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = {
+    read.option("samplingRatio", samplingRatio.toString).json(json)
+  }
+
+  /**
+   * Returns the dataset stored at path as a DataFrame,
+   * using the default data source configured by spark.sql.sources.default.
+   *
+   * @group genericdata
+   * @deprecated As of 1.4.0, replaced by `read().load(path)`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.load(path). This will be removed in Spark 2.0.", "1.4.0")
+  def load(path: String): DataFrame = {
+    read.load(path)
+  }
+
+  /**
+   * Returns the dataset stored at path as a DataFrame, using the given data source.
+   *
+   * @group genericdata
+   * @deprecated As of 1.4.0, replaced by `read().format(source).load(path)`.
+   *             This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.format(source).load(path). This will be removed in Spark 2.0.", "1.4.0")
+  def load(path: String, source: String): DataFrame = {
+    read.format(source).load(path)
+  }
+
+  /**
+   * (Java-specific) Returns the dataset specified by the given data source and
+   * a set of options as a DataFrame.
+   *
+   * @group genericdata
+   * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
+   *             This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.format(source).options(options).load(). " +
+    "This will be removed in Spark 2.0.", "1.4.0")
+  def load(source: String, options: java.util.Map[String, String]): DataFrame = {
+    read.options(options).format(source).load()
+  }
+
+  /**
+   * (Scala-specific) Returns the dataset specified by the given data source and
+   * a set of options as a DataFrame.
+   *
+   * @group genericdata
+   * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
+   */
+  @deprecated("Use read.format(source).options(options).load(). " +
+    "This will be removed in Spark 2.0.", "1.4.0")
+  def load(source: String, options: Map[String, String]): DataFrame = {
+    read.options(options).format(source).load()
+  }
+
+  /**
+   * (Java-specific) Returns the dataset specified by the given data source and
+   * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
+   *
+   * @group genericdata
+   * @deprecated As of 1.4.0, replaced by
+   *             `read().format(source).schema(schema).options(options).load()`.
+   */
+  @deprecated("Use read.format(source).schema(schema).options(options).load(). " +
+    "This will be removed in Spark 2.0.", "1.4.0")
+  def load(source: String, schema: StructType, options: java.util.Map[String, String]): DataFrame =
+  {
+    read.format(source).schema(schema).options(options).load()
+  }
+
+  /**
+   * (Scala-specific) Returns the dataset specified by the given data source and
+   * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
+   *
+   * @group genericdata
+   * @deprecated As of 1.4.0, replaced by
+   *             `read().format(source).schema(schema).options(options).load()`.
+   */
+  @deprecated("Use read.format(source).schema(schema).options(options).load(). " +
+    "This will be removed in Spark 2.0.", "1.4.0")
+  def load(source: String, schema: StructType, options: Map[String, String]): DataFrame = {
+    read.format(source).schema(schema).options(options).load()
+  }
+
+  /**
+   * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
+   * url named table.
+   *
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
+  def jdbc(url: String, table: String): DataFrame = {
+    read.jdbc(url, table, new Properties)
+  }
+
+  /**
+   * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
+   * url named table. Partitions of the table will be retrieved in parallel based on the parameters
+   * passed to this function.
+   *
+   * @param columnName the name of a column of integral type that will be used for partitioning.
+   * @param lowerBound the minimum value of `columnName` used to decide partition stride
+   * @param upperBound the maximum value of `columnName` used to decide partition stride
+   * @param numPartitions the number of partitions. The range `lowerBound`-`upperBound` will be
+   *                      split evenly into this many partitions
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
+  def jdbc(
+    url: String,
+    table: String,
+    columnName: String,
+    lowerBound: Long,
+    upperBound: Long,
+    numPartitions: Int): DataFrame = {
+    read.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties)
+  }
+
+  /**
+   * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
+   * url named table. The theParts parameter gives a list of expressions
+   * suitable for inclusion in WHERE clauses; each one defines one partition
+   * of the [[DataFrame]].
+   *
+   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0.
+   */
+  @deprecated("Use read.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
+  def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = {
+    read.jdbc(url, table, theParts, new Properties)
+  }
 }
 
 /**
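Patch 1 restores the pre-2.0 entry points as thin forwards to the DataFrameReader API (read.json, read.parquet, read.jdbc, and so on), so source written against Spark 1.x keeps compiling on 2.0. A minimal sketch of the old calls next to their replacements; it assumes a SQLContext named sqlContext, and the paths, JDBC URL, and table name are hypothetical:

    // Sketch only: deprecated 1.x calls vs. their DataFrameReader equivalents.
    import java.util.Properties

    val users  = sqlContext.jsonFile("/data/users.json")        // deprecated
    val users2 = sqlContext.read.json("/data/users.json")       // replacement

    val logs  = sqlContext.parquetFile("/data/logs.parquet")    // deprecated
    val logs2 = sqlContext.read.parquet("/data/logs.parquet")   // replacement

    val t  = sqlContext.jdbc("jdbc:postgresql:testdb", "events")                      // deprecated
    val t2 = sqlContext.read.jdbc("jdbc:postgresql:testdb", "events", new Properties) // replacement

Each pair returns the same DataFrame; only the deprecated form emits a compiler warning.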
From 4751b6c149e0e91ebe799f743ba644f33ca2c694 Mon Sep 17 00:00:00 2001
From: Sean Zhong
Date: Mon, 13 Jun 2016 13:03:49 -0700
Subject: [PATCH 2/5] update the deprecation notes.

---
 .../org/apache/spark/sql/SQLContext.scala | 50 +++++++++----------
 1 file changed, 23 insertions(+), 27 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 046b897f5d3da..bea1fbb474098 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -746,7 +746,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   /**
    * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
+  @deprecated("Use createDataFrame instead.", "1.3.0")
   def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = {
     createDataFrame(rowRDD, schema)
   }
@@ -754,7 +754,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   /**
    * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
+  @deprecated("Use createDataFrame instead.", "1.3.0")
   def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
     createDataFrame(rowRDD, schema)
   }
@@ -762,7 +762,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   /**
    * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
+  @deprecated("Use createDataFrame instead.", "1.3.0")
   def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
     createDataFrame(rdd, beanClass)
   }
@@ -770,7 +770,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   /**
    * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
+  @deprecated("Use createDataFrame instead.", "1.3.0")
   def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
     createDataFrame(rdd, beanClass)
   }
@@ -782,7 +782,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @group specificdata
    * @deprecated As of 1.4.0, replaced by `read().parquet()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.parquet(). This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.parquet() instead.", "1.4.0")
   @scala.annotation.varargs
   def parquetFile(paths: String*): DataFrame = {
     if (paths.isEmpty) {
@@ -799,7 +799,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @group specificdata
    * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.json() instead.", "1.4.0")
   def jsonFile(path: String): DataFrame = {
     read.json(path)
   }
@@ -811,7 +811,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @group specificdata
    * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.json() instead.", "1.4.0")
   def jsonFile(path: String, schema: StructType): DataFrame = {
     read.schema(schema).json(path)
   }
@@ -820,7 +820,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @group specificdata
    * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.json() instead.", "1.4.0")
   def jsonFile(path: String, samplingRatio: Double): DataFrame = {
     read.option("samplingRatio", samplingRatio.toString).json(path)
   }
@@ -833,7 +833,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @group specificdata
    * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.json() instead.", "1.4.0")
   def jsonRDD(json: RDD[String]): DataFrame = read.json(json)
 
   /**
@@ -844,7 +844,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @group specificdata
    * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.json() instead.", "1.4.0")
   def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json)
 
   /**
@@ -854,7 +854,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @group specificdata
    * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.json() instead.", "1.4.0")
   def jsonRDD(json: RDD[String], schema: StructType): DataFrame = {
     read.schema(schema).json(json)
   }
@@ -866,7 +866,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @group specificdata
    * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.json() instead.", "1.4.0")
   def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = {
     read.schema(schema).json(json)
   }
@@ -878,7 +878,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @group specificdata
    * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.json() instead.", "1.4.0")
   def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = {
     read.option("samplingRatio", samplingRatio.toString).json(json)
   }
@@ -890,7 +890,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @group specificdata
    * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.json() instead.", "1.4.0")
   def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = {
     read.option("samplingRatio", samplingRatio.toString).json(json)
   }
@@ -902,7 +902,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @group genericdata
    * @deprecated As of 1.4.0, replaced by `read().load(path)`. This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.load(path). This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.load(path) instead.", "1.4.0")
   def load(path: String): DataFrame = {
     read.load(path)
   }
@@ -914,7 +914,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @deprecated As of 1.4.0, replaced by `read().format(source).load(path)`.
    *             This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.format(source).load(path). This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.format(source).load(path) instead.", "1.4.0")
   def load(path: String, source: String): DataFrame = {
     read.format(source).load(path)
   }
@@ -927,8 +927,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
    *             This will be removed in Spark 2.0.
    */
-  @deprecated("Use read.format(source).options(options).load(). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
+  @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0")
   def load(source: String, options: java.util.Map[String, String]): DataFrame = {
     read.options(options).format(source).load()
   }
@@ -940,8 +939,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @group genericdata
    * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
    */
-  @deprecated("Use read.format(source).options(options).load(). " +
" + - "This will be removed in Spark 2.0.", "1.4.0") + @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0") def load(source: String, options: Map[String, String]): DataFrame = { read.options(options).format(source).load() } @@ -954,8 +952,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @deprecated As of 1.4.0, replaced by * `read().format(source).schema(schema).options(options).load()`. */ - @deprecated("Use read.format(source).schema(schema).options(options).load(). " + - "This will be removed in Spark 2.0.", "1.4.0") + @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0") def load(source: String, schema: StructType, options: java.util.Map[String, String]): DataFrame = { read.format(source).schema(schema).options(options).load() @@ -969,8 +966,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @deprecated As of 1.4.0, replaced by * `read().format(source).schema(schema).options(options).load()`. */ - @deprecated("Use read.format(source).schema(schema).options(options).load(). " + - "This will be removed in Spark 2.0.", "1.4.0") + @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0") def load(source: String, schema: StructType, options: Map[String, String]): DataFrame = { read.format(source).schema(schema).options(options).load() } @@ -982,7 +978,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @group specificdata * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0. */ - @deprecated("Use read.jdbc(). This will be removed in Spark 2.0.", "1.4.0") + @deprecated("Use read.jdbc() instead.", "1.4.0") def jdbc(url: String, table: String): DataFrame = { read.jdbc(url, table, new Properties) } @@ -1000,7 +996,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @group specificdata * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0. */ - @deprecated("Use read.jdbc(). This will be removed in Spark 2.0.", "1.4.0") + @deprecated("Use read.jdbc() instead.", "1.4.0") def jdbc( url: String, table: String, @@ -1020,7 +1016,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @group specificdata * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0. */ - @deprecated("Use read.jdbc(). This will be removed in Spark 2.0.", "1.4.0") + @deprecated("Use read.jdbc() instead.", "1.4.0") def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = { read.jdbc(url, table, theParts, new Properties) } From 4c039d8b61fc4ab40fdd7d59d24f576f002d10cc Mon Sep 17 00:00:00 2001 From: Sean Zhong Date: Mon, 13 Jun 2016 13:20:26 -0700 Subject: [PATCH 3/5] update MIMA --- project/MimaExcludes.scala | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 9d0d9b1be0771..7682ac0393e2f 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -778,6 +778,15 @@ object MimaExcludes { ) ++ Seq( ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.asBreeze"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Matrix.asBreeze") + ) ++ Seq( + // [SPARK-15914] Binary compatibility is broken since combining Dataset and DataFrame in + // Spark 2.0. Source level compatibility is still maintained. 
From 4c039d8b61fc4ab40fdd7d59d24f576f002d10cc Mon Sep 17 00:00:00 2001
From: Sean Zhong
Date: Mon, 13 Jun 2016 13:20:26 -0700
Subject: [PATCH 3/5] update MIMA

---
 project/MimaExcludes.scala | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 9d0d9b1be0771..7682ac0393e2f 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -778,6 +778,15 @@ object MimaExcludes {
     ) ++ Seq(
       ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.asBreeze"),
       ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Matrix.asBreeze")
+    ) ++ Seq(
+      // [SPARK-15914] Binary compatibility is broken since combining Dataset and DataFrame in
+      // Spark 2.0. Source level compatibility is still maintained.
+      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.load")
+      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.jsonRDD")
+      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.jsonFile")
+      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.jdbc")
+      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.parquetFile")
+      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.applySchema")
     )
   case v if v.startsWith("1.6") =>
     Seq(
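Note that as committed here, the nine added lines do not compile: Scala requires commas between the elements of Seq(...), which is exactly what the next patch supplies. A hedged sketch of the sbt-mima-plugin exclusion idiom this block follows, with illustrative names only:

    import com.typesafe.tools.mima.core._

    // Each filter suppresses one reported incompatibility. Elements are
    // comma-separated; only the last element takes no trailing comma.
    val excludes = Seq(
      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.example.Api.load"),
      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.example.Api.save")
    )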
From 04ef1b557cf4267e85c98993c11e7f6a6a31b6c8 Mon Sep 17 00:00:00 2001
From: Sean Zhong
Date: Mon, 13 Jun 2016 13:23:13 -0700
Subject: [PATCH 4/5] update MIMA

---
 project/MimaExcludes.scala | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 7682ac0393e2f..a6209d78e168c 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -779,13 +779,13 @@ object MimaExcludes {
       ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.asBreeze"),
       ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Matrix.asBreeze")
     ) ++ Seq(
-      // [SPARK-15914] Binary compatibility is broken since combining Dataset and DataFrame in
-      // Spark 2.0. Source level compatibility is still maintained.
-      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.load")
-      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.jsonRDD")
-      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.jsonFile")
-      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.jdbc")
-      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.parquetFile")
+      // [SPARK-15914] Binary compatibility is broken since consolidation of Dataset and DataFrame
+      // in Spark 2.0. However, source level compatibility is still maintained.
+      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.load"),
+      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.jsonRDD"),
+      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.jsonFile"),
+      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.jdbc"),
+      ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.parquetFile"),
       ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.applySchema")
     )
   case v if v.startsWith("1.6") =>
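The IncompatibleResultTypeProblem filters are needed because Spark 2.0 makes DataFrame a type alias for Dataset[Row]. A Scala type alias disappears at the bytecode level, so each excluded method now has a JVM return type of Dataset where 1.x binaries linked against the old DataFrame class, while source code compiles unchanged. A toy model of the effect (not Spark code; all names hypothetical):

    object AliasDemo {
      class Dataset[T]
      class Row
      type DataFrame = Dataset[Row]  // shaped like Spark 2.0's alias

      // Callers compile against DataFrame as before, but the compiled
      // signature of load mentions only Dataset; no DataFrame class is
      // left for previously compiled binaries to link against.
      def load(): DataFrame = new Dataset[Row]
    }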
From 0cc81b1d7dc3c4e05e3adf5f72c25c320a043a0c Mon Sep 17 00:00:00 2001
From: Sean Zhong
Date: Mon, 13 Jun 2016 16:23:00 -0700
Subject: [PATCH 5/5] fix style

---
 .../org/apache/spark/sql/SQLContext.scala | 56 +++++++++----------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index bea1fbb474098..58b4e6c5f604e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -744,7 +744,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   ////////////////////////////////////////////////////////////////////////////
 
   /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
    */
   @deprecated("Use createDataFrame instead.", "1.3.0")
   def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = {
@@ -752,7 +752,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   }
 
   /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
    */
   @deprecated("Use createDataFrame instead.", "1.3.0")
   def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
@@ -760,7 +760,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   }
 
   /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
    */
   @deprecated("Use createDataFrame instead.", "1.3.0")
   def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
@@ -768,7 +768,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   }
 
   /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
    */
   @deprecated("Use createDataFrame instead.", "1.3.0")
   def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
@@ -780,7 +780,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * [[DataFrame]] if no paths are passed in.
    *
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().parquet()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().parquet()`.
    */
   @deprecated("Use read.parquet() instead.", "1.4.0")
   @scala.annotation.varargs
@@ -797,7 +797,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * It goes through the entire dataset once to determine the schema.
    *
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
    */
   @deprecated("Use read.json() instead.", "1.4.0")
   def jsonFile(path: String): DataFrame = {
@@ -809,7 +809,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * returning the result as a [[DataFrame]].
    *
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
    */
   @deprecated("Use read.json() instead.", "1.4.0")
   def jsonFile(path: String, schema: StructType): DataFrame = {
@@ -818,7 +818,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
 
   /**
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
   */
   @deprecated("Use read.json() instead.", "1.4.0")
   def jsonFile(path: String, samplingRatio: Double): DataFrame = {
@@ -831,7 +831,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * It goes through the entire dataset once to determine the schema.
    *
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
    */
   @deprecated("Use read.json() instead.", "1.4.0")
   def jsonRDD(json: RDD[String]): DataFrame = read.json(json)
@@ -842,7 +842,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * It goes through the entire dataset once to determine the schema.
    *
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
    */
   @deprecated("Use read.json() instead.", "1.4.0")
   def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json)
@@ -852,7 +852,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * returning the result as a [[DataFrame]].
    *
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
    */
   @deprecated("Use read.json() instead.", "1.4.0")
   def jsonRDD(json: RDD[String], schema: StructType): DataFrame = {
@@ -864,7 +864,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * schema, returning the result as a [[DataFrame]].
    *
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
    */
   @deprecated("Use read.json() instead.", "1.4.0")
   def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = {
@@ -876,7 +876,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * schema, returning the result as a [[DataFrame]].
    *
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
    */
   @deprecated("Use read.json() instead.", "1.4.0")
   def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = {
@@ -888,7 +888,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * schema, returning the result as a [[DataFrame]].
    *
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
    */
   @deprecated("Use read.json() instead.", "1.4.0")
   def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = {
@@ -900,7 +900,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * using the default data source configured by spark.sql.sources.default.
    *
    * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().load(path)`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().load(path)`.
    */
   @deprecated("Use read.load(path) instead.", "1.4.0")
   def load(path: String): DataFrame = {
@@ -912,7 +912,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    *
    * @group genericdata
    * @deprecated As of 1.4.0, replaced by `read().format(source).load(path)`.
-   *             This will be removed in Spark 2.0.
    */
   @deprecated("Use read.format(source).load(path) instead.", "1.4.0")
   def load(path: String, source: String): DataFrame = {
@@ -925,7 +924,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    *
    * @group genericdata
    * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
-   *             This will be removed in Spark 2.0.
    */
   @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0")
   def load(source: String, options: java.util.Map[String, String]): DataFrame = {
@@ -953,8 +951,10 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    *             `read().format(source).schema(schema).options(options).load()`.
    */
   @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0")
-  def load(source: String, schema: StructType, options: java.util.Map[String, String]): DataFrame =
-  {
+  def load(
+      source: String,
+      schema: StructType,
+      options: java.util.Map[String, String]): DataFrame = {
     read.format(source).schema(schema).options(options).load()
   }
 
@@ -976,7 +976,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * url named table.
    *
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
    */
   @deprecated("Use read.jdbc() instead.", "1.4.0")
   def jdbc(url: String, table: String): DataFrame = {
     read.jdbc(url, table, new Properties)
   }
@@ -994,16 +994,16 @@ class SQLContext private[sql](val sparkSession: SparkSession)
    * @param numPartitions the number of partitions. The range `lowerBound`-`upperBound` will be
    *                      split evenly into this many partitions
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
    */
   @deprecated("Use read.jdbc() instead.", "1.4.0")
   def jdbc(
-    url: String,
-    table: String,
-    columnName: String,
-    lowerBound: Long,
-    upperBound: Long,
-    numPartitions: Int): DataFrame = {
+      url: String,
+      table: String,
+      columnName: String,
+      lowerBound: Long,
+      upperBound: Long,
+      numPartitions: Int): DataFrame = {
     read.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties)
   }
 
   /**
    * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
    * url named table. The theParts parameter gives a list of expressions
    * suitable for inclusion in WHERE clauses; each one defines one partition
    * of the [[DataFrame]].
    *
    * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0.
+   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
    */
   @deprecated("Use read.jdbc() instead.", "1.4.0")
   def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = {
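The final patch is purely mechanical: it drops the now-redundant removal sentence from the scaladoc and reflows long declarations to the Spark style convention, where wrapped parameters take a four-space (double) indent and the result type and opening brace stay on the last parameter line. A schematic of the reflow, with placeholder names and types:

    object StyleDemo {
      // Before (patches 1-2): one long signature line, brace on its own line.
      def combine(firstValue: String, secondValue: String, thirdValue: String): String =
      {
        firstValue + secondValue + thirdValue
      }

      // After (patch 5): double-indented parameters, brace on the signature line.
      def combineStyled(
          firstValue: String,
          secondValue: String,
          thirdValue: String): String = {
        firstValue + secondValue + thirdValue
      }
    }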