From fe4532c07046c1efe68fdbdff16224bde55c02df Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 18 Sep 2019 14:27:31 -0700 Subject: [PATCH 1/4] Expose the SerializableConfiguration as a DeveloperAPI for data source / sink writers working on DataSource V2 who need access to the Hadoop configuration. This is used extensively inside of Spark's own DSV2 implementations. --- .../org/apache/spark/util/SerializableConfiguration.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala b/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala index 3354a923273ff..2efaeec09770f 100644 --- a/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala +++ b/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala @@ -20,7 +20,13 @@ import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.conf.Configuration -private[spark] +import org.apache.spark.annotation.DeveloperApi + +/** + * Helper wrapper to serialize a Hadoop configuration. Intended for use when implementing DataSourceV2 + * readers & writers which depend on the Hadoop configuration from the driver node. 
+ */ +@DeveloperApi class SerializableConfiguration(@transient var value: Configuration) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.defaultWriteObject() From 76b9f965d2451df1d73736e05ee1749b18134eef Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 18 Sep 2019 14:27:57 -0700 Subject: [PATCH 2/4] long line fix --- .../org/apache/spark/util/SerializableConfiguration.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala b/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala index 2efaeec09770f..cfd2bc0093d3f 100644 --- a/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala +++ b/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala @@ -23,8 +23,8 @@ import org.apache.hadoop.conf.Configuration import org.apache.spark.annotation.DeveloperApi /** - * Helper wrapper to serialize a Hadoop configuration. Intended for use when implementing DataSourceV2 - * readers & writers which depend on the Hadoop configuration from the driver node. + * Helper wrapper to serialize a Hadoop configuration. Intended for use when implementing + * DataSourceV2 readers & writers which depend on the Hadoop configuration from the driver node. 
*/ @DeveloperApi class SerializableConfiguration(@transient var value: Configuration) extends Serializable { From 83ed56a485fe84770ce6d7659d024814d5ae2bfa Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 19 Sep 2019 11:21:14 -0700 Subject: [PATCH 3/4] Mark SerializableConfiguration as Unstable --- .../org/apache/spark/util/SerializableConfiguration.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala b/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala index cfd2bc0093d3f..52b309abd77f2 100644 --- a/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala +++ b/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala @@ -20,13 +20,13 @@ import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.conf.Configuration -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Unstable} /** * Helper wrapper to serialize a Hadoop configuration. Intended for use when implementing * DataSourceV2 readers & writers which depend on the Hadoop configuration from the driver node. 
*/ -@DeveloperApi +@DeveloperApi @Unstable class SerializableConfiguration(@transient var value: Configuration) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.defaultWriteObject() From 9f1f5617437fba231337495d68c4454fa8058b07 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 19 Sep 2019 11:21:33 -0700 Subject: [PATCH 4/4] Make sure we can call the Scala API from Java --- .../util/SerializableConfigurationSuite.java | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 core/src/main/java/org/apache/spark/util/SerializableConfigurationSuite.java diff --git a/core/src/main/java/org/apache/spark/util/SerializableConfigurationSuite.java b/core/src/main/java/org/apache/spark/util/SerializableConfigurationSuite.java new file mode 100644 index 0000000000000..1352653028271 --- /dev/null +++ b/core/src/main/java/org/apache/spark/util/SerializableConfigurationSuite.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.util; + +/** + * This test ensures that the API we've exposed for SerializableConfiguration is usable + * from Java. It does not test any of the serialization itself. 
+ */ +class SerializableConfigurationSuite { + public SerializableConfiguration compileTest() { + SerializableConfiguration scs = new SerializableConfiguration(null); + return scs; + } +}