From 411687987ec5618f23b6ba24c0c322b3aeef04bb Mon Sep 17 00:00:00 2001
From: caoxuewen <cao.xuewen@zte.com.cn>
Date: Fri, 26 Oct 2018 11:52:31 +0800
Subject: [PATCH 1/3] Refactor JSONBenchmarks to use main method

---
 .../benchmarks/JSONBenchmarks-results.txt     | 33 +++++++
 .../datasources/json/JsonBenchmarks.scala     | 86 ++++++-------------
 2 files changed, 59 insertions(+), 60 deletions(-)
 create mode 100644 sql/core/benchmarks/JSONBenchmarks-results.txt

diff --git a/sql/core/benchmarks/JSONBenchmarks-results.txt b/sql/core/benchmarks/JSONBenchmarks-results.txt
new file mode 100644
index 0000000000000..c1d1457a80d7b
--- /dev/null
+++ b/sql/core/benchmarks/JSONBenchmarks-results.txt
@@ -0,0 +1,33 @@
+================================================================================================
+Benchmark for performance of JSON parsing
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_163-b01 on Windows 7 6.1
+Intel64 Family 6 Model 94 Stepping 3, GenuineIntel
+JSON schema inferring:                   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+No encoding                                 48088 / 48180          2.1         480.9       1.0X
+UTF-8 is set                                71881 / 71992          1.4         718.8       0.7X
+
+OpenJDK 64-Bit Server VM 1.8.0_163-b01 on Windows 7 6.1
+Intel64 Family 6 Model 94 Stepping 3, GenuineIntel
+JSON per-line parsing:                   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+No encoding                                 12107 / 12246          8.3         121.1       1.0X
+UTF-8 is set                                12375 / 12475          8.1         123.8       1.0X
+
+OpenJDK 64-Bit Server VM 1.8.0_163-b01 on Windows 7 6.1
+Intel64 Family 6 Model 94 Stepping 3, GenuineIntel
+JSON parsing of wide lines:              Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+No encoding                               168172 / 199309          0.1       16817.2       1.0X
+UTF-8 is set                              167959 / 211007          0.1       16795.9       1.0X
+
+OpenJDK 64-Bit Server VM 1.8.0_163-b01 on Windows 7 6.1
+Intel64 Family 6 Model 94 Stepping 3, GenuineIntel
+Count a dataset with 10 columns:         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Select 10 columns + count()                 11828 / 12138          0.8        1182.8       1.0X
+Select 1 column + count()                   10049 / 10056          1.0        1004.9       1.2X
+count()                                       2611 / 2617          3.8         261.1       4.5X
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmarks.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmarks.scala
index 3c4a5ab32724b..c2995ef561762 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmarks.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmarks.scala
@@ -16,32 +16,31 @@
  */
 package org.apache.spark.sql.execution.datasources.json
 
-import java.io.File
-
-import org.apache.spark.SparkConf
 import org.apache.spark.benchmark.Benchmark
-import org.apache.spark.sql.{Row, SparkSession}
-import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark
 import org.apache.spark.sql.functions.lit
 import org.apache.spark.sql.types._
 
 /**
  * The benchmarks aims to measure performance of JSON parsing when encoding is set and isn't.
- * To run this:
- *  spark-submit --class <this class> --jars <spark sql test jar>
+ * To run this benchmark:
+ * {{{
+ *   1. without sbt:
+ *      bin/spark-submit --class <this class> --jars <spark core test jar>,
+ *        <spark catalyst test jar> <spark sql test jar>
+ *   2. build/sbt "sql/test:runMain <this class>"
+ *   3. generate result:
+ *      SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
+ *      Results will be written to "benchmarks/JSONBenchmarks-results.txt".
+ * }}}
  */
-object JSONBenchmarks extends SQLHelper {
-  val conf = new SparkConf()
-
-  val spark = SparkSession.builder
-    .master("local[1]")
-    .appName("benchmark-json-datasource")
-    .config(conf)
-    .getOrCreate()
+
+object JSONBenchmarks extends SqlBasedBenchmark {
   import spark.implicits._
 
   def schemaInferring(rowsNum: Int): Unit = {
-    val benchmark = new Benchmark("JSON schema inferring", rowsNum)
+    val benchmark = new Benchmark("JSON schema inferring", rowsNum, output = output)
 
     withTempPath { path =>
       // scalastyle:off println
@@ -65,21 +64,12 @@ object JSONBenchmarks extends SQLHelper {
           .json(path.getAbsolutePath)
       }
 
-      /*
-      Java HotSpot(TM) 64-Bit Server VM 1.8.0_172-b11 on Mac OS X 10.13.5
-      Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
-
-      JSON schema inferring:                Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-      ---------------------------------------------------------------------------------------------
-      No encoding                              45908 / 46480          2.2         459.1       1.0X
-      UTF-8 is set                             68469 / 69762          1.5         684.7       0.7X
-      */
       benchmark.run()
     }
   }
 
   def perlineParsing(rowsNum: Int): Unit = {
-    val benchmark = new Benchmark("JSON per-line parsing", rowsNum)
+    val benchmark = new Benchmark("JSON per-line parsing", rowsNum, output = output)
 
     withTempPath { path =>
       // scalastyle:off println
@@ -107,21 +97,12 @@ object JSONBenchmarks extends SQLHelper {
           .count()
       }
 
-      /*
-      Java HotSpot(TM) 64-Bit Server VM 1.8.0_172-b11 on Mac OS X 10.13.5
-      Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
-
-      JSON per-line parsing:                Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-      ---------------------------------------------------------------------------------------------
-      No encoding                               9982 / 10237         10.0          99.8       1.0X
-      UTF-8 is set                             16373 / 16806          6.1         163.7       0.6X
-      */
       benchmark.run()
     }
   }
 
   def perlineParsingOfWideColumn(rowsNum: Int): Unit = {
-    val benchmark = new Benchmark("JSON parsing of wide lines", rowsNum)
+    val benchmark = new Benchmark("JSON parsing of wide lines", rowsNum, output = output)
 
     withTempPath { path =>
       // scalastyle:off println
@@ -156,22 +137,14 @@ object JSONBenchmarks extends SQLHelper {
           .count()
       }
 
-      /*
-      Java HotSpot(TM) 64-Bit Server VM 1.8.0_172-b11 on Mac OS X 10.13.5
-      Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
-
-      JSON parsing of wide lines:           Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-      ---------------------------------------------------------------------------------------------
-      No encoding                              26038 / 26386          0.4        2603.8       1.0X
-      UTF-8 is set                             28343 / 28557          0.4        2834.3       0.9X
-      */
       benchmark.run()
     }
   }
 
   def countBenchmark(rowsNum: Int): Unit = {
     val colsNum = 10
-    val benchmark = new Benchmark(s"Count a dataset with $colsNum columns", rowsNum)
+    val benchmark =
+      new Benchmark(s"Count a dataset with $colsNum columns", rowsNum, output = output)
 
     withTempPath { path =>
       val fields = Seq.tabulate(colsNum)(i => StructField(s"col$i", IntegerType))
@@ -195,23 +168,16 @@ object JSONBenchmarks extends SQLHelper {
         ds.count()
       }
 
-      /*
-      Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz
-
-      Count a dataset with 10 columns:      Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-      ---------------------------------------------------------------------------------------------
-      Select 10 columns + count()               9961 / 10006          1.0         996.1       1.0X
-      Select 1 column + count()                  8355 / 8470          1.2         835.5       1.2X
-      count()                                    2104 / 2156          4.8         210.4       4.7X
-      */
       benchmark.run()
     }
   }
 
-  def main(args: Array[String]): Unit = {
-    schemaInferring(100 * 1000 * 1000)
-    perlineParsing(100 * 1000 * 1000)
-    perlineParsingOfWideColumn(10 * 1000 * 1000)
-    countBenchmark(10 * 1000 * 1000)
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    runBenchmark("Benchmark for performance of JSON parsing") {
+      schemaInferring(100 * 1000 * 1000)
+      perlineParsing(100 * 1000 * 1000)
+      perlineParsingOfWideColumn(10 * 1000 * 1000)
+      countBenchmark(10 * 1000 * 1000)
+    }
   }
 }

From c1cde631c8aaaa55c61d877d12e5cb1948b94d55 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun <dongjoon@apache.org>
Date: Tue, 30 Oct 2018 08:24:16 +0000
Subject: [PATCH 2/3] Update JSON benchmark results

---
 .../benchmarks/JSONBenchmarks-results.txt     | 38 ++++++++++---------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/sql/core/benchmarks/JSONBenchmarks-results.txt b/sql/core/benchmarks/JSONBenchmarks-results.txt
index c1d1457a80d7b..99937309a4145 100644
--- a/sql/core/benchmarks/JSONBenchmarks-results.txt
+++ b/sql/core/benchmarks/JSONBenchmarks-results.txt
@@ -2,32 +2,36 @@
 Benchmark for performance of JSON parsing
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 1.8.0_163-b01 on Windows 7 6.1
-Intel64 Family 6 Model 94 Stepping 3, GenuineIntel
+Preparing data for benchmarking ...
+OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 JSON schema inferring:                   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------
-No encoding                                 48088 / 48180          2.1         480.9       1.0X
-UTF-8 is set                                71881 / 71992          1.4         718.8       0.7X
+No encoding                                 62946 / 63310          1.6         629.5       1.0X
+UTF-8 is set                              112814 / 112866          0.9        1128.1       0.6X
 
-OpenJDK 64-Bit Server VM 1.8.0_163-b01 on Windows 7 6.1
-Intel64 Family 6 Model 94 Stepping 3, GenuineIntel
+Preparing data for benchmarking ...
+OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 JSON per-line parsing:                   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------
-No encoding                                 12107 / 12246          8.3         121.1       1.0X
-UTF-8 is set                                12375 / 12475          8.1         123.8       1.0X
+No encoding                                 16468 / 16553          6.1         164.7       1.0X
+UTF-8 is set                                16420 / 16441          6.1         164.2       1.0X
 
-OpenJDK 64-Bit Server VM 1.8.0_163-b01 on Windows 7 6.1
-Intel64 Family 6 Model 94 Stepping 3, GenuineIntel
+Preparing data for benchmarking ...
+OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 JSON parsing of wide lines:              Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------
-No encoding                               168172 / 199309          0.1       16817.2       1.0X
-UTF-8 is set                              167959 / 211007          0.1       16795.9       1.0X
+No encoding                                 39789 / 40053          0.3        3978.9       1.0X
+UTF-8 is set                                39505 / 39584          0.3        3950.5       1.0X
 
-OpenJDK 64-Bit Server VM 1.8.0_163-b01 on Windows 7 6.1
-Intel64 Family 6 Model 94 Stepping 3, GenuineIntel
+OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Count a dataset with 10 columns:         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------
-Select 10 columns + count()                 11828 / 12138          0.8        1182.8       1.0X
-Select 1 column + count()                   10049 / 10056          1.0        1004.9       1.2X
-count()                                       2611 / 2617          3.8         261.1       4.5X
+Select 10 columns + count()                 15997 / 16015          0.6        1599.7       1.0X
+Select 1 column + count()                   13280 / 13326          0.8        1328.0       1.2X
+count()                                       3006 / 3021          3.3         300.6       5.3X
+
 

From 422df479aae55b52e17eccf1f46675c9b7369ddd Mon Sep 17 00:00:00 2001
From: caoxuewen <cao.xuewen@zte.com.cn>
Date: Tue, 30 Oct 2018 17:26:18 +0800
Subject: [PATCH 3/3] Rename JSONBenchmarks to JSONBenchmark (source file, object, results file)

---
 .../{JSONBenchmarks-results.txt => JSONBenchmark-results.txt} | 0
 .../json/{JsonBenchmarks.scala => JsonBenchmark.scala}        | 4 ++--
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename sql/core/benchmarks/{JSONBenchmarks-results.txt => JSONBenchmark-results.txt} (100%)
 rename sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/{JsonBenchmarks.scala => JsonBenchmark.scala} (97%)

diff --git a/sql/core/benchmarks/JSONBenchmarks-results.txt b/sql/core/benchmarks/JSONBenchmark-results.txt
similarity index 100%
rename from sql/core/benchmarks/JSONBenchmarks-results.txt
rename to sql/core/benchmarks/JSONBenchmark-results.txt
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmarks.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
similarity index 97%
rename from sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmarks.scala
rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
index c2995ef561762..04f724ec8638f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmarks.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
@@ -32,11 +32,11 @@ import org.apache.spark.sql.types._
  *   2. build/sbt "sql/test:runMain <this class>"
  *   3. generate result:
  *      SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
- *      Results will be written to "benchmarks/JSONBenchmarks-results.txt".
+ *      Results will be written to "benchmarks/JSONBenchmark-results.txt".
  * }}}
  */
 
-object JSONBenchmarks extends SqlBasedBenchmark {
+object JSONBenchmark extends SqlBasedBenchmark {
   import spark.implicits._
 
   def schemaInferring(rowsNum: Int): Unit = {