From 8f1a767cb27107ecc0f89b452f8b4745cef2a628 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Thu, 11 Feb 2016 15:51:35 -0500 Subject: [PATCH 1/2] GC-633 : update CDH up to 5.5.1 --- .../com/collective/modelmatrix/cli/ASCIITableFormat.scala | 4 +--- .../src/main/scala/com/collective/modelmatrix/cli/Sink.scala | 4 ++-- .../main/scala/com/collective/modelmatrix/ModelFeature.scala | 5 +---- .../collective/modelmatrix/transform/IndexTransformer.scala | 2 +- project/Dependency.scala | 2 +- version.sbt | 2 +- 6 files changed, 7 insertions(+), 12 deletions(-) diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/ASCIITableFormat.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/ASCIITableFormat.scala index 4062690..19059f8 100644 --- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/ASCIITableFormat.scala +++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/ASCIITableFormat.scala @@ -49,10 +49,8 @@ object ASCIITableFormat { object ASCIITableFormats { - private val sqlParser = new SqlParser() - private def formatExtractExpr(s: String): String = { - sqlParser.parseExpression(s).toString() + SqlParser.parseExpression(s).toString() } implicit class StringFormattingOps(val s: String) extends AnyVal { diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/Sink.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/Sink.scala index b8ab1d8..143c671 100644 --- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/Sink.scala +++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/Sink.scala @@ -38,7 +38,7 @@ case class HiveSink( ) extends Sink { def saveDataFrame(df: DataFrame)(implicit sqlContext: SQLContext): Unit = { - df.saveAsTable(tableName, SaveMode.Overwrite) + df.write.mode(SaveMode.Overwrite).saveAsTable(tableName) } override def toString: String = @@ -50,7 +50,7 @@ case class ParquetSink( ) extends Sink { def saveDataFrame(df: DataFrame)(implicit sqlContext: SQLContext): Unit = { - df.saveAsParquetFile(path) + df.write.parquet(path) } override def toString: String = diff --git a/modelmatrix-core/src/main/scala/com/collective/modelmatrix/ModelFeature.scala b/modelmatrix-core/src/main/scala/com/collective/modelmatrix/ModelFeature.scala index 590d6a6..27ff21f 100644 --- a/modelmatrix-core/src/main/scala/com/collective/modelmatrix/ModelFeature.scala +++ b/modelmatrix-core/src/main/scala/com/collective/modelmatrix/ModelFeature.scala @@ -19,9 +19,6 @@ case class ModelFeature( object ModelFeature { - // Validate extract expressions using SqlParser that used in DataFrame.selectExpr - private val sqlParser = new SqlParser() - def parse(feature: String, config: Config, path: String): ValidationNel[String, ModelFeature] = { parse(feature, config.getConfig(path)) } @@ -33,7 +30,7 @@ object ModelFeature { def expression(p: String) = { import scalaz.Validation.FlatMap._ string(p).flatMap { input => - Try(sqlParser.parseExpression(input)) match { + Try(SqlParser.parseExpression(input)) match { case Success(parsed) => input.successNel case Failure(err) => s"Failed to parse extract expression: $err".failureNel } diff --git a/modelmatrix-core/src/main/scala/com/collective/modelmatrix/transform/IndexTransformer.scala b/modelmatrix-core/src/main/scala/com/collective/modelmatrix/transform/IndexTransformer.scala index 0340f21..a86e2ce 100644 --- a/modelmatrix-core/src/main/scala/com/collective/modelmatrix/transform/IndexTransformer.scala +++ b/modelmatrix-core/src/main/scala/com/collective/modelmatrix/transform/IndexTransformer.scala @@ -40,7 +40,7 @@ class IndexTransformer(input: DataFrame @@ Transformer.Features) extends Categor val df = scalaz.Tag.unwrap(input) - import org.apache.spark.sql.functions._ + import org.apache.spark.sql.functions.col // Group and count by extract value val grouped: DataFrame = df.filter(df(f).isNotNull).groupBy(f).count() diff --git a/project/Dependency.scala b/project/Dependency.scala index 56beb84..eb9d704 100644 --- a/project/Dependency.scala +++ b/project/Dependency.scala @@ -15,7 +15,7 @@ object Dependency { val Guava = "14.0.1" // match to Spark Guava version // Spark - val Spark = "1.3.0-cdh5.4.2" + val Spark = "1.5.0-cdh5.5.1" // Database val Slick = "3.0.0" diff --git a/version.sbt b/version.sbt index 4e8e41b..1264ea2 100644 --- a/version.sbt +++ b/version.sbt @@ -2,7 +2,7 @@ import VersionScheme.Keys._ isRelease in ThisBuild := sys.props("release") == "true" -versionPrefix in ThisBuild := "0.0.13" +versionPrefix in ThisBuild := "0.1.0" version in ThisBuild <<= Def.setting[String] { if (isRelease.value) { From c88fc927a98caa5a654715fda8ffd8c0eac20c6e Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Fri, 12 Feb 2016 15:37:17 -0500 Subject: [PATCH 2/2] GC-633 : fix failing test --- .../modelmatrix/transform/TransformerSpec.scala | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/TransformerSpec.scala b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/TransformerSpec.scala index 09d244b..4ffe2c1 100644 --- a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/TransformerSpec.scala +++ b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/TransformerSpec.scala @@ -24,22 +24,18 @@ class TransformerSpec extends FlatSpec with TestSparkContext { val isActive = true val withAllOther = true - // Can't call 'day_of_week' with String - val badFunctionType = ModelFeature(isActive, "advertisement", "f1", "day_of_week(adv_site, 'UTC')", Top(95.0, allOther = false)) - // Not enough parameters for 'concat' val wrongParametersCount = ModelFeature(isActive, "advertisement", "f2", "concat(adv_site)", Top(95.0, allOther = false)) val df = sqlContext.createDataFrame(sc.parallelize(input), schema) "Transformer" should "report failed feature extraction" in { - val features = Transformer.extractFeatures(df, Seq(badFunctionType, wrongParametersCount)) + val features = Transformer.extractFeatures(df, Seq(wrongParametersCount)) assert(features.isLeft) val errors = features.fold(identity, _ => sys.error("Should not be here")) - assert(errors.length == 2) - assert(errors(0).feature == badFunctionType) - assert(errors(1).feature == wrongParametersCount) + assert(errors.length == 1) + assert(errors(0).feature == wrongParametersCount) } }