From 602c27f60a92524ff522ba5e40142e8a9159bacc Mon Sep 17 00:00:00 2001
From: wangmiao1981 <wm624@hotmail.com>
Date: Fri, 26 May 2017 23:27:04 -0700
Subject: [PATCH 01/12] add constraint logit

---
 R/pkg/R/mllib_classification.R                | 33 +++++++++++++++++--
 .../ml/r/LogisticRegressionWrapper.scala      | 32 ++++++++++++++++--
 2 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index 306a9b8676539..d3e9cc93ed09c 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -239,8 +239,14 @@ function(object, path, overwrite = FALSE) {
 setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula"),
           function(data, formula, regParam = 0.0, elasticNetParam = 0.0, maxIter = 100,
                    tol = 1E-6, family = "auto", standardization = TRUE,
-                   thresholds = 0.5, weightCol = NULL, aggregationDepth = 2) {
+                   thresholds = 0.5, weightCol = NULL, aggregationDepth = 2,
+                   lowerBoundsOnCoefficients = NULL, upperBoundsOnCoefficients = NULL,
+                   lowerBoundsOnIntercepts = NULL, upperBoundsOnIntercepts = NULL) {
             formula <- paste(deparse(formula), collapse = "")
+            lrow <- 0
+            lcol <- 0
+            urow <- 0
+            ucol <- 0
 
             if (!is.null(weightCol) && weightCol == "") {
               weightCol <- NULL
@@ -248,12 +254,35 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
               weightCol <- as.character(weightCol)
             }
 
+            if (!is.null(lowerBoundsOnIntercepts)) {
+                lowerBoundsOnIntercepts <- as.array(lowerBoundsOnIntercepts)
+            }
+
+            if (!is.null(upperBoundsOnIntercepts)) {
+                upperBoundsOnIntercepts <- as.array(upperBoundsOnIntercepts)
+            }
+
+            if (!is.null(lowerBoundsOnCoefficients)) {
+              lrow = nrow(lowerBoundsOnCoefficients)
+              lcol = ncol(lowerBoundsOnCoefficients)
+              lowerBoundsOnCoefficients <- as.array(as.vector(lowerBoundsOnCoefficients))
+            }
+
+            if (!is.null(upperBoundsOnIntercepts)) {
+              urow = nrow(upperBoundsOnIntercepts)
+              ucol = ncol(upperBoundsOnIntercepts)
+              upperBoundsOnIntercepts <- as.array(as.vector(upperBoundsOnIntercepts))
+            }
+
             jobj <- callJStatic("org.apache.spark.ml.r.LogisticRegressionWrapper", "fit",
                                 data@sdf, formula, as.numeric(regParam),
                                 as.numeric(elasticNetParam), as.integer(maxIter),
                                 as.numeric(tol), as.character(family),
                                 as.logical(standardization), as.array(thresholds),
-                                weightCol, as.integer(aggregationDepth))
+                                weightCol, as.integer(aggregationDepth),
+                                as.integer(lrow), as.integer(lcol), as.integer(urow), as.integer(ucol),
+                                lowerBoundsOnCoefficients, upperBoundsOnCoefficients,
+                                lowerBoundsOnIntercepts, upperBoundsOnIntercepts)
             new("LogisticRegressionModel", jobj = jobj)
           })
 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LogisticRegressionWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LogisticRegressionWrapper.scala
index 703bcdf4ca725..c4422d2b7f09c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/LogisticRegressionWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/LogisticRegressionWrapper.scala
@@ -25,7 +25,7 @@ import org.json4s.jackson.JsonMethods._
 import org.apache.spark.ml.{Pipeline, PipelineModel}
 import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel}
 import org.apache.spark.ml.feature.{IndexToString, RFormula}
-import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.ml.linalg.{Matrices, Vector, Vectors}
 import org.apache.spark.ml.r.RWrapperUtils._
 import org.apache.spark.ml.util._
 import org.apache.spark.sql.{DataFrame, Dataset}
@@ -97,7 +97,15 @@ private[r] object LogisticRegressionWrapper
       standardization: Boolean,
       thresholds: Array[Double],
       weightCol: String,
-      aggregationDepth: Int
+      aggregationDepth: Int,
+      lrow: Int,
+      lcol: Int,
+      urow: Int,
+      ucol: Int,
+      lowerBoundsOnCoefficients: Array[Double],
+      upperBoundsOnCoefficients: Array[Double],
+      lowerBoundsOnIntercepts: Array[Double],
+      upperBoundsOnIntercepts: Array[Double]
       ): LogisticRegressionWrapper = {
 
     val rFormula = new RFormula()
@@ -133,6 +141,26 @@ private[r] object LogisticRegressionWrapper
 
     if (weightCol != null) lr.setWeightCol(weightCol)
 
+    if (lrow != 0 && lcol != 0 && lowerBoundsOnCoefficients != null) {
+      val coef = Matrices.dense(lrow, lcol, lowerBoundsOnCoefficients)
+      lr.setLowerBoundsOnCoefficients(coef)
+    }
+
+    if (urow != 0 && ucol != 0 && upperBoundsOnCoefficients != null) {
+      val coef = Matrices.dense(urow, ucol, upperBoundsOnCoefficients)
+      lr.setUpperBoundsOnCoefficients(coef)
+    }
+
+    if (lowerBoundsOnIntercepts != null) {
+      val intercept = Vectors.dense(lowerBoundsOnIntercepts)
+      lr.setLowerBoundsOnIntercepts(intercept)
+    }
+
+    if (upperBoundsOnIntercepts != null) {
+      val intercept = Vectors.dense(upperBoundsOnIntercepts)
+      lr.setUpperBoundsOnIntercepts(intercept)
+    }
+
     val idxToStr = new IndexToString()
       .setInputCol(PREDICTED_LABEL_INDEX_COL)
       .setOutputCol(PREDICTED_LABEL_COL)

From 2dd0b7d101afe2074f34161c14ca232c3c748417 Mon Sep 17 00:00:00 2001
From: wangmiao1981 <wm624@hotmail.com>
Date: Sat, 27 May 2017 00:29:25 -0700
Subject: [PATCH 02/12] add unit test and doc

---
 R/pkg/R/mllib_classification.R                | 22 +++++++++++++++----
 .../fulltests/test_mllib_classification.R     | 20 +++++++++++++++++
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index d3e9cc93ed09c..75683cf053328 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -202,6 +202,20 @@ function(object, path, overwrite = FALSE) {
 #' @param aggregationDepth The depth for treeAggregate (greater than or equal to 2). If the dimensions of features
 #'                         or the number of partitions are large, this param could be adjusted to a larger size.
 #'                         This is an expert parameter. Default value should be good for most cases.
+#' @param lowerBoundsOnCoefficients The lower bounds on coefficients if fitting under bound constrained optimization.
+#'                                  The bound matrix must be compatible with the shape (1, number of features) for binomial
+#'                                  regression, or (number of classes, number of features) for multinomial regression.
+#'                                  It is a R matrix.
+#' @param upperBoundsOnCoefficients The upper bounds on coefficients if fitting under bound constrained optimization.
+#'                                  The bound matrix must be compatible with the shape (1, number of features) for binomial
+#'                                  regression, or (number of classes, number of features) for multinomial regression.
+#'                                  It is a R matrix.
+#' @param lowerBoundsOnIntercepts The lower bounds on intercepts if fitting under bound constrained optimization.
+#'                                The bounds vector size must be equal with 1 for binomial regression, or the number
+#'                                of classes for multinomial regression.
+#' @param upperBoundsOnIntercepts The upper bounds on intercepts if fitting under bound constrained optimization.
+#'                                The bound vector size must be equal with 1 for binomial regression, or the number
+#'                                of classes for multinomial regression.
 #' @param ... additional arguments passed to the method.
 #' @return \code{spark.logit} returns a fitted logistic regression model.
 #' @rdname spark.logit
@@ -268,10 +282,10 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
               lowerBoundsOnCoefficients <- as.array(as.vector(lowerBoundsOnCoefficients))
             }
 
-            if (!is.null(upperBoundsOnIntercepts)) {
-              urow = nrow(upperBoundsOnIntercepts)
-              ucol = ncol(upperBoundsOnIntercepts)
-              upperBoundsOnIntercepts <- as.array(as.vector(upperBoundsOnIntercepts))
+            if (!is.null(upperBoundsOnCoefficients)) {
+              urow = nrow(upperBoundsOnCoefficients)
+              ucol = ncol(upperBoundsOnCoefficients)
+              upperBoundsOnCoefficients <- as.array(as.vector(upperBoundsOnCoefficients))
             }
 
             jobj <- callJStatic("org.apache.spark.ml.r.LogisticRegressionWrapper", "fit",
diff --git a/R/pkg/tests/fulltests/test_mllib_classification.R b/R/pkg/tests/fulltests/test_mllib_classification.R
index 726e9d9a20b1c..4e1a9cb0fd820 100644
--- a/R/pkg/tests/fulltests/test_mllib_classification.R
+++ b/R/pkg/tests/fulltests/test_mllib_classification.R
@@ -200,6 +200,26 @@ test_that("spark.logit", {
   coefs <- summary$coefficients[, "Estimate"]
   expect_true(all(abs(coefsR - coefs) < 0.1))
 
+  # Test binomial logistic regression againt two classes with upperBoundsOnCoefficients
+  # and upperBoundsOnIntercepts
+  u <- matrix(c(1.0, 0.0, 1.0, 0.0), nrow = 1, ncol = 4)
+  model <- spark.logit(training, Species ~ ., upperBoundsOnCoefficients = u,
+                       upperBoundsOnIntercepts = 1.0)
+  summary <- summary(model)
+  coefsR <- c(-11.13331, 1.00000, 0.00000, 1.00000, 0.00000)
+  coefs <- summary$coefficients[, "Estimate"]
+  expect_true(all(abs(coefsR - coefs) < 0.1))
+
+  # Test binomial logistic regression againt two classes with lowerBoundsOnCoefficients
+  # and lowerBoundsOnIntercepts
+  l <- matrix(c(0.0, -1.0, 0.0, -1.0), nrow = 1, ncol = 4)
+  model <- spark.logit(training, Species ~ ., lowerBoundsOnCoefficients = l,
+                       lowerBoundsOnIntercepts = 0.0)
+  summary <- summary(model)
+  coefsR <- c(0, 0, -1, 0, 1.902192)
+  coefs <- summary$coefficients[, "Estimate"]
+  expect_true(all(abs(coefsR - coefs) < 0.1))
+
   # Test prediction with string label
   prediction <- predict(model, training)
   expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "character")

From 5b272fd3ea0f4636f0ebca999c5858b99726845c Mon Sep 17 00:00:00 2001
From: wangmiao1981 <wm624@hotmail.com>
Date: Sat, 27 May 2017 00:39:32 -0700
Subject: [PATCH 03/12] fix <-

---
 R/pkg/R/mllib_classification.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index 75683cf053328..7c0d2c1d4b8ca 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -277,14 +277,14 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
             }
 
             if (!is.null(lowerBoundsOnCoefficients)) {
-              lrow = nrow(lowerBoundsOnCoefficients)
-              lcol = ncol(lowerBoundsOnCoefficients)
+              lrow <- nrow(lowerBoundsOnCoefficients)
+              lcol <- ncol(lowerBoundsOnCoefficients)
               lowerBoundsOnCoefficients <- as.array(as.vector(lowerBoundsOnCoefficients))
             }
 
             if (!is.null(upperBoundsOnCoefficients)) {
-              urow = nrow(upperBoundsOnCoefficients)
-              ucol = ncol(upperBoundsOnCoefficients)
+              urow <- nrow(upperBoundsOnCoefficients)
+              ucol <- ncol(upperBoundsOnCoefficients)
               upperBoundsOnCoefficients <- as.array(as.vector(upperBoundsOnCoefficients))
             }
 

From eb65a1d96c91ea7798014a5ccdf790c6fbe406b7 Mon Sep 17 00:00:00 2001
From: wangmiao1981 <wm624@hotmail.com>
Date: Sat, 27 May 2017 00:46:04 -0700
Subject: [PATCH 04/12] fix style

---
 R/pkg/R/mllib_classification.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index 7c0d2c1d4b8ca..56a15e5a981b7 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -294,7 +294,8 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
                                 as.numeric(tol), as.character(family),
                                 as.logical(standardization), as.array(thresholds),
                                 weightCol, as.integer(aggregationDepth),
-                                as.integer(lrow), as.integer(lcol), as.integer(urow), as.integer(ucol),
+                                as.integer(lrow), as.integer(lcol),
+                                as.integer(urow), as.integer(ucol),
                                 lowerBoundsOnCoefficients, upperBoundsOnCoefficients,
                                 lowerBoundsOnIntercepts, upperBoundsOnIntercepts)
             new("LogisticRegressionModel", jobj = jobj)

From 62a436bba8523e0693c75841e90dcb1f6625f4e2 Mon Sep 17 00:00:00 2001
From: wangmiao1981 <wm624@hotmail.com>
Date: Sat, 27 May 2017 09:18:01 -0700
Subject: [PATCH 05/12] change test order

---
 .../fulltests/test_mllib_classification.R     | 40 +++++++++----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/R/pkg/tests/fulltests/test_mllib_classification.R b/R/pkg/tests/fulltests/test_mllib_classification.R
index 4e1a9cb0fd820..60b9a3d21508a 100644
--- a/R/pkg/tests/fulltests/test_mllib_classification.R
+++ b/R/pkg/tests/fulltests/test_mllib_classification.R
@@ -200,26 +200,6 @@ test_that("spark.logit", {
   coefs <- summary$coefficients[, "Estimate"]
   expect_true(all(abs(coefsR - coefs) < 0.1))
 
-  # Test binomial logistic regression againt two classes with upperBoundsOnCoefficients
-  # and upperBoundsOnIntercepts
-  u <- matrix(c(1.0, 0.0, 1.0, 0.0), nrow = 1, ncol = 4)
-  model <- spark.logit(training, Species ~ ., upperBoundsOnCoefficients = u,
-                       upperBoundsOnIntercepts = 1.0)
-  summary <- summary(model)
-  coefsR <- c(-11.13331, 1.00000, 0.00000, 1.00000, 0.00000)
-  coefs <- summary$coefficients[, "Estimate"]
-  expect_true(all(abs(coefsR - coefs) < 0.1))
-
-  # Test binomial logistic regression againt two classes with lowerBoundsOnCoefficients
-  # and lowerBoundsOnIntercepts
-  l <- matrix(c(0.0, -1.0, 0.0, -1.0), nrow = 1, ncol = 4)
-  model <- spark.logit(training, Species ~ ., lowerBoundsOnCoefficients = l,
-                       lowerBoundsOnIntercepts = 0.0)
-  summary <- summary(model)
-  coefsR <- c(0, 0, -1, 0, 1.902192)
-  coefs <- summary$coefficients[, "Estimate"]
-  expect_true(all(abs(coefsR - coefs) < 0.1))
-
   # Test prediction with string label
   prediction <- predict(model, training)
   expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "character")
@@ -243,6 +223,26 @@ test_that("spark.logit", {
   model2 <- spark.logit(df2, label ~ feature, weightCol = "weight")
   prediction2 <- collect(select(predict(model2, df2), "prediction"))
   expect_equal(sort(prediction2$prediction), c("0.0", "0.0", "0.0", "0.0", "0.0"))
+
+  # Test binomial logistic regression againt two classes with upperBoundsOnCoefficients
+  # and upperBoundsOnIntercepts
+  u <- matrix(c(1.0, 0.0, 1.0, 0.0), nrow = 1, ncol = 4)
+  model <- spark.logit(training, Species ~ ., upperBoundsOnCoefficients = u,
+  upperBoundsOnIntercepts = 1.0)
+  summary <- summary(model)
+  coefsR <- c(-11.13331, 1.00000, 0.00000, 1.00000, 0.00000)
+  coefs <- summary$coefficients[, "Estimate"]
+  expect_true(all(abs(coefsR - coefs) < 0.1))
+
+  # Test binomial logistic regression againt two classes with lowerBoundsOnCoefficients
+  # and lowerBoundsOnIntercepts
+  l <- matrix(c(0.0, -1.0, 0.0, -1.0), nrow = 1, ncol = 4)
+  model <- spark.logit(training, Species ~ ., lowerBoundsOnCoefficients = l,
+  lowerBoundsOnIntercepts = 0.0)
+  summary <- summary(model)
+  coefsR <- c(0, 0, -1, 0, 1.902192)
+  coefs <- summary$coefficients[, "Estimate"]
+  expect_true(all(abs(coefsR - coefs) < 0.1))
 })
 
 test_that("spark.mlp", {

From 43640f43474d37c05d6e74cf5b602c6c71c0bc3d Mon Sep 17 00:00:00 2001
From: wangmiao1981 <wm624@hotmail.com>
Date: Sun, 28 May 2017 22:57:16 -0700
Subject: [PATCH 06/12] add check and test for matrix type

---
 R/pkg/R/mllib_classification.R                    | 6 ++++++
 R/pkg/tests/fulltests/test_mllib_classification.R | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index 56a15e5a981b7..e91dc8db3b175 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -277,12 +277,18 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
             }
 
             if (!is.null(lowerBoundsOnCoefficients)) {
+              if (class(lowerBoundsOnCoefficients) != "matrix") {
+                stop("lowerBoundsOnCoefficients must be a matrix.")
+              }
               lrow <- nrow(lowerBoundsOnCoefficients)
               lcol <- ncol(lowerBoundsOnCoefficients)
               lowerBoundsOnCoefficients <- as.array(as.vector(lowerBoundsOnCoefficients))
             }
 
             if (!is.null(upperBoundsOnCoefficients)) {
+              if (class(upperBoundsOnCoefficients) != "matrix") {
+                stop("upperBoundsOnCoefficients must be a matrix.")
+              }
               urow <- nrow(upperBoundsOnCoefficients)
               ucol <- ncol(upperBoundsOnCoefficients)
               upperBoundsOnCoefficients <- as.array(as.vector(upperBoundsOnCoefficients))
diff --git a/R/pkg/tests/fulltests/test_mllib_classification.R b/R/pkg/tests/fulltests/test_mllib_classification.R
index 60b9a3d21508a..0af4a8353ab3f 100644
--- a/R/pkg/tests/fulltests/test_mllib_classification.R
+++ b/R/pkg/tests/fulltests/test_mllib_classification.R
@@ -233,16 +233,22 @@ test_that("spark.logit", {
   coefsR <- c(-11.13331, 1.00000, 0.00000, 1.00000, 0.00000)
   coefs <- summary$coefficients[, "Estimate"]
   expect_true(all(abs(coefsR - coefs) < 0.1))
+  # Test upperBoundsOnCoefficients should be matrix
+  expect_error(spark.logit(training, Species ~ ., upperBoundsOnCoefficients = as.array(c(1, 2)),
+               upperBoundsOnIntercepts = 1.0))
 
   # Test binomial logistic regression againt two classes with lowerBoundsOnCoefficients
   # and lowerBoundsOnIntercepts
   l <- matrix(c(0.0, -1.0, 0.0, -1.0), nrow = 1, ncol = 4)
   model <- spark.logit(training, Species ~ ., lowerBoundsOnCoefficients = l,
-  lowerBoundsOnIntercepts = 0.0)
+                       lowerBoundsOnIntercepts = 0.0)
   summary <- summary(model)
   coefsR <- c(0, 0, -1, 0, 1.902192)
   coefs <- summary$coefficients[, "Estimate"]
   expect_true(all(abs(coefsR - coefs) < 0.1))
+  # Test lowerBoundsOnCoefficients should be matrix
+  expect_error(spark.logit(training, Species ~ ., lowerBoundsOnCoefficients = as.array(c(1, 2)),
+               lowerBoundsOnIntercepts = 0.0))
 })
 
 test_that("spark.mlp", {

From 9ab89c76b6994eaa86e19aa466f28dc6c110c6a2 Mon Sep 17 00:00:00 2001
From: wangmiao1981 <wm624@hotmail.com>
Date: Sun, 28 May 2017 23:00:47 -0700
Subject: [PATCH 07/12] extra spaces

---
 R/pkg/tests/fulltests/test_mllib_classification.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/pkg/tests/fulltests/test_mllib_classification.R b/R/pkg/tests/fulltests/test_mllib_classification.R
index 0af4a8353ab3f..e148392e69642 100644
--- a/R/pkg/tests/fulltests/test_mllib_classification.R
+++ b/R/pkg/tests/fulltests/test_mllib_classification.R
@@ -228,14 +228,14 @@ test_that("spark.logit", {
   # and upperBoundsOnIntercepts
   u <- matrix(c(1.0, 0.0, 1.0, 0.0), nrow = 1, ncol = 4)
   model <- spark.logit(training, Species ~ ., upperBoundsOnCoefficients = u,
-  upperBoundsOnIntercepts = 1.0)
+                       upperBoundsOnIntercepts = 1.0)
   summary <- summary(model)
   coefsR <- c(-11.13331, 1.00000, 0.00000, 1.00000, 0.00000)
   coefs <- summary$coefficients[, "Estimate"]
   expect_true(all(abs(coefsR - coefs) < 0.1))
   # Test upperBoundsOnCoefficients should be matrix
   expect_error(spark.logit(training, Species ~ ., upperBoundsOnCoefficients = as.array(c(1, 2)),
-               upperBoundsOnIntercepts = 1.0))
+                           upperBoundsOnIntercepts = 1.0))
 
   # Test binomial logistic regression againt two classes with lowerBoundsOnCoefficients
   # and lowerBoundsOnIntercepts
@@ -248,7 +248,7 @@ test_that("spark.logit", {
   expect_true(all(abs(coefsR - coefs) < 0.1))
   # Test lowerBoundsOnCoefficients should be matrix
   expect_error(spark.logit(training, Species ~ ., lowerBoundsOnCoefficients = as.array(c(1, 2)),
-               lowerBoundsOnIntercepts = 0.0))
+                           lowerBoundsOnIntercepts = 0.0))
 })
 
 test_that("spark.mlp", {

From b8d683cb3308a85c198293d9e390ca908b06be63 Mon Sep 17 00:00:00 2001
From: wangmiao1981 <wm624@hotmail.com>
Date: Fri, 2 Jun 2017 16:13:58 -0700
Subject: [PATCH 08/12] address review comments

---
 R/pkg/R/mllib_classification.R                | 26 ++++++++++++-------
 .../ml/r/LogisticRegressionWrapper.scala      | 18 +++++++------
 2 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index e91dc8db3b175..d9dec59fabb62 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -257,10 +257,8 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
                    lowerBoundsOnCoefficients = NULL, upperBoundsOnCoefficients = NULL,
                    lowerBoundsOnIntercepts = NULL, upperBoundsOnIntercepts = NULL) {
             formula <- paste(deparse(formula), collapse = "")
-            lrow <- 0
-            lcol <- 0
-            urow <- 0
-            ucol <- 0
+            row <- 0
+            col <- 0
 
             if (!is.null(weightCol) && weightCol == "") {
               weightCol <- NULL
@@ -280,8 +278,8 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
               if (class(lowerBoundsOnCoefficients) != "matrix") {
                 stop("lowerBoundsOnCoefficients must be a matrix.")
               }
-              lrow <- nrow(lowerBoundsOnCoefficients)
-              lcol <- ncol(lowerBoundsOnCoefficients)
+              row <- nrow(lowerBoundsOnCoefficients)
+              col <- ncol(lowerBoundsOnCoefficients)
               lowerBoundsOnCoefficients <- as.array(as.vector(lowerBoundsOnCoefficients))
             }
 
@@ -289,8 +287,17 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
               if (class(upperBoundsOnCoefficients) != "matrix") {
                 stop("upperBoundsOnCoefficients must be a matrix.")
               }
-              urow <- nrow(upperBoundsOnCoefficients)
-              ucol <- ncol(upperBoundsOnCoefficients)
+
+              if(!is.null(lowerBoundsOnCoefficients) & (row != nrow(upperBoundsOnCoefficients)
+                | col != ncol(upperBoundsOnCoefficients))) {
+                stop("dimension of upperBoundsOnCoefficients is not the same as lowerBoundsOnCoefficients")
+              }
+
+              if (is.null(lowerBoundsOnCoefficients)) {
+                row <- nrow(upperBoundsOnCoefficients)
+                col <- ncol(upperBoundsOnCoefficients)
+              }
+
               upperBoundsOnCoefficients <- as.array(as.vector(upperBoundsOnCoefficients))
             }
 
@@ -300,8 +307,7 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
                                 as.numeric(tol), as.character(family),
                                 as.logical(standardization), as.array(thresholds),
                                 weightCol, as.integer(aggregationDepth),
-                                as.integer(lrow), as.integer(lcol),
-                                as.integer(urow), as.integer(ucol),
+                                as.integer(row), as.integer(col),
                                 lowerBoundsOnCoefficients, upperBoundsOnCoefficients,
                                 lowerBoundsOnIntercepts, upperBoundsOnIntercepts)
             new("LogisticRegressionModel", jobj = jobj)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LogisticRegressionWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LogisticRegressionWrapper.scala
index c4422d2b7f09c..b96481acf46d7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/LogisticRegressionWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/LogisticRegressionWrapper.scala
@@ -98,10 +98,8 @@ private[r] object LogisticRegressionWrapper
       thresholds: Array[Double],
       weightCol: String,
       aggregationDepth: Int,
-      lrow: Int,
-      lcol: Int,
-      urow: Int,
-      ucol: Int,
+      numRowsOfBoundsOnCoefficients: Int,
+      numColsOfBoundsOnCoefficients: Int,
       lowerBoundsOnCoefficients: Array[Double],
       upperBoundsOnCoefficients: Array[Double],
       lowerBoundsOnIntercepts: Array[Double],
@@ -141,13 +139,17 @@ private[r] object LogisticRegressionWrapper
 
     if (weightCol != null) lr.setWeightCol(weightCol)
 
-    if (lrow != 0 && lcol != 0 && lowerBoundsOnCoefficients != null) {
-      val coef = Matrices.dense(lrow, lcol, lowerBoundsOnCoefficients)
+    if (numRowsOfBoundsOnCoefficients != 0 &&
+      numColsOfBoundsOnCoefficients != 0 && lowerBoundsOnCoefficients != null) {
+      val coef = Matrices.dense(numRowsOfBoundsOnCoefficients,
+        numColsOfBoundsOnCoefficients, lowerBoundsOnCoefficients)
       lr.setLowerBoundsOnCoefficients(coef)
     }
 
-    if (urow != 0 && ucol != 0 && upperBoundsOnCoefficients != null) {
-      val coef = Matrices.dense(urow, ucol, upperBoundsOnCoefficients)
+    if (numRowsOfBoundsOnCoefficients != 0 &&
+      numColsOfBoundsOnCoefficients != 0 && upperBoundsOnCoefficients != null) {
+      val coef = Matrices.dense(numRowsOfBoundsOnCoefficients,
+        numColsOfBoundsOnCoefficients, upperBoundsOnCoefficients)
       lr.setUpperBoundsOnCoefficients(coef)
     }
 

From 98709ac23828c4c801c029da4de1406f973cf7ae Mon Sep 17 00:00:00 2001
From: wangmiao1981 <wm624@hotmail.com>
Date: Fri, 2 Jun 2017 16:43:16 -0700
Subject: [PATCH 09/12] add unit test and fix R style issue

---
 R/pkg/R/mllib_classification.R                    |  5 +++--
 R/pkg/tests/fulltests/test_mllib_classification.R | 14 ++++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index d9dec59fabb62..a66fed8f4c2d7 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -288,9 +288,10 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
                 stop("upperBoundsOnCoefficients must be a matrix.")
               }
 
-              if(!is.null(lowerBoundsOnCoefficients) & (row != nrow(upperBoundsOnCoefficients)
+              if (!is.null(lowerBoundsOnCoefficients) & (row != nrow(upperBoundsOnCoefficients)
                 | col != ncol(upperBoundsOnCoefficients))) {
-                stop("dimension of upperBoundsOnCoefficients is not the same as lowerBoundsOnCoefficients")
+                stop(paste("dimension of upperBoundsOnCoefficients ",
+                           "is not the same as lowerBoundsOnCoefficients", sep = ""))
               }
 
               if (is.null(lowerBoundsOnCoefficients)) {
diff --git a/R/pkg/tests/fulltests/test_mllib_classification.R b/R/pkg/tests/fulltests/test_mllib_classification.R
index e148392e69642..3d75f4ce11ec8 100644
--- a/R/pkg/tests/fulltests/test_mllib_classification.R
+++ b/R/pkg/tests/fulltests/test_mllib_classification.R
@@ -249,6 +249,20 @@ test_that("spark.logit", {
   # Test lowerBoundsOnCoefficients should be matrix
   expect_error(spark.logit(training, Species ~ ., lowerBoundsOnCoefficients = as.array(c(1, 2)),
                            lowerBoundsOnIntercepts = 0.0))
+
+  # Test multinomial logistic regression with lowerBoundsOnCoefficients
+  # and lowerBoundsOnIntercepts
+  l <- matrix(c(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0), nrow = 2, ncol = 4)
+  model <- spark.logit(training, Species ~ ., family = "multinomial",
+                       lowerBoundsOnCoefficients = l,
+                       lowerBoundsOnIntercepts = as.array(c(0.0, 0.0)))
+  summary <- summary(model)
+  versicolorCoefsR <- c(42.639465, 7.258104, 14.330814, 16.298243, 11.716429)
+  virginicaCoefsR <- c(0.0002970796, 4.79274, 7.65047, 25.72793, 30.0021)
+  versicolorCoefs <- summary$coefficients[, "versicolor"]
+  virginicaCoefs <- summary$coefficients[, "virginica"]
+  expect_true(all(abs(versicolorCoefsR - versicolorCoefs) < 0.1))
+  expect_true(all(abs(virginicaCoefsR - virginicaCoefs) < 0.1))
 })
 
 test_that("spark.mlp", {

From 8c88a4fd0bb1f988610e74696a1b57806f408b2c Mon Sep 17 00:00:00 2001
From: wangmiao1981 <wm624@hotmail.com>
Date: Fri, 2 Jun 2017 21:24:17 -0700
Subject: [PATCH 10/12] address review comments

---
 R/pkg/R/mllib_classification.R | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index a66fed8f4c2d7..fe4f2aeed4e00 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -290,7 +290,7 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
 
               if (!is.null(lowerBoundsOnCoefficients) & (row != nrow(upperBoundsOnCoefficients)
                 | col != ncol(upperBoundsOnCoefficients))) {
-                stop(paste("dimension of upperBoundsOnCoefficients ",
+                stop(paste0("dimension of upperBoundsOnCoefficients ",
                            "is not the same as lowerBoundsOnCoefficients", sep = ""))
               }
 
@@ -307,8 +307,7 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
                                 as.numeric(elasticNetParam), as.integer(maxIter),
                                 as.numeric(tol), as.character(family),
                                 as.logical(standardization), as.array(thresholds),
-                                weightCol, as.integer(aggregationDepth),
-                                as.integer(row), as.integer(col),
+                                weightCol, as.integer(aggregationDepth), row, col,
                                 lowerBoundsOnCoefficients, upperBoundsOnCoefficients,
                                 lowerBoundsOnIntercepts, upperBoundsOnIntercepts)
             new("LogisticRegressionModel", jobj = jobj)

From 8bebb982c5cfa786a4d82567268908ab0ce242c4 Mon Sep 17 00:00:00 2001
From: wangmiao1981 <wm624@hotmail.com>
Date: Sat, 3 Jun 2017 16:11:25 -0700
Subject: [PATCH 11/12] revert change of as.integer

---
 R/pkg/R/mllib_classification.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index fe4f2aeed4e00..2ab3088868856 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -307,7 +307,8 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
                                 as.numeric(elasticNetParam), as.integer(maxIter),
                                 as.numeric(tol), as.character(family),
                                 as.logical(standardization), as.array(thresholds),
-                                weightCol, as.integer(aggregationDepth), row, col,
+                                weightCol, as.integer(aggregationDepth),
+                                as.integer(row), as.integer(col),
                                 lowerBoundsOnCoefficients, upperBoundsOnCoefficients,
                                 lowerBoundsOnIntercepts, upperBoundsOnIntercepts)
             new("LogisticRegressionModel", jobj = jobj)

From 45b62cc9b070105ff2553ffc7b1e5788dc8134e5 Mon Sep 17 00:00:00 2001
From: wangmiao1981 <wm624@hotmail.com>
Date: Fri, 16 Jun 2017 17:05:46 +0800
Subject: [PATCH 12/12] address review comments

---
 R/pkg/R/mllib_classification.R                            | 8 ++++----
 .../spark/ml/classification/LogisticRegression.scala      | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index 2ab3088868856..1177a0f7f722b 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -211,10 +211,10 @@ function(object, path, overwrite = FALSE) {
 #'                                  regression, or (number of classes, number of features) for multinomial regression.
 #'                                  It is a R matrix.
 #' @param lowerBoundsOnIntercepts The lower bounds on intercepts if fitting under bound constrained optimization.
-#'                                The bounds vector size must be equal with 1 for binomial regression, or the number
+#'                                The bounds vector size must be equal to 1 for binomial regression, or the number
 #'                                of classes for multinomial regression.
 #' @param upperBoundsOnIntercepts The upper bounds on intercepts if fitting under bound constrained optimization.
-#'                                The bound vector size must be equal with 1 for binomial regression, or the number
+#'                                The bound vector size must be equal to 1 for binomial regression, or the number
 #'                                of classes for multinomial regression.
 #' @param ... additional arguments passed to the method.
 #' @return \code{spark.logit} returns a fitted logistic regression model.
@@ -288,8 +288,8 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
                 stop("upperBoundsOnCoefficients must be a matrix.")
               }
 
-              if (!is.null(lowerBoundsOnCoefficients) & (row != nrow(upperBoundsOnCoefficients)
-                | col != ncol(upperBoundsOnCoefficients))) {
+              if (!is.null(lowerBoundsOnCoefficients) && (row != nrow(upperBoundsOnCoefficients)
+                || col != ncol(upperBoundsOnCoefficients))) {
                 stop(paste0("dimension of upperBoundsOnCoefficients ",
                            "is not the same as lowerBoundsOnCoefficients", sep = ""))
               }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 567af0488e1b4..b234bc4c2df4f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -214,7 +214,7 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
 
   /**
    * The lower bounds on intercepts if fitting under bound constrained optimization.
-   * The bounds vector size must be equal with 1 for binomial regression, or the number
+   * The bounds vector size must be equal to 1 for binomial regression, or the number
    * of classes for multinomial regression. Otherwise, it throws exception.
    * Default is none.
    *
@@ -230,7 +230,7 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
 
   /**
    * The upper bounds on intercepts if fitting under bound constrained optimization.
-   * The bound vector size must be equal with 1 for binomial regression, or the number
+   * The bound vector size must be equal to 1 for binomial regression, or the number
    * of classes for multinomial regression. Otherwise, it throws exception.
    * Default is none.
    *
@@ -451,12 +451,12 @@ class LogisticRegression @Since("1.2.0") (
     }
     if (isSet(lowerBoundsOnIntercepts)) {
       require($(lowerBoundsOnIntercepts).size == numCoefficientSets, "The size of " +
-        "lowerBoundsOnIntercepts must be equal with 1 for binomial regression, or the number of " +
+        "lowerBoundsOnIntercepts must be equal to 1 for binomial regression, or the number of " +
         s"classes for multinomial regression, but found: ${getLowerBoundsOnIntercepts.size}.")
     }
     if (isSet(upperBoundsOnIntercepts)) {
       require($(upperBoundsOnIntercepts).size == numCoefficientSets, "The size of " +
-        "upperBoundsOnIntercepts must be equal with 1 for binomial regression, or the number of " +
+        "upperBoundsOnIntercepts must be equal to 1 for binomial regression, or the number of " +
         s"classes for multinomial regression, but found: ${getUpperBoundsOnIntercepts.size}.")
     }
     if (isSet(lowerBoundsOnCoefficients) && isSet(upperBoundsOnCoefficients)) {