1 change: 1 addition & 0 deletions R/pkg/.Rbuildignore
@@ -6,3 +6,4 @@
^README\.Rmd$
^src-native$
^html$
^tests/fulltests/*
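Note: .Rbuildignore entries are Perl-compatible regular expressions matched against paths relative to the package root, so this new line keeps everything under tests/fulltests/ out of the tarball produced by R CMD build, and therefore out of what CRAN checks. A minimal sketch of how the pattern matches, assuming plain base-R regex semantics; the file paths are examples only:

# Hypothetical check of the new .Rbuildignore pattern against example paths
pattern <- "^tests/fulltests/*"
grepl(pattern, "tests/fulltests/test_Serde.R", perl = TRUE)  # TRUE  -> excluded from the built package
grepl(pattern, "tests/testthat/test_basic.R", perl = TRUE)   # FALSE -> still shipped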
2 changes: 1 addition & 1 deletion R/pkg/R/install.R
@@ -267,7 +267,7 @@ hadoopVersionName <- function(hadoopVersion) {
# The implementation refers to the appdirs package (https://pypi.python.org/pypi/appdirs),
# adapted to the Spark context
sparkCachePath <- function() {
if (.Platform$OS.type == "windows") {
if (is_windows()) {
winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
if (is.na(winAppPath)) {
stop(paste("%LOCALAPPDATA% not found.",
8 changes: 2 additions & 6 deletions R/pkg/R/utils.R
@@ -908,10 +908,6 @@ isAtomicLengthOne <- function(x) {
is.atomic(x) && length(x) == 1
}

is_cran <- function() {
!identical(Sys.getenv("NOT_CRAN"), "true")
}

is_windows <- function() {
.Platform$OS.type == "windows"
}
@@ -920,6 +916,6 @@ hadoop_home_set <- function() {
!identical(Sys.getenv("HADOOP_HOME"), "")
}

not_cran_or_windows_with_hadoop <- function() {
!is_cran() && (!is_windows() || hadoop_home_set())
windows_with_hadoop <- function() {
!is_windows() || hadoop_home_set()
}
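With is_cran() removed, the fulltests no longer call skip_on_cran(); they are kept out of the CRAN package entirely (see the .Rbuildignore change above), and the only remaining guard is windows_with_hadoop(), which skips Hadoop-dependent steps on Windows machines where HADOOP_HOME is not set. A hedged sketch of the intended usage pattern, mirroring the mllib tests further down; the test name, model, and path are illustrative only:

test_that("model save/load", {
  if (windows_with_hadoop()) {            # FALSE only on Windows without HADOOP_HOME
    modelPath <- tempfile(pattern = "spark-example", fileext = ".tmp")
    write.ml(model, modelPath)            # assumes a model fit earlier in the test
    expect_error(write.ml(model, modelPath))
    model2 <- read.ml(modelPath)
    unlink(modelPath)
  }
})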
6 changes: 0 additions & 6 deletions R/pkg/tests/fulltests/test_Serde.R
@@ -20,8 +20,6 @@ context("SerDe functionality")
sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)

test_that("SerDe of primitive types", {
skip_on_cran()

x <- callJStatic("SparkRHandler", "echo", 1L)
expect_equal(x, 1L)
expect_equal(class(x), "integer")
@@ -40,8 +38,6 @@ test_that("SerDe of primitive types", {
})

test_that("SerDe of list of primitive types", {
skip_on_cran()

x <- list(1L, 2L, 3L)
y <- callJStatic("SparkRHandler", "echo", x)
expect_equal(x, y)
@@ -69,8 +65,6 @@ test_that("SerDe of list of primitive types", {
})

test_that("SerDe of list of lists", {
skip_on_cran()

x <- list(list(1L, 2L, 3L), list(1, 2, 3),
list(TRUE, FALSE), list("a", "b", "c"))
y <- callJStatic("SparkRHandler", "echo", x)
7 changes: 1 addition & 6 deletions R/pkg/tests/fulltests/test_Windows.R
@@ -17,16 +17,11 @@
context("Windows-specific tests")

test_that("sparkJars tag in SparkContext", {
skip_on_cran()

if (.Platform$OS.type != "windows") {
if (!is_windows()) {
skip("This test is only for Windows, skipped")
}

testOutput <- launchScript("ECHO", "a/b/c", wait = TRUE)
abcPath <- testOutput[1]
expect_equal(abcPath, "a\\b\\c")
})

message("--- End test (Windows) ", as.POSIXct(Sys.time(), tz = "GMT"))
message("elapsed ", (proc.time() - timer_ptm)[3])
8 changes: 0 additions & 8 deletions R/pkg/tests/fulltests/test_binaryFile.R
@@ -24,8 +24,6 @@ sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext",
mockFile <- c("Spark is pretty.", "Spark is awesome.")

test_that("saveAsObjectFile()/objectFile() following textFile() works", {
skip_on_cran()

fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
writeLines(mockFile, fileName1)
@@ -40,8 +38,6 @@ test_that("saveAsObjectFile()/objectFile() following textFile() works", {
})

test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
skip_on_cran()

fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")

l <- list(1, 2, 3)
@@ -54,8 +50,6 @@ test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
})

test_that("saveAsObjectFile()/objectFile() following RDD transformations works", {
skip_on_cran()

fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
writeLines(mockFile, fileName1)
@@ -80,8 +74,6 @@ test_that("saveAsObjectFile()/objectFile() following RDD transformations works",
})

test_that("saveAsObjectFile()/objectFile() works with multiple paths", {
skip_on_cran()

fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")

6 changes: 0 additions & 6 deletions R/pkg/tests/fulltests/test_binary_function.R
@@ -29,8 +29,6 @@ rdd <- parallelize(sc, nums, 2L)
mockFile <- c("Spark is pretty.", "Spark is awesome.")

test_that("union on two RDDs", {
skip_on_cran()

actual <- collectRDD(unionRDD(rdd, rdd))
expect_equal(actual, as.list(rep(nums, 2)))

@@ -53,8 +51,6 @@ test_that("union on two RDDs", {
})

test_that("cogroup on two RDDs", {
skip_on_cran()

rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L)
@@ -73,8 +69,6 @@ test_that("cogroup on two RDDs", {
})

test_that("zipPartitions() on RDDs", {
skip_on_cran()

rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2
rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4
rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6
4 changes: 0 additions & 4 deletions R/pkg/tests/fulltests/test_broadcast.R
@@ -26,8 +26,6 @@ nums <- 1:2
rrdd <- parallelize(sc, nums, 2L)

test_that("using broadcast variable", {
skip_on_cran()

randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100))
randomMatBr <- broadcastRDD(sc, randomMat)

@@ -40,8 +38,6 @@ test_that("using broadcast variable", {
})

test_that("without using broadcast variable", {
skip_on_cran()

randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100))

useBroadcast <- function(x) {
8 changes: 0 additions & 8 deletions R/pkg/tests/fulltests/test_client.R
@@ -18,8 +18,6 @@
context("functions in client.R")

test_that("adding spark-testing-base as a package works", {
skip_on_cran()

args <- generateSparkSubmitArgs("", "", "", "",
"holdenk:spark-testing-base:1.3.0_0.0.5")
expect_equal(gsub("[[:space:]]", "", args),
@@ -28,22 +26,16 @@ test_that("adding spark-testing-base as a package works", {
})

test_that("no package specified doesn't add packages flag", {
skip_on_cran()

args <- generateSparkSubmitArgs("", "", "", "", "")
expect_equal(gsub("[[:space:]]", "", args),
"")
})

test_that("multiple packages don't produce a warning", {
skip_on_cran()

expect_warning(generateSparkSubmitArgs("", "", "", "", c("A", "B")), NA)
})

test_that("sparkJars sparkPackages as character vectors", {
skip_on_cran()

args <- generateSparkSubmitArgs("", "", c("one.jar", "two.jar", "three.jar"), "",
c("com.databricks:spark-avro_2.10:2.0.1"))
expect_match(args, "--jars one.jar,two.jar,three.jar")
16 changes: 0 additions & 16 deletions R/pkg/tests/fulltests/test_context.R
@@ -18,8 +18,6 @@
context("test functions in sparkR.R")

test_that("Check masked functions", {
skip_on_cran()

# Check that we are not masking any new function from base, stats, testthat unexpectedly
# NOTE: We should avoid adding entries to *namesOfMaskedCompletely* as masked functions make it
# hard for users to use base R functions. Please check when in doubt.
@@ -57,8 +55,6 @@ test_that("Check masked functions", {
})

test_that("repeatedly starting and stopping SparkR", {
skip_on_cran()

for (i in 1:4) {
sc <- suppressWarnings(sparkR.init(master = sparkRTestMaster))
rdd <- parallelize(sc, 1:20, 2L)
@@ -77,8 +73,6 @@ test_that("repeatedly starting and stopping SparkSession", {
})

test_that("rdd GC across sparkR.stop", {
skip_on_cran()

sc <- sparkR.sparkContext(master = sparkRTestMaster) # sc should get id 0
rdd1 <- parallelize(sc, 1:20, 2L) # rdd1 should get id 1
rdd2 <- parallelize(sc, 1:10, 2L) # rdd2 should get id 2
@@ -102,8 +96,6 @@ test_that("rdd GC across sparkR.stop", {
})

test_that("job group functions can be called", {
skip_on_cran()

sc <- sparkR.sparkContext(master = sparkRTestMaster)
setJobGroup("groupId", "job description", TRUE)
cancelJobGroup("groupId")
@@ -116,16 +108,12 @@ test_that("job group functions can be called", {
})

test_that("utility function can be called", {
skip_on_cran()

sparkR.sparkContext(master = sparkRTestMaster)
setLogLevel("ERROR")
sparkR.session.stop()
})

test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", {
skip_on_cran()

e <- new.env()
e[["spark.driver.memory"]] <- "512m"
ops <- getClientModeSparkSubmitOpts("sparkrmain", e)
@@ -153,8 +141,6 @@ test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whiteli
})

test_that("sparkJars sparkPackages as comma-separated strings", {
skip_on_cran()

expect_warning(processSparkJars(" a, b "))
jars <- suppressWarnings(processSparkJars(" a, b "))
expect_equal(lapply(jars, basename), list("a", "b"))
@@ -182,8 +168,6 @@ test_that("spark.lapply should perform simple transforms", {
})

test_that("add and get file to be downloaded with Spark job on every node", {
skip_on_cran()

sparkR.sparkContext(master = sparkRTestMaster)
# Test add file.
path <- tempfile(pattern = "hello", fileext = ".txt")
4 changes: 0 additions & 4 deletions R/pkg/tests/fulltests/test_includePackage.R
@@ -26,8 +26,6 @@ nums <- 1:2
rdd <- parallelize(sc, nums, 2L)

test_that("include inside function", {
skip_on_cran()

# Only run the test if plyr is installed.
if ("plyr" %in% rownames(installed.packages())) {
suppressPackageStartupMessages(library(plyr))
@@ -44,8 +42,6 @@ test_that("include inside function", {
})

test_that("use include package", {
skip_on_cran()

# Only run the test if plyr is installed.
if ("plyr" %in% rownames(installed.packages())) {
suppressPackageStartupMessages(library(plyr))
12 changes: 4 additions & 8 deletions R/pkg/tests/fulltests/test_mllib_classification.R
@@ -28,8 +28,6 @@ absoluteSparkPath <- function(x) {
}

test_that("spark.svmLinear", {
skip_on_cran()

df <- suppressWarnings(createDataFrame(iris))
training <- df[df$Species %in% c("versicolor", "virginica"), ]
model <- spark.svmLinear(training, Species ~ ., regParam = 0.01, maxIter = 10)
@@ -51,7 +49,7 @@ test_that("spark.svmLinear", {
expect_equal(sort(as.list(take(select(prediction, "prediction"), 10))[[1]]), expected)

# Test model save and load
if (not_cran_or_windows_with_hadoop()) {
if (windows_with_hadoop()) {
modelPath <- tempfile(pattern = "spark-svm-linear", fileext = ".tmp")
write.ml(model, modelPath)
expect_error(write.ml(model, modelPath))
@@ -131,7 +129,7 @@ test_that("spark.logit", {
expect_true(all(abs(setosaCoefs - setosaCoefs) < 0.1))

# Test model save and load
if (not_cran_or_windows_with_hadoop()) {
if (windows_with_hadoop()) {
modelPath <- tempfile(pattern = "spark-logit", fileext = ".tmp")
write.ml(model, modelPath)
expect_error(write.ml(model, modelPath))
@@ -228,8 +226,6 @@ test_that("spark.logit", {
})

test_that("spark.mlp", {
skip_on_cran()

df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"),
source = "libsvm")
model <- spark.mlp(df, label ~ features, blockSize = 128, layers = c(4, 5, 4, 3),
@@ -250,7 +246,7 @@ test_that("spark.mlp", {
expect_equal(head(mlpPredictions$prediction, 6), c("1.0", "0.0", "0.0", "0.0", "0.0", "0.0"))

# Test model save/load
if (not_cran_or_windows_with_hadoop()) {
if (windows_with_hadoop()) {
modelPath <- tempfile(pattern = "spark-mlp", fileext = ".tmp")
write.ml(model, modelPath)
expect_error(write.ml(model, modelPath))
@@ -363,7 +359,7 @@ test_that("spark.naiveBayes", {
"Yes", "Yes", "No", "No"))

# Test model save/load
if (not_cran_or_windows_with_hadoop()) {
if (windows_with_hadoop()) {
modelPath <- tempfile(pattern = "spark-naiveBayes", fileext = ".tmp")
write.ml(m, modelPath)
expect_error(write.ml(m, modelPath))
14 changes: 4 additions & 10 deletions R/pkg/tests/fulltests/test_mllib_clustering.R
@@ -28,8 +28,6 @@ absoluteSparkPath <- function(x) {
}

test_that("spark.bisectingKmeans", {
skip_on_cran()

newIris <- iris
newIris$Species <- NULL
training <- suppressWarnings(createDataFrame(newIris))
@@ -55,7 +53,7 @@ test_that("spark.bisectingKmeans", {
c(0, 1, 2, 3))

# Test model save/load
if (not_cran_or_windows_with_hadoop()) {
if (windows_with_hadoop()) {
modelPath <- tempfile(pattern = "spark-bisectingkmeans", fileext = ".tmp")
write.ml(model, modelPath)
expect_error(write.ml(model, modelPath))
@@ -129,7 +127,7 @@ test_that("spark.gaussianMixture", {
expect_equal(p$prediction, c(0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1))

# Test model save/load
if (not_cran_or_windows_with_hadoop()) {
if (windows_with_hadoop()) {
modelPath <- tempfile(pattern = "spark-gaussianMixture", fileext = ".tmp")
write.ml(model, modelPath)
expect_error(write.ml(model, modelPath))
@@ -177,7 +175,7 @@ test_that("spark.kmeans", {
expect_true(class(summary.model$coefficients[1, ]) == "numeric")

# Test model save/load
if (not_cran_or_windows_with_hadoop()) {
if (windows_with_hadoop()) {
modelPath <- tempfile(pattern = "spark-kmeans", fileext = ".tmp")
write.ml(model, modelPath)
expect_error(write.ml(model, modelPath))
@@ -244,7 +242,7 @@ test_that("spark.lda with libsvm", {
expect_true(logPrior <= 0 & !is.na(logPrior))

# Test model save/load
if (not_cran_or_windows_with_hadoop()) {
if (windows_with_hadoop()) {
modelPath <- tempfile(pattern = "spark-lda", fileext = ".tmp")
write.ml(model, modelPath)
expect_error(write.ml(model, modelPath))
@@ -265,8 +263,6 @@ test_that("spark.lda with libsvm", {
})

test_that("spark.lda with text input", {
skip_on_cran()

text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt"))
model <- spark.lda(text, optimizer = "online", features = "value")

@@ -309,8 +305,6 @@ test_that("spark.lda with text input", {
})

test_that("spark.posterior and spark.perplexity", {
skip_on_cran()

text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt"))
model <- spark.lda(text, features = "value", k = 3)
