From 7f55569f4068a24c72feb4c9a8fc45dfde3c0a6c Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Thu, 11 Aug 2016 16:15:15 -0700 Subject: [PATCH 1/6] add a type check helper --- R/pkg/R/DataFrame.R | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index e12b58e2eefc5..5bc9a29c3f608 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -359,6 +359,24 @@ setMethod("colnames<-", dataFrame(sdf) }) +specialtypeshandle <- function(type) { + if (!is.null(PRIMITIVE_TYPES[[type]])) { + type + } else { + firstChar <- substr(type, 1, 1) + returntype <- NULL + switch (firstChar, + d = { + m <- regexec("^decimal(.+)", type) + matchedStrings <- regmatches(type, m) + if (length(matchedStrings[[1]]) >= 2) { + returntype <- "double" + } + }) + returntype + } +} + #' coltypes #' #' Get column types of a SparkDataFrame @@ -397,7 +415,11 @@ setMethod("coltypes", } if (is.null(type)) { - stop(paste("Unsupported data type: ", x)) + specialtype <- specialtypeshandle(x) + if (is.null(specialtype)) { + stop(paste("Unsupported data type: ", x)) + } + type <- specialtype } } type @@ -1063,6 +1085,13 @@ setMethod("collect", df[[colIndex]] <- col } else { colType <- dtypes[[colIndex]][[2]] + if (is.null(PRIMITIVE_TYPES[[colType]])) { + specialtype <- specialtypeshandle(colType) + if (!is.null(specialtype)) { + colType <- specialtype + } + } + # Note that "binary" columns behave like complex types. if (!is.null(PRIMITIVE_TYPES[[colType]]) && colType != "binary") { vec <- do.call(c, col) From 4f88f478aaa60ad55ec9d924d1f612624fe3b06e Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Fri, 12 Aug 2016 16:54:54 -0700 Subject: [PATCH 2/6] update unit tests --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 8ff56eba1f7bf..3557e7062899d 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -668,6 +668,15 @@ test_that("collect() returns a data.frame", { df <- createDataFrame(list(list(1, 2)), schema = c("name", "name")) ldf <- collect(df) expect_equal(names(ldf), c("name", "name")) + createOrReplaceTempView(df, "dfView") + sqlCast <- collect(sql("select cast('2' as decimal) as x from dfView limit 1")) + out <- capture.output(sqlCast) + expect_true(is.data.frame(sqlCast)) + expect_equal(names(sqlCast)[1], "x") + expect_equal(nrow(sqlCast), 1) + expect_equal(ncol(sqlCast), 1) + expect_equal(out[1], " x") + expect_equal(out[2], "1 2") }) test_that("limit() returns DataFrame with the correct number of rows", { @@ -2089,6 +2098,9 @@ test_that("Method coltypes() to get and set R's data types of a DataFrame", { # Test primitive types DF <- createDataFrame(data, schema) expect_equal(coltypes(DF), c("integer", "logical", "POSIXct")) + createOrReplaceTempView(DF, "DFView") + sqlCast <- sql("select cast('2' as decimal) as x from DFView limit 1") + expect_equal(coltypes(sqlCast), "double") # Test complex types x <- createDataFrame(list(list(as.environment( @@ -2120,6 +2132,13 @@ test_that("Method str()", { colnames(iris2) <- c("Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width", "Species") iris2$col <- TRUE irisDF2 <- createDataFrame(iris2) + createOrReplaceTempView(irisDF2, "irisView") + + sqlCast <- sql("select cast('2' as decimal) as x from irisView limit 1") + castStr <- capture.output(str(sqlCast)) + expect_equal(length(castStr), 2) + expect_equal(castStr[1], "'SparkDataFrame': 1 variables:") + expect_equal(castStr[2], " $ x: dou 2") out <- capture.output(str(irisDF2)) expect_equal(length(out), 7) From 6d197112778a3f0e0348464f3932833c741e1a44 Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Sun, 14 Aug 2016 23:16:57 -0700 Subject: [PATCH 3/6] move helper function to types.R and add comments --- R/pkg/R/DataFrame.R | 18 ------------------ R/pkg/R/types.R | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 5bc9a29c3f608..51ee65be561ea 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -359,24 +359,6 @@ setMethod("colnames<-", dataFrame(sdf) }) -specialtypeshandle <- function(type) { - if (!is.null(PRIMITIVE_TYPES[[type]])) { - type - } else { - firstChar <- substr(type, 1, 1) - returntype <- NULL - switch (firstChar, - d = { - m <- regexec("^decimal(.+)", type) - matchedStrings <- regmatches(type, m) - if (length(matchedStrings[[1]]) >= 2) { - returntype <- "double" - } - }) - returntype - } -} - #' coltypes #' #' Get column types of a SparkDataFrame diff --git a/R/pkg/R/types.R b/R/pkg/R/types.R index ad048b1cd1795..4cd5f97b75357 100644 --- a/R/pkg/R/types.R +++ b/R/pkg/R/types.R @@ -67,3 +67,24 @@ rToSQLTypes <- as.environment(list( "double" = "double", "character" = "string", "logical" = "boolean")) + +# Helper function of coverting decimal type. When backend returns column type in the +# format of decimal(,) (e.g., decimal(10, 0)), this function coverts the column type +# as double type. +specialtypeshandle <- function(type) { + if (!is.null(PRIMITIVE_TYPES[[type]])) { + type + } else { + firstChar <- substr(type, 1, 1) + returntype <- NULL + switch (firstChar, + d = { + m <- regexec("^decimal(.+)", type) + matchedStrings <- regmatches(type, m) + if (length(matchedStrings[[1]]) >= 2) { + returntype <- "double" + } + }) + returntype + } +} From 63b9d0631e7b84c118a70e163d6d0250a9a1cb33 Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Tue, 23 Aug 2016 10:56:11 -0700 Subject: [PATCH 4/6] address review comments, adjust return value, change tests --- R/pkg/R/DataFrame.R | 2 +- R/pkg/R/types.R | 5 ++- R/pkg/inst/tests/testthat/test_sparkSQL.R | 37 ++++++++++++----------- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 51ee65be561ea..a92450274e077 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -401,7 +401,7 @@ setMethod("coltypes", if (is.null(specialtype)) { stop(paste("Unsupported data type: ", x)) } - type <- specialtype + type <- PRIMITIVE_TYPES[[specialtype]] } } type diff --git a/R/pkg/R/types.R b/R/pkg/R/types.R index 4cd5f97b75357..46353d3d784f1 100644 --- a/R/pkg/R/types.R +++ b/R/pkg/R/types.R @@ -70,7 +70,10 @@ rToSQLTypes <- as.environment(list( # Helper function of coverting decimal type. When backend returns column type in the # format of decimal(,) (e.g., decimal(10, 0)), this function coverts the column type -# as double type. +# as double type. This function converts backend returned types that are not the key +# of PRIMITIVE_TYPES, but should be treated as PRIMITIVE_TYPES. +# @param A type returned from the JVM backend. +# @return A type is the key of the PRIMITIVE_TYPES. specialtypeshandle <- function(type) { if (!is.null(PRIMITIVE_TYPES[[type]])) { type diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 3557e7062899d..683a15cb4ffcd 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -526,6 +526,17 @@ test_that( expect_is(newdf, "SparkDataFrame") expect_equal(count(newdf), 1) dropTempView("table1") + + createOrReplaceTempView(df, "dfView") + sqlCast <- collect(sql("select cast('2' as decimal) as x from dfView limit 1")) + out <- capture.output(sqlCast) + expect_true(is.data.frame(sqlCast)) + expect_equal(names(sqlCast)[1], "x") + expect_equal(nrow(sqlCast), 1) + expect_equal(ncol(sqlCast), 1) + expect_equal(out[1], " x") + expect_equal(out[2], "1 2") + dropTempView("dfView") }) test_that("test cache, uncache and clearCache", { @@ -668,15 +679,6 @@ test_that("collect() returns a data.frame", { df <- createDataFrame(list(list(1, 2)), schema = c("name", "name")) ldf <- collect(df) expect_equal(names(ldf), c("name", "name")) - createOrReplaceTempView(df, "dfView") - sqlCast <- collect(sql("select cast('2' as decimal) as x from dfView limit 1")) - out <- capture.output(sqlCast) - expect_true(is.data.frame(sqlCast)) - expect_equal(names(sqlCast)[1], "x") - expect_equal(nrow(sqlCast), 1) - expect_equal(ncol(sqlCast), 1) - expect_equal(out[1], " x") - expect_equal(out[2], "1 2") }) test_that("limit() returns DataFrame with the correct number of rows", { @@ -2100,7 +2102,7 @@ test_that("Method coltypes() to get and set R's data types of a DataFrame", { expect_equal(coltypes(DF), c("integer", "logical", "POSIXct")) createOrReplaceTempView(DF, "DFView") sqlCast <- sql("select cast('2' as decimal) as x from DFView limit 1") - expect_equal(coltypes(sqlCast), "double") + expect_equal(coltypes(sqlCast), "numeric") # Test complex types x <- createDataFrame(list(list(as.environment( @@ -2132,13 +2134,6 @@ test_that("Method str()", { colnames(iris2) <- c("Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width", "Species") iris2$col <- TRUE irisDF2 <- createDataFrame(iris2) - createOrReplaceTempView(irisDF2, "irisView") - - sqlCast <- sql("select cast('2' as decimal) as x from irisView limit 1") - castStr <- capture.output(str(sqlCast)) - expect_equal(length(castStr), 2) - expect_equal(castStr[1], "'SparkDataFrame': 1 variables:") - expect_equal(castStr[2], " $ x: dou 2") out <- capture.output(str(irisDF2)) expect_equal(length(out), 7) @@ -2151,6 +2146,14 @@ test_that("Method str()", { "setosa\" \"setosa\" \"setosa\" \"setosa\"")) expect_equal(out[7], " $ col : logi TRUE TRUE TRUE TRUE TRUE TRUE") + createOrReplaceTempView(irisDF2, "irisView") + + sqlCast <- sql("select cast('2' as decimal) as x from irisView limit 1") + castStr <- capture.output(str(sqlCast)) + expect_equal(length(castStr), 2) + expect_equal(castStr[1], "'SparkDataFrame': 1 variables:") + expect_equal(castStr[2], " $ x: num 2") + # A random dataset with many columns. This test is to check str limits # the number of columns. Therefore, it will suffice to check for the # number of returned rows From 4d6d048c57e2395ef4337a8074543d3eebe6cfcf Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Thu, 1 Sep 2016 15:12:39 -0700 Subject: [PATCH 5/6] address review comments --- R/pkg/R/types.R | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/R/pkg/R/types.R b/R/pkg/R/types.R index 46353d3d784f1..d30d60f86786d 100644 --- a/R/pkg/R/types.R +++ b/R/pkg/R/types.R @@ -75,19 +75,15 @@ rToSQLTypes <- as.environment(list( # @param A type returned from the JVM backend. # @return A type is the key of the PRIMITIVE_TYPES. specialtypeshandle <- function(type) { - if (!is.null(PRIMITIVE_TYPES[[type]])) { - type - } else { - firstChar <- substr(type, 1, 1) - returntype <- NULL - switch (firstChar, - d = { - m <- regexec("^decimal(.+)", type) - matchedStrings <- regmatches(type, m) - if (length(matchedStrings[[1]]) >= 2) { - returntype <- "double" - } - }) - returntype - } + firstChar <- substr(type, 1, 1) + returntype <- NULL + switch (firstChar, + d = { + m <- regexec("^decimal(.+)$", type) + matchedStrings <- regmatches(type, m) + if (length(matchedStrings[[1]]) >= 2) { + returntype <- "double" + } + }) + returntype } From 4e9e403d54e307e49a2dfb6330d7500e9f77d299 Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Thu, 1 Sep 2016 23:03:37 -0700 Subject: [PATCH 6/6] remove switch --- R/pkg/R/types.R | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/R/pkg/R/types.R b/R/pkg/R/types.R index d30d60f86786d..abca703617c7b 100644 --- a/R/pkg/R/types.R +++ b/R/pkg/R/types.R @@ -75,15 +75,11 @@ rToSQLTypes <- as.environment(list( # @param A type returned from the JVM backend. # @return A type is the key of the PRIMITIVE_TYPES. specialtypeshandle <- function(type) { - firstChar <- substr(type, 1, 1) returntype <- NULL - switch (firstChar, - d = { - m <- regexec("^decimal(.+)$", type) - matchedStrings <- regmatches(type, m) - if (length(matchedStrings[[1]]) >= 2) { - returntype <- "double" - } - }) + m <- regexec("^decimal(.+)$", type) + matchedStrings <- regmatches(type, m) + if (length(matchedStrings[[1]]) >= 2) { + returntype <- "double" + } returntype }