diff --git a/ci/windows-pkg-arrow-for-r.sh b/ci/windows-pkg-arrow-for-r.sh index c70eba6571c..f5abdb68c56 100644 --- a/ci/windows-pkg-arrow-for-r.sh +++ b/ci/windows-pkg-arrow-for-r.sh @@ -32,8 +32,8 @@ mv mingw64/include $DST_DIR mkdir -p $DST_DIR/lib-4.9.3/x64 mv mingw64/lib/*.a $DST_DIR/lib-4.9.3/x64 # Same for the 32-bit versions -mkdir -p $DST_DIR/lib-4.9.3/i686 -mv mingw32/lib/*.a $DST_DIR/lib-4.9.3/i686 +mkdir -p $DST_DIR/lib-4.9.3/i386 +mv mingw32/lib/*.a $DST_DIR/lib-4.9.3/i386 mkdir deps && cd deps # Get these from "backports" so they were compiled with gcc 4.9 @@ -58,9 +58,9 @@ ls | xargs -n 1 tar -xJf cd .. mkdir -p $DST_DIR/lib/x64 -mkdir -p $DST_DIR/lib/i686 +mkdir -p $DST_DIR/lib/i386 mv deps/mingw64/lib/*.a $DST_DIR/lib/x64 -mv deps/mingw32/lib/*.a $DST_DIR/lib/i686 +mv deps/mingw32/lib/*.a $DST_DIR/lib/i386 # Create build artifact zip -r ${DST_DIR}.zip $DST_DIR diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 263a29f719c..9159d270dcb 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -2,9 +2,9 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' Version: 0.14.1.9000 Authors@R: c( - person("Romain", "Fran\u00e7ois", email = "romain@rstudio.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2444-4226")), + person("Romain", "Fran\u00e7ois", email = "romain@rstudio.com", role = c("aut"), comment = c(ORCID = "0000-0002-2444-4226")), person("Jeroen", "Ooms", email = "jeroen@berkeley.edu", role = c("aut")), - person("Neal", "Richardson", email = "neal@ursalabs.org", role = c("aut")), + person("Neal", "Richardson", email = "neal@ursalabs.org", role = c("aut", "cre")), person("Javier", "Luraschi", email = "javier@rstudio.com", role = c("ctb")), person("Jeffrey", "Wong", email = "jeffreyw@netflix.com", role = c("ctb")), person("Apache Arrow", email = "dev@arrow.apache.org", role = c("aut", "cph")) @@ -13,7 +13,7 @@ Description: 'Apache' 'Arrow' is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. This - package provides an interface to the Arrow C++ library. + package provides an interface to the 'Arrow C++' library. Depends: R (>= 3.1) License: Apache License (>= 2.0) URL: https://github.com/apache/arrow/ @@ -22,6 +22,7 @@ Encoding: UTF-8 Language: en-US LazyData: true SystemRequirements: C++11 +Biarch: true LinkingTo: Rcpp (>= 1.0.1) Imports: diff --git a/r/Makefile b/r/Makefile index f3bdbd6daa7..f907cefa18c 100644 --- a/r/Makefile +++ b/r/Makefile @@ -30,6 +30,7 @@ deps: R --slave -e 'lib <- Sys.getenv("R_LIB", .libPaths()[1]); install.packages("devtools", repo="https://cloud.r-project.org", lib=lib); devtools::install_dev_deps(lib=lib)' build: doc + cp ../NOTICE.txt inst/NOTICE.txt R CMD build . check: build diff --git a/r/NAMESPACE b/r/NAMESPACE index e4b367d0eaf..5f29a61829c 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -215,6 +215,6 @@ importFrom(tidyselect,num_range) importFrom(tidyselect,one_of) importFrom(tidyselect,starts_with) importFrom(tidyselect,vars_select) -importFrom(utils,installed.packages) +importFrom(utils,packageDescription) importFrom(utils,packageVersion) useDynLib(arrow, .registration = TRUE) diff --git a/r/R/Field.R b/r/R/Field.R index 5175ee5d154..d5bdf2250e1 100644 --- a/r/R/Field.R +++ b/r/R/Field.R @@ -64,11 +64,11 @@ #' @param metadata currently ignored #' #' @examples -#' -#' \dontrun{ +#' \donttest{ +#' try({ #' field("x", int32()) +#' }) #' } -#' #' @export field <- function(name, type, metadata) { assert_that(inherits(name, "character"), length(name) == 1L) @@ -78,6 +78,6 @@ field <- function(name, type, metadata) { } .fields <- function(.list){ - assert_that( !is.null(nms <- names(.list)) ) + assert_that(!is.null(nms <- names(.list))) map2(nms, .list, field) } diff --git a/r/R/csv.R b/r/R/csv.R index bf69830079d..51729509f7e 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -61,6 +61,18 @@ #' #' @return A `data.frame`, or an `arrow::Table` if `as_tibble = FALSE`. #' @export +#' @examples +#' \donttest{ +#' try({ +#' tf <- tempfile() +#' on.exit(unlink(tf)) +#' write.csv(iris, file = tf) +#' df <- read_csv_arrow(tf) +#' dim(df) +#' # Can select columns +#' df <- read_csv_arrow(tf, col_select = starts_with("Sepal")) +#' }) +#' } read_delim_arrow <- function(file, delim = ",", quote = '"', diff --git a/r/R/feather.R b/r/R/feather.R index 57c1dffae42..9de91553163 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -61,6 +61,14 @@ FeatherTableWriter <- function(stream) { #' @param stream A file path or an `arrow::io::OutputStream` #' #' @export +#' @examples +#' \donttest{ +#' try({ +#' tf <- tempfile() +#' on.exit(unlink(tf)) +#' write_feather(mtcars, tf) +#' }) +#' } write_feather <- function(data, stream) { UseMethod("write_feather", data) } @@ -169,6 +177,18 @@ FeatherTableReader.fs_path <- function(file, mmap = TRUE, ...) { #' @return A `data.frame` if `as_tibble` is `TRUE` (the default), or a [arrow::Table][arrow__Table] otherwise #' #' @export +#' @examples +#' \donttest{ +#' try({ +#' tf <- tempfile() +#' on.exit(unlink(tf)) +#' write_feather(iris, tf) +#' df <- read_feather(tf) +#' dim(df) +#' # Can select columns +#' df <- read_feather(tf, col_select = starts_with("Sepal")) +#' }) +#' } read_feather <- function(file, col_select = NULL, as_tibble = TRUE, ...){ reader <- FeatherTableReader(file, ...) diff --git a/r/R/install-arrow.R b/r/R/install-arrow.R index 4887d8eefa3..33a3d807ae7 100644 --- a/r/R/install-arrow.R +++ b/r/R/install-arrow.R @@ -22,14 +22,15 @@ #' first. This function offers guidance on how to get the C++ library depending #' on your operating system and package version. #' @export -#' @importFrom utils packageVersion installed.packages +#' @importFrom utils packageVersion packageDescription +#' @examples +#' install_arrow() install_arrow <- function() { os <- tolower(Sys.info()[["sysname"]]) # c("windows", "darwin", "linux", "sunos") # win/mac/linux/solaris version <- packageVersion("arrow") # From CRAN check: - rep <- installed.packages(fields="Repository")["arrow", "Repository"] - from_cran <- identical(rep, "CRAN") + from_cran <- identical(packageDescription("arrow")$Repository, "CRAN") # Is it possible to tell if was a binary install from CRAN vs. source? message(install_arrow_msg(arrow_available(), version, from_cran, os)) diff --git a/r/R/json.R b/r/R/json.R index 647126d1e4e..dce130e61a1 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -136,6 +136,19 @@ json_table_reader.default <- function(file, #' #' @return A `data.frame`, or an `arrow::Table` if `as_tibble = FALSE`. #' @export +#' @examples +#' \donttest{ +#' try({ +#' tf <- tempfile() +#' on.exit(unlink(tf)) +#' writeLines(' +#' { "hello": 3.5, "world": false, "yo": "thing" } +#' { "hello": 3.25, "world": null } +#' { "hello": 0.0, "world": true, "yo": null } +#' ', tf, useBytes=TRUE) +#' df <- read_json_arrow(tf) +#' }) +#' } read_json_arrow <- function(file, col_select = NULL, as_tibble = TRUE, ...) { tab <- json_table_reader(file, ...)$Read()$select(!!enquo(col_select)) diff --git a/r/R/parquet.R b/r/R/parquet.R index b75f93ea804..1163a2df828 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -109,11 +109,11 @@ parquet_file_reader.character <- function(file, props = parquet_arrow_reader_pro #' @return A [arrow::Table][arrow__Table], or a `data.frame` if `as_tibble` is #' `TRUE`. #' @examples -#' -#' \dontrun{ +#' \donttest{ +#' try({ #' df <- read_parquet(system.file("v0.7.1.parquet", package="arrow")) +#' }) #' } -#' #' @export read_parquet <- function(file, col_select = NULL, as_tibble = TRUE, props = parquet_arrow_reader_properties(), ...) { reader <- parquet_file_reader(file, props = props, ...) @@ -134,12 +134,13 @@ read_parquet <- function(file, col_select = NULL, as_tibble = TRUE, props = parq #' @param file a file path #' #' @examples -#' -#' \dontrun{ +#' \donttest{ +#' try({ #' tf <- tempfile(fileext = ".parquet") +#' on.exit(unlink(tf)) #' write_parquet(tibble::tibble(x = 1:5), tf) +#' }) #' } -#' #' @export write_parquet <- function(table, file) { write_parquet_file(to_arrow(table), file) diff --git a/r/cran-comments.md b/r/cran-comments.md index c99d06a2288..6d270344d9d 100644 --- a/r/cran-comments.md +++ b/r/cran-comments.md @@ -1,27 +1,32 @@ ## Test environments -* local OS X install, R 3.5.3 -* win-builder (devel and release) +* Debian Linux, R-devel, GCC ASAN/UBSAN +* Ubuntu Linux 16.04 LTS, R-release, GCC +* win-builder (R-devel and R-release) +* macOS (10.11, 10.14), R-release +* Oracle Solaris 10, x86, 32-bit, R-patched ## R CMD check results -0 errors | 0 warnings | 1 note +There were no ERRORs or WARNINGs. On some platforms, there is a NOTE about the installed package size, as well as the "New submission" NOTE. -* This is a new release. +## Feedback from previous submission -## Platform support +Version 0.14.1 was submitted to CRAN on 24 July 2019. The CRAN team requested two revisions: -This package supports Windows and macOS but not Linux. +1. Put quotes around 'Arrow C++' in the package Description. -The Arrow project is cross-language development platform -for in-memory data, it spans several languages and -their code base is quite large (about 150K lines of C -sources and more than 600K lines across all languages). +2. Remove usage of utils::installed.packages() -In the future, the Apache Arrow project will release -binaries in the official Fedora and Debian repos; -we're working on hard on this, but due to the size, -this is likely to be implemented until next year. +Both have been addressed in this resubmission. -In the meantime, R users can install the Linux binaries -from custom repos or build Arrow from source when using -Linux. +## Feedback from initial submission + +Version 0.14.0 was submitted to CRAN on 18 July 2019. The CRAN team requested two revisions, which have been addressed in this re-submission. + +1. Source files contain a comment header, required in all source files in Apache Software Foundation projects (see https://www.apache.org/legal/src-headers.html), which mentions a NOTICE file. But the NOTICE file was not included in the package. + +This submission includes a NOTICE.txt file in the inst/ directory. + +2. Rd files for main exported functions should have executable (not in \dontrun{}) examples. + +This submission includes examples for the user-facing functions (read_parquet, write_parquet, read_feather, write_feather, et al.) diff --git a/r/inst/NOTICE.txt b/r/inst/NOTICE.txt new file mode 100644 index 00000000000..a609791374c --- /dev/null +++ b/r/inst/NOTICE.txt @@ -0,0 +1,84 @@ +Apache Arrow +Copyright 2016-2019 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This product includes software from the SFrame project (BSD, 3-clause). +* Copyright (C) 2015 Dato, Inc. +* Copyright (c) 2009 Carnegie Mellon University. + +This product includes software from the Feather project (Apache 2.0) +https://github.com/wesm/feather + +This product includes software from the DyND project (BSD 2-clause) +https://github.com/libdynd + +This product includes software from the LLVM project + * distributed under the University of Illinois Open Source + +This product includes software from the google-lint project + * Copyright (c) 2009 Google Inc. All rights reserved. + +This product includes software from the mman-win32 project + * Copyright https://code.google.com/p/mman-win32/ + * Licensed under the MIT License; + +This product includes software from the LevelDB project + * Copyright (c) 2011 The LevelDB Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * Moved from Kudu http://github.com/cloudera/kudu + +This product includes software from the CMake project + * Copyright 2001-2009 Kitware, Inc. + * Copyright 2012-2014 Continuum Analytics, Inc. + * All rights reserved. + +This product includes software from https://github.com/matthew-brett/multibuild (BSD 2-clause) + * Copyright (c) 2013-2016, Matt Terry and Matthew Brett; all rights reserved. + +This product includes software from the Ibis project (Apache 2.0) + * Copyright (c) 2015 Cloudera, Inc. + * https://github.com/cloudera/ibis + +This product includes software from Dremio (Apache 2.0) + * Copyright (C) 2017-2018 Dremio Corporation + * https://github.com/dremio/dremio-oss + +This product includes software from Google Guava (Apache 2.0) + * Copyright (C) 2007 The Guava Authors + * https://github.com/google/guava + +This product include software from CMake (BSD 3-Clause) + * CMake - Cross Platform Makefile Generator + * Copyright 2000-2019 Kitware, Inc. and Contributors + +The web site includes files generated by Jekyll. + +-------------------------------------------------------------------------------- + +This product includes code from Apache Kudu, which includes the following in +its NOTICE file: + + Apache Kudu + Copyright 2016 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + Portions of this software were developed at + Cloudera, Inc (http://www.cloudera.com/). + +-------------------------------------------------------------------------------- + +This product includes code from Apache ORC, which includes the following in +its NOTICE file: + + Apache ORC + Copyright 2013-2019 The Apache Software Foundation + + This product includes software developed by The Apache Software + Foundation (http://www.apache.org/). + + This product includes software developed by Hewlett-Packard: + (c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P diff --git a/r/man/arrow-package.Rd b/r/man/arrow-package.Rd index c3da92dc00f..35a5f67338a 100644 --- a/r/man/arrow-package.Rd +++ b/r/man/arrow-package.Rd @@ -10,7 +10,7 @@ development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. This - package provides an interface to the Arrow C++ library. + package provides an interface to the 'Arrow C++' library. } \seealso{ Useful links: @@ -21,12 +21,12 @@ Useful links: } \author{ -\strong{Maintainer}: Romain François \email{romain@rstudio.com} (0000-0002-2444-4226) +\strong{Maintainer}: Neal Richardson \email{neal@ursalabs.org} Authors: \itemize{ + \item Romain François \email{romain@rstudio.com} (0000-0002-2444-4226) \item Jeroen Ooms \email{jeroen@berkeley.edu} - \item Neal Richardson \email{neal@ursalabs.org} \item Apache Arrow \email{dev@arrow.apache.org} [copyright holder] } diff --git a/r/man/field.Rd b/r/man/field.Rd index 52995a13fa9..8cf260a08f1 100644 --- a/r/man/field.Rd +++ b/r/man/field.Rd @@ -17,9 +17,9 @@ field(name, type, metadata) Factory for a \code{arrow::Field} } \examples{ - -\dontrun{ +\donttest{ +try({ field("x", int32()) +}) } - } diff --git a/r/man/install_arrow.Rd b/r/man/install_arrow.Rd index 4393258bfa2..7ad85da19f0 100644 --- a/r/man/install_arrow.Rd +++ b/r/man/install_arrow.Rd @@ -12,3 +12,6 @@ but when installing from source, you'll need to obtain the C++ library first. This function offers guidance on how to get the C++ library depending on your operating system and package version. } +\examples{ +install_arrow() +} diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd index 0726889cdab..c645695506f 100644 --- a/r/man/read_delim_arrow.Rd +++ b/r/man/read_delim_arrow.Rd @@ -82,3 +82,16 @@ equivalent in \code{readr::read_csv()}, you can either provide them in the \code{parse_options}, \code{convert_options}, or \code{read_options} arguments, or you can call \code{\link[=csv_table_reader]{csv_table_reader()}} directly for lower-level access. } +\examples{ +\donttest{ +try({ + tf <- tempfile() + on.exit(unlink(tf)) + write.csv(iris, file = tf) + df <- read_csv_arrow(tf) + dim(df) + # Can select columns + df <- read_csv_arrow(tf, col_select = starts_with("Sepal")) +}) +} +} diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd index 954ee929012..41250dd0960 100644 --- a/r/man/read_feather.Rd +++ b/r/man/read_feather.Rd @@ -21,3 +21,16 @@ A \code{data.frame} if \code{as_tibble} is \code{TRUE} (the default), or a \link \description{ Read a Feather file } +\examples{ +\donttest{ +try({ + tf <- tempfile() + on.exit(unlink(tf)) + write_feather(iris, tf) + df <- read_feather(tf) + dim(df) + # Can select columns + df <- read_feather(tf, col_select = starts_with("Sepal")) +}) +} +} diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd index 57a73b1209b..b4e774b700c 100644 --- a/r/man/read_json_arrow.Rd +++ b/r/man/read_json_arrow.Rd @@ -23,3 +23,17 @@ A \code{data.frame}, or an \code{arrow::Table} if \code{as_tibble = FALSE}. \description{ Use \link[=arrow__json__TableReader]{arrow::json::TableReader} from \code{\link[=json_table_reader]{json_table_reader()}} } +\examples{ +\donttest{ +try({ + tf <- tempfile() + on.exit(unlink(tf)) + writeLines(' + { "hello": 3.5, "world": false, "yo": "thing" } + { "hello": 3.25, "world": null } + { "hello": 0.0, "world": true, "yo": null } + ', tf, useBytes=TRUE) + df <- read_json_arrow(tf) +}) +} +} diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index 917acc00915..b9ac36b157a 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -29,9 +29,9 @@ A \link[=arrow__Table]{arrow::Table}, or a \code{data.frame} if \code{as_tibble} This function enables you to read Parquet files into R. } \examples{ - -\dontrun{ +\donttest{ +try({ df <- read_parquet(system.file("v0.7.1.parquet", package="arrow")) +}) } - } diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd index a427320e2f9..9eb20021caf 100644 --- a/r/man/write_feather.Rd +++ b/r/man/write_feather.Rd @@ -14,3 +14,12 @@ write_feather(data, stream) \description{ Write data in the Feather format } +\examples{ +\donttest{ +try({ + tf <- tempfile() + on.exit(unlink(tf)) + write_feather(mtcars, tf) +}) +} +} diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index b5393a6c8bb..192d950d82a 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -16,10 +16,11 @@ write_parquet(table, file) This function enables you to write Parquet files from R. } \examples{ - -\dontrun{ +\donttest{ +try({ tf <- tempfile(fileext = ".parquet") + on.exit(unlink(tf)) write_parquet(tibble::tibble(x = 1:5), tf) +}) } - }