From babe5b7e22ca21ea3ddc222d1190387b7f462ed7 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 5 Jan 2021 14:58:54 -0600 Subject: [PATCH 01/38] Basic forwards compatibility testing --- ci/scripts/r_test_old_version.sh | 36 ++++++++++++++ dev/tasks/r/azure.linux.yml | 1 + dev/tasks/tasks.yml | 10 ++++ docker-compose.yml | 25 ++++++++++ r/tests/testthat/parquets/data-arrow2.parquet | Bin 0 -> 1136 bytes r/tests/testthat/test-parquet-compatibility.R | 45 ++++++++++++++++++ 6 files changed, 117 insertions(+) create mode 100755 ci/scripts/r_test_old_version.sh create mode 100644 r/tests/testthat/parquets/data-arrow2.parquet create mode 100644 r/tests/testthat/test-parquet-compatibility.R diff --git a/ci/scripts/r_test_old_version.sh b/ci/scripts/r_test_old_version.sh new file mode 100755 index 00000000000..712b996c072 --- /dev/null +++ b/ci/scripts/r_test_old_version.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +: ${R_BIN:=R} + +source_dir=${1}/r + +pushd ${source_dir} + +printenv + +export TEST_R_WITH_ARROW=TRUE + +${R_BIN} -e "as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true') + remotes::install_version('arrow', '$OLD_ARROW_VERSION', quiet = FALSE, verbose = TRUE) + library(arrow) + testthat::test_file('tests/testthat/test-parquet-compatibility.R')" + +popd diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml index 7ffe8c581cc..e715191fae7 100644 --- a/dev/tasks/r/azure.linux.yml +++ b/dev/tasks/r/azure.linux.yml @@ -37,6 +37,7 @@ jobs: export R_IMAGE={{ r_image }} export R_TAG={{ r_tag }} export DEVTOOLSET_VERSION={{ devtoolset_version|default("-1") }} + export OLD_ARROW_VERSION={{ old_arrow_version }} docker-compose pull --ignore-pull-failures r docker-compose build r displayName: Docker build diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 569e59f80dd..edd4dac2f45 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1799,6 +1799,16 @@ tasks: not_cran: "TRUE" devtoolset_version: 8 + test-r-rstudio-r-arrow-1.0.1-compatibility: + ci: azure + template: r/azure.linux.yml + params: + r_org: rstudio + r_image: r-base + r_tag: 3.6-bionic + old_arrow_version: "1.0.1" + run: r-forward-compatibility + test-r-rstudio-r-base-3.6-opensuse15: ci: azure template: r/azure.linux.yml diff --git a/docker-compose.yml b/docker-compose.yml index 0454c678c88..04adf816fa7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -858,6 +858,31 @@ services: command: > /bin/bash -c "/arrow/ci/scripts/r_test.sh /arrow" + r-forward-compatibility: + # This lets you test forward compatilbity from old arrow versions (given in OLD_ARROW_VERSION) + # + # Usage: + # R_ORG=rstudio R_IMAGE=r-base R_TAG=3.6-bionic OLD_ARROW_VERSION=1.0.1 docker-compose build r-forward-compatibility + # R_ORG=rstudio R_IMAGE=r-base R_TAG=3.6-bionic OLD_ARROW_VERSION=1.0.1 docker-compose run r-forward-compatibility + image: ${REPO}:r-${R_ORG}-${R_IMAGE}-${R_TAG} + build: + context: . + dockerfile: ci/docker/linux-r.dockerfile + cache_from: + - ${REPO}:r-${R_ORG}-${R_IMAGE}-${R_TAG} + args: + base: ${R_ORG}/${R_IMAGE}:${R_TAG} + r_dev: ${ARROW_R_DEV} + shm_size: *shm-size + environment: + ARROW_R_DEV: ${ARROW_R_DEV} + ARROW_USE_PKG_CONFIG: "false" + OLD_ARROW_VERSION: ${OLD_ARROW_VERSION} + volumes: + - .:/arrow:delegated + command: > + /bin/bash -c "/arrow/ci/scripts/r_test_old_version.sh /arrow" + ubuntu-r-sanitizer: # Only 18.04 and amd64 supported # Usage: diff --git a/r/tests/testthat/parquets/data-arrow2.parquet b/r/tests/testthat/parquets/data-arrow2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..002a44fd69740ef022f6bb66c30a9e91298f12d9 GIT binary patch literal 1136 zcmb_c&2G~`5MCQMlof{x#gQ%LP$8&#NK4j3^$ z1|EO|60gKlaOB9DnN3jB9yqbm?Cj2b^Ude6>^g5SRZ;7z|3OWw$_qlaze$7;8SDv| z0=5R00(%O!4)zR;f~iE^RA1$=PEDz)tfpX8Q?NcmwpVTt*m#GOX-QezF#OxEaNhbY zWmQ=>6_V05C5msDDreG}pHk+Rv^CpW{}-R0gPw{iqXu*!M%HsC(Ng26)9QzqkzdOT zT9QfYSv~uI`+>+z8Ysc13DI+!G}3-ZoEE6DXRu<4vV+q8{ysHn;2-XQFEDC|ol&+& z;Q}zTKv|wP!%4XNX%zNba5GkNCLKU<`~Yv#=q4PGsbLnEY0Qnp-z(U?is|09i>Ewh z_RV?F=;n)dFSv8Oc)s*Az!+f5n&1x3iQR8>nn#uC)!?LaeG$|gpU>TP4A`pS^eW6r zfUj>t?i}%t>Vw*)G3!JH)Dh(G`&60BnaI6N6qys zOO4e>8V~c$iz@$WOE{s{UXjGGs18YS`E^*xw9E*zyb7e}0@C5Gyuw>dxJ zJmO_c!Y~&@hqG4|AQ(1Y#EzSo_mR*lVo>kk`>+yxFUxf%{ClFk9EPLM)7E4s8V)ya iTcdF|9&EB*bC>O$xx9XMeCq4jEPS5d;b&5Zf6X6_Isc0Q literal 0 HcmV?d00001 diff --git a/r/tests/testthat/test-parquet-compatibility.R b/r/tests/testthat/test-parquet-compatibility.R new file mode 100644 index 00000000000..e715acc0602 --- /dev/null +++ b/r/tests/testthat/test-parquet-compatibility.R @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# TODO: skip mostly? Or use this for backwards compat? + +pq_file <- test_path("parquets/data-arrow2.parquet") + +test_that("reading a known Parquet file to dataframe", { + df <- read_parquet(pq_file) + expect_data_frame(df, data.frame(col1 = 1:10)) + expect_identical(dim(df), c(10L, 1L)) + + tab <- read_parquet(pq_file, as_data_frame = FALSE) + expect_s3_class(tab, "Table") + expect_equal( + # unserialize like .unserialize_arrow_r_metadata does (though we can't call + # it directly because it's not exported) + unserialize(charToRaw(tab$metadata$r)), + list( + attributes = list(class = "data.frame"), + columns = list(col1 = NULL) + ) + ) + + # test the workaround + tab$metadata$r <- NULL + df <- as.data.frame(tab) + + expect_data_frame(df, data.frame(col1 = 1:10)) + expect_identical(dim(df), c(10L, 1L)) +}) From f0ce0f185c010641f89191742f022b83dbc94025 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 5 Jan 2021 18:57:10 -0600 Subject: [PATCH 02/38] Trying on GHA --- .../github.linux.version.compatibility.yml | 92 +++++++++++++++++++ ci/scripts/r_build_install.sh | 45 +++++++++ dev/tasks/tasks.yml | 10 -- docker-compose.yml | 25 ----- r/.Rbuildignore | 1 + r/file-compatibility/write.R | 14 +++ 6 files changed, 152 insertions(+), 35 deletions(-) create mode 100644 .github/workflows/github.linux.version.compatibility.yml create mode 100755 ci/scripts/r_build_install.sh create mode 100644 r/file-compatibility/write.R diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml new file mode 100644 index 00000000000..274cee29689 --- /dev/null +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -0,0 +1,92 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! +name: Crossbow + +# TODO: move this back to de/vtasks/r/github.linux.version.compatibility.yml when done + +on: + push: + branches: + - "*-github-*" + +jobs: + write-files: + name: "Write files" + runs-on: ubuntu-latest + strategy: + fail-fast: false + env: + R_ORG: "rstudio" + R_IMAGE: "r-base" + R_TAG: "4.0-focal" + ARROW_R_DEV: "FALSE" + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Free Up Disk Space + shell: bash + run: arrow/ci/scripts/util_cleanup.sh + - name: Fetch Submodules and Tags + shell: bash + run: cd arrow && ci/scripts/util_checkout.sh + - name: Docker Pull + shell: bash + run: cd arrow && docker-compose pull --ignore-pull-failures r + - name: Docker Build + shell: bash + run: cd arrow && docker-compose build r + - name: Docker Run + shell: bash + run: | + cd arrow && + docker-compose run r /bin/bash -c "/arrow/ci/scripts/r_build_install.sh /arrow" && cd r && R -f file-compatibility/write.R + - name: Upload the parquet artifacts + uses: actions/upload-artifact@v2 + with: + name: files + path: arrow/r/file-compatibility/files + + read-files: + name: "Read files with Arrow {matrix.old_arrow_version}" + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + old_arrow_version: + - "2.0.0" + - "1.0.1" + env: + R_ORG: "rstudio" + R_IMAGE: "r-base" + R_TAG: "latest" + ARROW_R_DEV: "FALSE" + steps: + - name: Download artifacts + uses: actions/download-artifact@v2 + with: + name: files + path: . + - name: + shell: bash + run: ls -la . diff --git a/ci/scripts/r_build_install.sh b/ci/scripts/r_build_install.sh new file mode 100755 index 00000000000..2cc3708a24d --- /dev/null +++ b/ci/scripts/r_build_install.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +: ${R_BIN:=R} + +source_dir=${1}/r + +pushd ${source_dir} + +printenv + +if [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then + export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} + export R_LD_LIBRARY_PATH=${LD_LIBRARY_PATH} +fi +if [ "$ARROW_R_DEV" = "TRUE" ]; then + # These are used in the Arrow C++ build and are not a problem + export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-attributes -msse4.2" + # Note that NOT_CRAN=true means (among other things) that optional dependencies are built + export NOT_CRAN=true +fi + + +ls + +${R_BIN} CMD INSTALL . + +popd diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index edd4dac2f45..569e59f80dd 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1799,16 +1799,6 @@ tasks: not_cran: "TRUE" devtoolset_version: 8 - test-r-rstudio-r-arrow-1.0.1-compatibility: - ci: azure - template: r/azure.linux.yml - params: - r_org: rstudio - r_image: r-base - r_tag: 3.6-bionic - old_arrow_version: "1.0.1" - run: r-forward-compatibility - test-r-rstudio-r-base-3.6-opensuse15: ci: azure template: r/azure.linux.yml diff --git a/docker-compose.yml b/docker-compose.yml index 04adf816fa7..0454c678c88 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -858,31 +858,6 @@ services: command: > /bin/bash -c "/arrow/ci/scripts/r_test.sh /arrow" - r-forward-compatibility: - # This lets you test forward compatilbity from old arrow versions (given in OLD_ARROW_VERSION) - # - # Usage: - # R_ORG=rstudio R_IMAGE=r-base R_TAG=3.6-bionic OLD_ARROW_VERSION=1.0.1 docker-compose build r-forward-compatibility - # R_ORG=rstudio R_IMAGE=r-base R_TAG=3.6-bionic OLD_ARROW_VERSION=1.0.1 docker-compose run r-forward-compatibility - image: ${REPO}:r-${R_ORG}-${R_IMAGE}-${R_TAG} - build: - context: . - dockerfile: ci/docker/linux-r.dockerfile - cache_from: - - ${REPO}:r-${R_ORG}-${R_IMAGE}-${R_TAG} - args: - base: ${R_ORG}/${R_IMAGE}:${R_TAG} - r_dev: ${ARROW_R_DEV} - shm_size: *shm-size - environment: - ARROW_R_DEV: ${ARROW_R_DEV} - ARROW_USE_PKG_CONFIG: "false" - OLD_ARROW_VERSION: ${OLD_ARROW_VERSION} - volumes: - - .:/arrow:delegated - command: > - /bin/bash -c "/arrow/ci/scripts/r_test_old_version.sh /arrow" - ubuntu-r-sanitizer: # Only 18.04 and amd64 supported # Usage: diff --git a/r/.Rbuildignore b/r/.Rbuildignore index 91a8d741a8e..87074e7f92c 100644 --- a/r/.Rbuildignore +++ b/r/.Rbuildignore @@ -23,3 +23,4 @@ clang_format.sh ^autobrew$ ^apache-arrow.rb$ ^.*\.Rhistory$ +^file-compatibility diff --git a/r/file-compatibility/write.R b/r/file-compatibility/write.R new file mode 100644 index 00000000000..eb7a2a9ff75 --- /dev/null +++ b/r/file-compatibility/write.R @@ -0,0 +1,14 @@ +library(arrow) + +example_with_metadata <- tibble::tibble( + a = structure("one", class = "special_string"), + b = 2, + c = tibble::tibble( + c1 = structure("inner", extra_attr = "something"), + c2 = 4, + c3 = 50 + ), + d = "four" +) + +write_parquet(example_with_metadata, "file-compatibility/files/ex_data.parquet") From 8ef0c8df94f2fcb1f8aa26bb2a29f1af86067c12 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 5 Jan 2021 19:00:19 -0600 Subject: [PATCH 03/38] run on all pushes --- .github/workflows/github.linux.version.compatibility.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index 274cee29689..1f9c96602d1 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -19,12 +19,10 @@ # github comment reports! name: Crossbow -# TODO: move this back to de/vtasks/r/github.linux.version.compatibility.yml when done +# TODO: move this back to dev/tasks/r/github.linux.version.compatibility.yml when done on: - push: - branches: - - "*-github-*" + push jobs: write-files: From 2dba6c75eb34ca841810df8b251f4e09f8e16eed Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 5 Jan 2021 19:04:19 -0600 Subject: [PATCH 04/38] hardcode some crossbowery --- .../workflows/github.linux.version.compatibility.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index 1f9c96602d1..7d47c6458c8 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -19,7 +19,9 @@ # github comment reports! name: Crossbow -# TODO: move this back to dev/tasks/r/github.linux.version.compatibility.yml when done +# TODO: +# * move this back to dev/tasks/r/github.linux.version.compatibility.yml when done +# * add back only on branches "*-github-*" on: push @@ -38,8 +40,10 @@ jobs: steps: - name: Checkout Arrow run: | - git clone --no-checkout {{ arrow.remote }} arrow - git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + # git clone --no-checkout {{ arrow.remote }} arrow + # git clone --no-checkout https://github.com/jonkeane/arrow.git arrow + # git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + # git -C arrow fetch -t https://github.com/jonkeane/arrow.git ARROW-10623-attr-writing-test git -C arrow checkout FETCH_HEAD git -C arrow submodule update --init --recursive - name: Free Up Disk Space From 90ff23fddef21dc94fcf2682517dc5ffd7063026 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 5 Jan 2021 19:05:01 -0600 Subject: [PATCH 05/38] oops --- .github/workflows/github.linux.version.compatibility.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index 7d47c6458c8..e93c672ef63 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -41,9 +41,9 @@ jobs: - name: Checkout Arrow run: | # git clone --no-checkout {{ arrow.remote }} arrow - # git clone --no-checkout https://github.com/jonkeane/arrow.git arrow + git clone --no-checkout https://github.com/jonkeane/arrow.git arrow # git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} - # git -C arrow fetch -t https://github.com/jonkeane/arrow.git ARROW-10623-attr-writing-test + git -C arrow fetch -t https://github.com/jonkeane/arrow.git ARROW-10623-attr-writing-test git -C arrow checkout FETCH_HEAD git -C arrow submodule update --init --recursive - name: Free Up Disk Space From ba73af757b5a7216d735fe84dc025ec234618f20 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 5 Jan 2021 19:06:27 -0600 Subject: [PATCH 06/38] oh and needs too --- .github/workflows/github.linux.version.compatibility.yml | 4 +++- ci/scripts/r_build_install.sh | 3 --- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index e93c672ef63..17e013cbc79 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -22,6 +22,7 @@ name: Crossbow # TODO: # * move this back to dev/tasks/r/github.linux.version.compatibility.yml when done # * add back only on branches "*-github-*" +# * rstudio package manager packages? on: push @@ -62,7 +63,7 @@ jobs: shell: bash run: | cd arrow && - docker-compose run r /bin/bash -c "/arrow/ci/scripts/r_build_install.sh /arrow" && cd r && R -f file-compatibility/write.R + docker-compose run r /bin/bash -c "/arrow/ci/scripts/r_build_install.sh /arrow && cd r && R -f file-compatibility/write.R" - name: Upload the parquet artifacts uses: actions/upload-artifact@v2 with: @@ -71,6 +72,7 @@ jobs: read-files: name: "Read files with Arrow {matrix.old_arrow_version}" + needs: [write-files] runs-on: ubuntu-latest strategy: fail-fast: false diff --git a/ci/scripts/r_build_install.sh b/ci/scripts/r_build_install.sh index 2cc3708a24d..383240ce50e 100755 --- a/ci/scripts/r_build_install.sh +++ b/ci/scripts/r_build_install.sh @@ -37,9 +37,6 @@ if [ "$ARROW_R_DEV" = "TRUE" ]; then export NOT_CRAN=true fi - -ls - ${R_BIN} CMD INSTALL . popd From c792d4bde81f340c95b20d0582267c6116a388fc Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 08:08:10 -0600 Subject: [PATCH 07/38] Don't cd --- .github/workflows/github.linux.version.compatibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index 17e013cbc79..b3fdbeb48a2 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -63,7 +63,7 @@ jobs: shell: bash run: | cd arrow && - docker-compose run r /bin/bash -c "/arrow/ci/scripts/r_build_install.sh /arrow && cd r && R -f file-compatibility/write.R" + docker-compose run r /bin/bash -c "/arrow/ci/scripts/r_build_install.sh /arrow && R -f file-compatibility/write.R" - name: Upload the parquet artifacts uses: actions/upload-artifact@v2 with: From 4102ec5d45206514397c1af53a724158c12d6797 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 08:49:12 -0600 Subject: [PATCH 08/38] Try running directly on GHA --- .../github.linux.version.compatibility.yml | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index b3fdbeb48a2..e07eaf4db21 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -30,14 +30,12 @@ on: jobs: write-files: name: "Write files" - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: fail-fast: false env: - R_ORG: "rstudio" - R_IMAGE: "r-base" - R_TAG: "4.0-focal" ARROW_R_DEV: "FALSE" + RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" steps: - name: Checkout Arrow run: | @@ -53,17 +51,23 @@ jobs: - name: Fetch Submodules and Tags shell: bash run: cd arrow && ci/scripts/util_checkout.sh - - name: Docker Pull - shell: bash - run: cd arrow && docker-compose pull --ignore-pull-failures r - - name: Docker Build - shell: bash - run: cd arrow && docker-compose build r - - name: Docker Run + - uses: r-lib/actions/setup-r@v1 + - name: Install dependencies + run: | + install.packages(c("remotes", "glue", "sys")) + remotes::install_deps("arrow/r", dependencies = TRUE) + shell: Rscript {0} + - name: Install Arrow + run: | + cd arrow/r + R CMD INSTALL . shell: bash + - name: Write files run: | - cd arrow && - docker-compose run r /bin/bash -c "/arrow/ci/scripts/r_build_install.sh /arrow && R -f file-compatibility/write.R" + cd arrow/r + R -f file-compatibility/write.R + shell: bash + - name: Upload the parquet artifacts uses: actions/upload-artifact@v2 with: From fcf56cd7b85474f7ac15c56f5cf8b6de7deacd37 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 09:04:17 -0600 Subject: [PATCH 09/38] Make sure the dir exists --- r/file-compatibility/write.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/r/file-compatibility/write.R b/r/file-compatibility/write.R index eb7a2a9ff75..b4be8912864 100644 --- a/r/file-compatibility/write.R +++ b/r/file-compatibility/write.R @@ -1,5 +1,9 @@ library(arrow) +if (!dir.exists("file-compatibility/files")) { + dir.create("file-compatibility/files") +} + example_with_metadata <- tibble::tibble( a = structure("one", class = "special_string"), b = 2, From bfb96a43a456f4985d6a04650ea539777030971d Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 10:03:57 -0600 Subject: [PATCH 10/38] Add tests --- .../github.linux.version.compatibility.yml | 36 +++++++++++++------ r/file-compatibility/test-read.R | 19 ++++++++++ 2 files changed, 45 insertions(+), 10 deletions(-) create mode 100644 r/file-compatibility/test-read.R diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index e07eaf4db21..478742b949a 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -34,7 +34,7 @@ jobs: strategy: fail-fast: false env: - ARROW_R_DEV: "FALSE" + ARROW_R_DEV: "TRUE" RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" steps: - name: Checkout Arrow @@ -77,7 +77,7 @@ jobs: read-files: name: "Read files with Arrow {matrix.old_arrow_version}" needs: [write-files] - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: @@ -85,16 +85,32 @@ jobs: - "2.0.0" - "1.0.1" env: - R_ORG: "rstudio" - R_IMAGE: "r-base" - R_TAG: "latest" - ARROW_R_DEV: "FALSE" + RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" steps: + - name: Checkout Arrow + run: | + # git clone --no-checkout {{ arrow.remote }} arrow + git clone --no-checkout https://github.com/jonkeane/arrow.git arrow + # git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow fetch -t https://github.com/jonkeane/arrow.git ARROW-10623-attr-writing-test + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - uses: r-lib/actions/setup-r@v1 + - name: Install old Arrow + run: | + install.packages(c("remotes", "testthat")) + remotes::install_version("arrow", "{matrix.old_arrow_version}") + shell: Rscript {0} + - name: Setup our testing directory, copy only the tests to it. + run: | + mkdir -p file-compatibility/files + cp arrow/r/file-compatibility/test-*.R - name: Download artifacts uses: actions/download-artifact@v2 with: name: files - path: . - - name: - shell: bash - run: ls -la . + path: file-compatibility/files + - name: Install dependencies + run: | + testthat::test_dir("file-compatibility") + shell: Rscript {0} diff --git a/r/file-compatibility/test-read.R b/r/file-compatibility/test-read.R new file mode 100644 index 00000000000..a90d54bbf53 --- /dev/null +++ b/r/file-compatibility/test-read.R @@ -0,0 +1,19 @@ +library(arrow) +library(testthat) + +pq_file <- "files/ex_data.parquet" + +test_that("Can see the metadata", { + df <- read_parquet(pq_file) + expect_s3_class(df, "tbl") + expect_equal(attributes(df$a), list(class = "special_string")) + expect_equal( + attributes(df$c), + list( + row.names = 1L, + names = c("c1", "c2", "c3"), + class = c("tbl_df", "tbl", "data.frame") + ) + ) +}) + From cdf162d4fac2650462c78afc84f9e16130ede00b Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 10:32:12 -0600 Subject: [PATCH 11/38] try a different reference --- .github/workflows/github.linux.version.compatibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index 478742b949a..516f3932c63 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -99,7 +99,7 @@ jobs: - name: Install old Arrow run: | install.packages(c("remotes", "testthat")) - remotes::install_version("arrow", "{matrix.old_arrow_version}") + remotes::install_version("arrow", "${{ matrix.old_arrow_version }}") shell: Rscript {0} - name: Setup our testing directory, copy only the tests to it. run: | From ac718655ea75454711359242e94f513f2ce38821 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 10:52:19 -0600 Subject: [PATCH 12/38] oops! --- .github/workflows/github.linux.version.compatibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index 516f3932c63..c12903b695e 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -104,7 +104,7 @@ jobs: - name: Setup our testing directory, copy only the tests to it. run: | mkdir -p file-compatibility/files - cp arrow/r/file-compatibility/test-*.R + cp arrow/r/file-compatibility/test-*.R file-compatibility/ - name: Download artifacts uses: actions/download-artifact@v2 with: From a4260140e3b96a28bb5a57723754ef8d6a17f7d0 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 11:07:44 -0600 Subject: [PATCH 13/38] clean up --- .github/workflows/github.linux.version.compatibility.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index c12903b695e..6e37af95276 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -75,7 +75,7 @@ jobs: path: arrow/r/file-compatibility/files read-files: - name: "Read files with Arrow {matrix.old_arrow_version}" + name: "Read files with Arrow ${{ matrix.old_arrow_version }}" needs: [write-files] runs-on: ubuntu-20.04 strategy: @@ -110,7 +110,7 @@ jobs: with: name: files path: file-compatibility/files - - name: Install dependencies + - name: Test reading run: | testthat::test_dir("file-compatibility") shell: Rscript {0} From 596d94d16d320ce70136e6879580c09981525c7e Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 11:16:52 -0600 Subject: [PATCH 14/38] Can we skip known-bad versions? --- .github/workflows/github.linux.version.compatibility.yml | 1 + r/file-compatibility/test-read.R | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index 6e37af95276..addd36e88bc 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -86,6 +86,7 @@ jobs: - "1.0.1" env: RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" + OLD_ARROW_VERSION: ${{ matrix.old_arrow_version }} steps: - name: Checkout Arrow run: | diff --git a/r/file-compatibility/test-read.R b/r/file-compatibility/test-read.R index a90d54bbf53..eb3f7b214f4 100644 --- a/r/file-compatibility/test-read.R +++ b/r/file-compatibility/test-read.R @@ -1,9 +1,17 @@ library(arrow) library(testthat) +skip_if_version <- function(version, msg, op = `<`) { + if (op(version, numeric_version(Sys.getenv("OLD_ARROW_VERSION", "0.0.0")))) { + skip(msg) + } +} + pq_file <- "files/ex_data.parquet" test_that("Can see the metadata", { + skip_if_version("2.0.0", "Version 1.0.1 can't read new version metadata.") + df <- read_parquet(pq_file) expect_s3_class(df, "tbl") expect_equal(attributes(df$a), list(class = "special_string")) From 83d2c071dfcde1211d28bc50c875f53a41878345 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 11:39:17 -0600 Subject: [PATCH 15/38] What happens in 1.0.1? --- r/file-compatibility/test-read.R | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/r/file-compatibility/test-read.R b/r/file-compatibility/test-read.R index eb3f7b214f4..13ebf1999c7 100644 --- a/r/file-compatibility/test-read.R +++ b/r/file-compatibility/test-read.R @@ -1,16 +1,24 @@ library(arrow) library(testthat) -skip_if_version <- function(version, msg, op = `<`) { - if (op(version, numeric_version(Sys.getenv("OLD_ARROW_VERSION", "0.0.0")))) { +skip_if_version <- function(version, msg, op = `<=`) { + if (op(numeric_version(Sys.getenv("OLD_ARROW_VERSION", "0.0.0")), version)) { skip(msg) } } pq_file <- "files/ex_data.parquet" +test_that("Can sread the file", { + skip_if_version("1.0.1", "Version 1.0.1 can't read new version metadata.") + + df <- read_parquet(pq_file) + print(str(df)) + expect_true(TRUE) +}) + test_that("Can see the metadata", { - skip_if_version("2.0.0", "Version 1.0.1 can't read new version metadata.") + skip_if_version("1.0.1", "Version 1.0.1 can't read new version metadata.") df <- read_parquet(pq_file) expect_s3_class(df, "tbl") From 6696029605214fa23e71c090da84eaebaf397bb5 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 12:10:37 -0600 Subject: [PATCH 16/38] oops --- r/file-compatibility/test-read.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/r/file-compatibility/test-read.R b/r/file-compatibility/test-read.R index 13ebf1999c7..b00b7e2ceae 100644 --- a/r/file-compatibility/test-read.R +++ b/r/file-compatibility/test-read.R @@ -10,8 +10,6 @@ skip_if_version <- function(version, msg, op = `<=`) { pq_file <- "files/ex_data.parquet" test_that("Can sread the file", { - skip_if_version("1.0.1", "Version 1.0.1 can't read new version metadata.") - df <- read_parquet(pq_file) print(str(df)) expect_true(TRUE) From 3f5525505e0cfe334d4b1d51e4d7171d183246ee Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 13:40:21 -0600 Subject: [PATCH 17/38] Some more cleanup --- r/.Rbuildignore | 2 +- r/.gitignore | 1 + .../test-read-parquet.R} | 25 ++++++++++++++++--- .../write.R => extra-tests/write-parquet.R} | 7 +++++- r/tests/testthat/helper-data.R | 11 ++++---- 5 files changed, 34 insertions(+), 12 deletions(-) rename r/{file-compatibility/test-read.R => extra-tests/test-read-parquet.R} (60%) rename r/{file-compatibility/write.R => extra-tests/write-parquet.R} (65%) diff --git a/r/.Rbuildignore b/r/.Rbuildignore index 87074e7f92c..cf4b7ce31ba 100644 --- a/r/.Rbuildignore +++ b/r/.Rbuildignore @@ -23,4 +23,4 @@ clang_format.sh ^autobrew$ ^apache-arrow.rb$ ^.*\.Rhistory$ -^file-compatibility +^extra-tests diff --git a/r/.gitignore b/r/.gitignore index e5ab1197071..76e8a8dd0bd 100644 --- a/r/.gitignore +++ b/r/.gitignore @@ -17,3 +17,4 @@ revdep/ vignettes/nyc-taxi/ arrow_*.tar.gz arrow_*.tgz +extra-tests/files diff --git a/r/file-compatibility/test-read.R b/r/extra-tests/test-read-parquet.R similarity index 60% rename from r/file-compatibility/test-read.R rename to r/extra-tests/test-read-parquet.R index b00b7e2ceae..eccb42d6f24 100644 --- a/r/file-compatibility/test-read.R +++ b/r/extra-tests/test-read-parquet.R @@ -9,10 +9,12 @@ skip_if_version <- function(version, msg, op = `<=`) { pq_file <- "files/ex_data.parquet" -test_that("Can sread the file", { - df <- read_parquet(pq_file) - print(str(df)) - expect_true(TRUE) +test_that("Can read the file", { + # We can read with no error, we assert metadata below + expect_error( + df <- read_parquet(pq_file), + NA + ) }) test_that("Can see the metadata", { @@ -20,6 +22,21 @@ test_that("Can see the metadata", { df <- read_parquet(pq_file) expect_s3_class(df, "tbl") + + expect_equal( + attributes(df), + list( + names = letters[1:4], + row.names = 1L, + top_level = list( + field_one = 12, + field_two = "more stuff" + ), + class = c("tbl_df", "tbl", "data.frame") + ) + ) + + # column-level attributes expect_equal(attributes(df$a), list(class = "special_string")) expect_equal( attributes(df$c), diff --git a/r/file-compatibility/write.R b/r/extra-tests/write-parquet.R similarity index 65% rename from r/file-compatibility/write.R rename to r/extra-tests/write-parquet.R index b4be8912864..a01f708f6a7 100644 --- a/r/file-compatibility/write.R +++ b/r/extra-tests/write-parquet.R @@ -15,4 +15,9 @@ example_with_metadata <- tibble::tibble( d = "four" ) -write_parquet(example_with_metadata, "file-compatibility/files/ex_data.parquet") +attr(example_with_metadata, "top_level") <- list( + field_one = 12, + field_two = "more stuff" +) + +write_parquet(example_with_metadata, "extra-tests/files/ex_data.parquet") diff --git a/r/tests/testthat/helper-data.R b/r/tests/testthat/helper-data.R index a810aa36781..26b1cf0e108 100644 --- a/r/tests/testthat/helper-data.R +++ b/r/tests/testthat/helper-data.R @@ -34,12 +34,11 @@ example_with_metadata <- tibble::tibble( ), d = "four" ) -# TODO: collect top-level dataset metadata -# https://issues.apache.org/jira/browse/ARROW-9271 -# attr(example_with_metadata, "top_level") <- list( -# field_one = 12, -# field_two = "more stuff" -# ) + +attr(example_with_metadata, "top_level") <- list( + field_one = 12, + field_two = "more stuff" +) haven_data <- tibble::tibble( num = structure(c(5.1, 4.9), From 0961b2bcc507f35efc6dfdb2ce7faa9d652e328d Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 13:42:33 -0600 Subject: [PATCH 18/38] Cleanup --- ci/scripts/r_build_install.sh | 42 -------------------------------- ci/scripts/r_test_old_version.sh | 36 --------------------------- 2 files changed, 78 deletions(-) delete mode 100755 ci/scripts/r_build_install.sh delete mode 100755 ci/scripts/r_test_old_version.sh diff --git a/ci/scripts/r_build_install.sh b/ci/scripts/r_build_install.sh deleted file mode 100755 index 383240ce50e..00000000000 --- a/ci/scripts/r_build_install.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -: ${R_BIN:=R} - -source_dir=${1}/r - -pushd ${source_dir} - -printenv - -if [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then - export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} - export R_LD_LIBRARY_PATH=${LD_LIBRARY_PATH} -fi -if [ "$ARROW_R_DEV" = "TRUE" ]; then - # These are used in the Arrow C++ build and are not a problem - export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-attributes -msse4.2" - # Note that NOT_CRAN=true means (among other things) that optional dependencies are built - export NOT_CRAN=true -fi - -${R_BIN} CMD INSTALL . - -popd diff --git a/ci/scripts/r_test_old_version.sh b/ci/scripts/r_test_old_version.sh deleted file mode 100755 index 712b996c072..00000000000 --- a/ci/scripts/r_test_old_version.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -: ${R_BIN:=R} - -source_dir=${1}/r - -pushd ${source_dir} - -printenv - -export TEST_R_WITH_ARROW=TRUE - -${R_BIN} -e "as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true') - remotes::install_version('arrow', '$OLD_ARROW_VERSION', quiet = FALSE, verbose = TRUE) - library(arrow) - testthat::test_file('tests/testthat/test-parquet-compatibility.R')" - -popd From afb0aee695b0b17e274c7897132faa9a3644d375 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 16:09:34 -0600 Subject: [PATCH 19/38] oops --- .../github.linux.version.compatibility.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index addd36e88bc..cfacd303f5c 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -22,7 +22,6 @@ name: Crossbow # TODO: # * move this back to dev/tasks/r/github.linux.version.compatibility.yml when done # * add back only on branches "*-github-*" -# * rstudio package manager packages? on: push @@ -65,14 +64,14 @@ jobs: - name: Write files run: | cd arrow/r - R -f file-compatibility/write.R + R -f extra-tests/write.R shell: bash - name: Upload the parquet artifacts uses: actions/upload-artifact@v2 with: name: files - path: arrow/r/file-compatibility/files + path: arrow/r/extra-tests/files read-files: name: "Read files with Arrow ${{ matrix.old_arrow_version }}" @@ -104,14 +103,14 @@ jobs: shell: Rscript {0} - name: Setup our testing directory, copy only the tests to it. run: | - mkdir -p file-compatibility/files - cp arrow/r/file-compatibility/test-*.R file-compatibility/ + mkdir -p extra-tests/files + cp arrow/r/extra-tests/test-*.R extra-tests/ - name: Download artifacts uses: actions/download-artifact@v2 with: name: files - path: file-compatibility/files + path: extra-tests/files - name: Test reading run: | - testthat::test_dir("file-compatibility") + testthat::test_dir("extra-tests") shell: Rscript {0} From 350c48326da969d492ffeabc34a91e9b70010fca Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 16:24:12 -0600 Subject: [PATCH 20/38] Oy --- .github/workflows/github.linux.version.compatibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/.github/workflows/github.linux.version.compatibility.yml index cfacd303f5c..2d7ce3da294 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/.github/workflows/github.linux.version.compatibility.yml @@ -64,7 +64,7 @@ jobs: - name: Write files run: | cd arrow/r - R -f extra-tests/write.R + R -f extra-tests/write-parquet.R shell: bash - name: Upload the parquet artifacts From b4c23df24b21be55a87a396751142bd7cd430dfb Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 16:42:44 -0600 Subject: [PATCH 21/38] still more changes --- r/extra-tests/write-parquet.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/extra-tests/write-parquet.R b/r/extra-tests/write-parquet.R index a01f708f6a7..f98b29e0f53 100644 --- a/r/extra-tests/write-parquet.R +++ b/r/extra-tests/write-parquet.R @@ -1,7 +1,7 @@ library(arrow) -if (!dir.exists("file-compatibility/files")) { - dir.create("file-compatibility/files") +if (!dir.exists("extra-tests/files")) { + dir.create("extra-tests/files") } example_with_metadata <- tibble::tibble( From d9e7e611d2304f06b6145d4d30e7e75fbd8f8575 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 17:11:08 -0600 Subject: [PATCH 22/38] Move workflow to crossbox --- .../r}/github.linux.version.compatibility.yml | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) rename {.github/workflows => dev/tasks/r}/github.linux.version.compatibility.yml (81%) diff --git a/.github/workflows/github.linux.version.compatibility.yml b/dev/tasks/r/github.linux.version.compatibility.yml similarity index 81% rename from .github/workflows/github.linux.version.compatibility.yml rename to dev/tasks/r/github.linux.version.compatibility.yml index 2d7ce3da294..848bfcf4ec5 100644 --- a/.github/workflows/github.linux.version.compatibility.yml +++ b/dev/tasks/r/github.linux.version.compatibility.yml @@ -19,10 +19,6 @@ # github comment reports! name: Crossbow -# TODO: -# * move this back to dev/tasks/r/github.linux.version.compatibility.yml when done -# * add back only on branches "*-github-*" - on: push @@ -38,10 +34,8 @@ jobs: steps: - name: Checkout Arrow run: | - # git clone --no-checkout {{ arrow.remote }} arrow - git clone --no-checkout https://github.com/jonkeane/arrow.git arrow - # git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} - git -C arrow fetch -t https://github.com/jonkeane/arrow.git ARROW-10623-attr-writing-test + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} git -C arrow checkout FETCH_HEAD git -C arrow submodule update --init --recursive - name: Free Up Disk Space @@ -89,10 +83,8 @@ jobs: steps: - name: Checkout Arrow run: | - # git clone --no-checkout {{ arrow.remote }} arrow - git clone --no-checkout https://github.com/jonkeane/arrow.git arrow - # git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} - git -C arrow fetch -t https://github.com/jonkeane/arrow.git ARROW-10623-attr-writing-test + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} git -C arrow checkout FETCH_HEAD git -C arrow submodule update --init --recursive - uses: r-lib/actions/setup-r@v1 From 8b920509fa0e295c59a8d0b53b3c4b433b91faef Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 17:16:50 -0600 Subject: [PATCH 23/38] Add a crossbow task --- dev/tasks/tasks.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 569e59f80dd..79bc120cf72 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1752,6 +1752,10 @@ tasks: template: r/github.linux.cran.yml params: MATRIX: "${{ matrix.r_image }}" + + test-r-version-compatibility: + ci: github + template: r/github.linux.version.compatibility.yml test-r-rhub-ubuntu-gcc-release: ci: azure From 5cd59d5592ce76fb6ce78abdb8fcce1407aa7e04 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 19:22:58 -0600 Subject: [PATCH 24/38] Escape jinja --- .../r/github.linux.version.compatibility.yml | 6 +++--- r/extra-tests/write-parquet.R | 16 +--------------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/dev/tasks/r/github.linux.version.compatibility.yml b/dev/tasks/r/github.linux.version.compatibility.yml index 848bfcf4ec5..e0b82841b8e 100644 --- a/dev/tasks/r/github.linux.version.compatibility.yml +++ b/dev/tasks/r/github.linux.version.compatibility.yml @@ -68,7 +68,7 @@ jobs: path: arrow/r/extra-tests/files read-files: - name: "Read files with Arrow ${{ matrix.old_arrow_version }}" + name: "Read files with Arrow {{ '${{ matrix.old_arrow_version }}' }}" needs: [write-files] runs-on: ubuntu-20.04 strategy: @@ -79,7 +79,7 @@ jobs: - "1.0.1" env: RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" - OLD_ARROW_VERSION: ${{ matrix.old_arrow_version }} + OLD_ARROW_VERSION: {{ '${{ matrix.old_arrow_version }}' }} steps: - name: Checkout Arrow run: | @@ -91,7 +91,7 @@ jobs: - name: Install old Arrow run: | install.packages(c("remotes", "testthat")) - remotes::install_version("arrow", "${{ matrix.old_arrow_version }}") + remotes::install_version("arrow", "{{ '${{ matrix.old_arrow_version }}' }}") shell: Rscript {0} - name: Setup our testing directory, copy only the tests to it. run: | diff --git a/r/extra-tests/write-parquet.R b/r/extra-tests/write-parquet.R index f98b29e0f53..21d94ac84a4 100644 --- a/r/extra-tests/write-parquet.R +++ b/r/extra-tests/write-parquet.R @@ -4,20 +4,6 @@ if (!dir.exists("extra-tests/files")) { dir.create("extra-tests/files") } -example_with_metadata <- tibble::tibble( - a = structure("one", class = "special_string"), - b = 2, - c = tibble::tibble( - c1 = structure("inner", extra_attr = "something"), - c2 = 4, - c3 = 50 - ), - d = "four" -) - -attr(example_with_metadata, "top_level") <- list( - field_one = 12, - field_two = "more stuff" -) +source("tests/testthat/helper-data.R") write_parquet(example_with_metadata, "extra-tests/files/ex_data.parquet") From 4814d4a552d3f08bf4239ffcdefbdcdebb9a5d6f Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 6 Jan 2021 20:12:32 -0600 Subject: [PATCH 25/38] More indirection? --- dev/tasks/r/github.linux.version.compatibility.yml | 6 +++--- dev/tasks/tasks.yml | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/dev/tasks/r/github.linux.version.compatibility.yml b/dev/tasks/r/github.linux.version.compatibility.yml index e0b82841b8e..672d8d4af60 100644 --- a/dev/tasks/r/github.linux.version.compatibility.yml +++ b/dev/tasks/r/github.linux.version.compatibility.yml @@ -68,7 +68,7 @@ jobs: path: arrow/r/extra-tests/files read-files: - name: "Read files with Arrow {{ '${{ matrix.old_arrow_version }}' }}" + name: "Read files with Arrow {{ OLD_ARROW_VERSION }}" needs: [write-files] runs-on: ubuntu-20.04 strategy: @@ -79,7 +79,7 @@ jobs: - "1.0.1" env: RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" - OLD_ARROW_VERSION: {{ '${{ matrix.old_arrow_version }}' }} + OLD_ARROW_VERSION: {{ OLD_ARROW_VERSION }} steps: - name: Checkout Arrow run: | @@ -91,7 +91,7 @@ jobs: - name: Install old Arrow run: | install.packages(c("remotes", "testthat")) - remotes::install_version("arrow", "{{ '${{ matrix.old_arrow_version }}' }}") + remotes::install_version("arrow", "{{ OLD_ARROW_VERSION }}") shell: Rscript {0} - name: Setup our testing directory, copy only the tests to it. run: | diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 79bc120cf72..42d072f2056 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1756,6 +1756,8 @@ tasks: test-r-version-compatibility: ci: github template: r/github.linux.version.compatibility.yml + params: + OLD_ARROW_VERSION: "${{ matrix.old_arrow_version }}" test-r-rhub-ubuntu-gcc-release: ci: azure From faa9f37a9635a9ce105c757ef436b3051f7ee9d6 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Thu, 7 Jan 2021 10:40:23 -0600 Subject: [PATCH 26/38] Oops extra typo --- dev/tasks/r/azure.linux.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml index e715191fae7..7ffe8c581cc 100644 --- a/dev/tasks/r/azure.linux.yml +++ b/dev/tasks/r/azure.linux.yml @@ -37,7 +37,6 @@ jobs: export R_IMAGE={{ r_image }} export R_TAG={{ r_tag }} export DEVTOOLSET_VERSION={{ devtoolset_version|default("-1") }} - export OLD_ARROW_VERSION={{ old_arrow_version }} docker-compose pull --ignore-pull-failures r docker-compose build r displayName: Docker build From b6ec09f5a5d71ff30599fa51348071d6d9a2dc5a Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Thu, 7 Jan 2021 13:02:33 -0600 Subject: [PATCH 27/38] A slightly different way of loading the crossbow... --- dev/tasks/r/github.linux.version.compatibility.yml | 6 +++--- dev/tasks/tasks.yml | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/dev/tasks/r/github.linux.version.compatibility.yml b/dev/tasks/r/github.linux.version.compatibility.yml index 672d8d4af60..e0b82841b8e 100644 --- a/dev/tasks/r/github.linux.version.compatibility.yml +++ b/dev/tasks/r/github.linux.version.compatibility.yml @@ -68,7 +68,7 @@ jobs: path: arrow/r/extra-tests/files read-files: - name: "Read files with Arrow {{ OLD_ARROW_VERSION }}" + name: "Read files with Arrow {{ '${{ matrix.old_arrow_version }}' }}" needs: [write-files] runs-on: ubuntu-20.04 strategy: @@ -79,7 +79,7 @@ jobs: - "1.0.1" env: RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" - OLD_ARROW_VERSION: {{ OLD_ARROW_VERSION }} + OLD_ARROW_VERSION: {{ '${{ matrix.old_arrow_version }}' }} steps: - name: Checkout Arrow run: | @@ -91,7 +91,7 @@ jobs: - name: Install old Arrow run: | install.packages(c("remotes", "testthat")) - remotes::install_version("arrow", "{{ OLD_ARROW_VERSION }}") + remotes::install_version("arrow", "{{ '${{ matrix.old_arrow_version }}' }}") shell: Rscript {0} - name: Setup our testing directory, copy only the tests to it. run: | diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 42d072f2056..79bc120cf72 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1756,8 +1756,6 @@ tasks: test-r-version-compatibility: ci: github template: r/github.linux.version.compatibility.yml - params: - OLD_ARROW_VERSION: "${{ matrix.old_arrow_version }}" test-r-rhub-ubuntu-gcc-release: ci: azure From 3959ed386f63acf378594c99c8afc31e16c2e83d Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Thu, 7 Jan 2021 14:22:37 -0600 Subject: [PATCH 28/38] Add feather tests --- .../r/github.linux.version.compatibility.yml | 2 +- r/extra-tests/test-read-files.R | 96 +++++++++++++++++++ r/extra-tests/test-read-parquet.R | 50 ---------- r/extra-tests/write-files.R | 15 +++ r/extra-tests/write-parquet.R | 9 -- 5 files changed, 112 insertions(+), 60 deletions(-) create mode 100644 r/extra-tests/test-read-files.R delete mode 100644 r/extra-tests/test-read-parquet.R create mode 100644 r/extra-tests/write-files.R delete mode 100644 r/extra-tests/write-parquet.R diff --git a/dev/tasks/r/github.linux.version.compatibility.yml b/dev/tasks/r/github.linux.version.compatibility.yml index e0b82841b8e..8b2a4b5a1aa 100644 --- a/dev/tasks/r/github.linux.version.compatibility.yml +++ b/dev/tasks/r/github.linux.version.compatibility.yml @@ -58,7 +58,7 @@ jobs: - name: Write files run: | cd arrow/r - R -f extra-tests/write-parquet.R + R -f extra-tests/write-files.R shell: bash - name: Upload the parquet artifacts diff --git a/r/extra-tests/test-read-files.R b/r/extra-tests/test-read-files.R new file mode 100644 index 00000000000..0639d78bb44 --- /dev/null +++ b/r/extra-tests/test-read-files.R @@ -0,0 +1,96 @@ +library(arrow) +library(testthat) + +if_version <- function(version, op = `==`) { + op(numeric_version(Sys.getenv("OLD_ARROW_VERSION", "0.0.0")), version) +} + +skip_if_version_less_than <- function(version, msg) { + if(if_version(version, `<`)) { + skip(msg) + } +} + +skip_if_version_equals <- function(version, msg) { + if(if_version(version, `==`)) { + skip(msg) + } +} + +pq_file <- "files/ex_data.parquet" + +test_that("Can read the file (parquet)", { + # We can read with no error, we assert metadata below + expect_error( + df <- read_parquet(pq_file), + NA + ) +}) + +test_that("Can see the metadata (feather)", { + skip_if_version_less_than("2.0.0", "Version 1.0.1 can't read new version metadata.") + + df <- read_parquet(pq_file) + expect_s3_class(df, "tbl") + + expect_equal( + attributes(df), + list( + names = letters[1:4], + row.names = 1L, + top_level = list( + field_one = 12, + field_two = "more stuff" + ), + class = c("tbl_df", "tbl", "data.frame") + ) + ) + + # column-level attributes + expect_equal(attributes(df$a), list(class = "special_string")) + expect_equal( + attributes(df$c), + list( + row.names = 1L, + names = c("c1", "c2", "c3"), + class = c("tbl_df", "tbl", "data.frame") + ) + ) +}) + + + +for (comp in c("lz4", "uncompressed", "zstd")) { + feather_file <- paste0("files/ex_data.", comp, ".feather") + + test_that(paste0("Can see the metadata (feather", comp, ")"), { + df <- read_feather(feather_file) + expect_s3_class(df, "tbl") + + expect_equal( + attributes(df), + list( + names = letters[1:4], + row.names = 1L, + top_level = list( + field_one = 12, + field_two = "more stuff" + ), + class = c("tbl_df", "tbl", "data.frame") + ) + ) + + # column-level attributes + expect_equal(attributes(df$a), list(class = "special_string")) + expect_equal( + attributes(df$c), + list( + row.names = 1L, + names = c("c1", "c2", "c3"), + class = c("tbl_df", "tbl", "data.frame") + ) + ) + }) +} + + diff --git a/r/extra-tests/test-read-parquet.R b/r/extra-tests/test-read-parquet.R deleted file mode 100644 index eccb42d6f24..00000000000 --- a/r/extra-tests/test-read-parquet.R +++ /dev/null @@ -1,50 +0,0 @@ -library(arrow) -library(testthat) - -skip_if_version <- function(version, msg, op = `<=`) { - if (op(numeric_version(Sys.getenv("OLD_ARROW_VERSION", "0.0.0")), version)) { - skip(msg) - } -} - -pq_file <- "files/ex_data.parquet" - -test_that("Can read the file", { - # We can read with no error, we assert metadata below - expect_error( - df <- read_parquet(pq_file), - NA - ) -}) - -test_that("Can see the metadata", { - skip_if_version("1.0.1", "Version 1.0.1 can't read new version metadata.") - - df <- read_parquet(pq_file) - expect_s3_class(df, "tbl") - - expect_equal( - attributes(df), - list( - names = letters[1:4], - row.names = 1L, - top_level = list( - field_one = 12, - field_two = "more stuff" - ), - class = c("tbl_df", "tbl", "data.frame") - ) - ) - - # column-level attributes - expect_equal(attributes(df$a), list(class = "special_string")) - expect_equal( - attributes(df$c), - list( - row.names = 1L, - names = c("c1", "c2", "c3"), - class = c("tbl_df", "tbl", "data.frame") - ) - ) -}) - diff --git a/r/extra-tests/write-files.R b/r/extra-tests/write-files.R new file mode 100644 index 00000000000..4e13597e2a7 --- /dev/null +++ b/r/extra-tests/write-files.R @@ -0,0 +1,15 @@ +library(arrow) + +if (!dir.exists("extra-tests/files")) { + dir.create("extra-tests/files") +} + +source("tests/testthat/helper-data.R") + +write_parquet(example_with_metadata, "extra-tests/files/ex_data.parquet") +for (comp in c("lz4", "uncompressed", "zstd")) { + if(!codec_is_available(comp)) break + + name <- paste0("extra-tests/files/ex_data.", comp, ".feather") + write_feather(example_with_metadata, name, compression = comp) +} diff --git a/r/extra-tests/write-parquet.R b/r/extra-tests/write-parquet.R deleted file mode 100644 index 21d94ac84a4..00000000000 --- a/r/extra-tests/write-parquet.R +++ /dev/null @@ -1,9 +0,0 @@ -library(arrow) - -if (!dir.exists("extra-tests/files")) { - dir.create("extra-tests/files") -} - -source("tests/testthat/helper-data.R") - -write_parquet(example_with_metadata, "extra-tests/files/ex_data.parquet") From afed5b664cc0a2a9696d6c725ade0945773f6ac5 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Thu, 7 Jan 2021 16:18:55 -0600 Subject: [PATCH 29/38] More filetypes, will probably fail on 1.0.1 for streams + metadata. --- r/extra-tests/helpers.R | 15 ++++++ r/extra-tests/test-read-files.R | 89 ++++++++++++++++++++++++++------- r/extra-tests/write-files.R | 9 +++- 3 files changed, 93 insertions(+), 20 deletions(-) create mode 100644 r/extra-tests/helpers.R diff --git a/r/extra-tests/helpers.R b/r/extra-tests/helpers.R new file mode 100644 index 00000000000..1fae757d8d9 --- /dev/null +++ b/r/extra-tests/helpers.R @@ -0,0 +1,15 @@ +if_version <- function(version, op = `==`) { + op(numeric_version(Sys.getenv("OLD_ARROW_VERSION", "0.0.0")), version) +} + +skip_if_version_less_than <- function(version, msg) { + if(if_version(version, `<`)) { + skip(msg) + } +} + +skip_if_version_equals <- function(version, msg) { + if(if_version(version, `==`)) { + skip(msg) + } +} diff --git a/r/extra-tests/test-read-files.R b/r/extra-tests/test-read-files.R index 0639d78bb44..a6d206586bb 100644 --- a/r/extra-tests/test-read-files.R +++ b/r/extra-tests/test-read-files.R @@ -1,22 +1,6 @@ library(arrow) library(testthat) -if_version <- function(version, op = `==`) { - op(numeric_version(Sys.getenv("OLD_ARROW_VERSION", "0.0.0")), version) -} - -skip_if_version_less_than <- function(version, msg) { - if(if_version(version, `<`)) { - skip(msg) - } -} - -skip_if_version_equals <- function(version, msg) { - if(if_version(version, `==`)) { - skip(msg) - } -} - pq_file <- "files/ex_data.parquet" test_that("Can read the file (parquet)", { @@ -27,6 +11,7 @@ test_that("Can read the file (parquet)", { ) }) +### Parquet test_that("Can see the metadata (feather)", { skip_if_version_less_than("2.0.0", "Version 1.0.1 can't read new version metadata.") @@ -58,12 +43,21 @@ test_that("Can see the metadata (feather)", { ) }) +### Feather +for (comp in c("lz4", "uncompressed", "zstd")) { + feather_file <- paste0("files/ex_data_", comp, ".feather") + test_that(paste0("Can read the file (feather ", comp, ")"), { + # We can read with no error, we assert metadata below + expect_error( + df <- read_feather(feather_file), + NA + ) + }) -for (comp in c("lz4", "uncompressed", "zstd")) { - feather_file <- paste0("files/ex_data.", comp, ".feather") + test_that(paste0("Can see the metadata (feather ", comp, ")"), { + skip_if_version_less_than("2.0.0", "Version 1.0.1 can't read new version metadata.") - test_that(paste0("Can see the metadata (feather", comp, ")"), { df <- read_feather(feather_file) expect_s3_class(df, "tbl") @@ -93,4 +87,61 @@ for (comp in c("lz4", "uncompressed", "zstd")) { }) } +test_that(paste0("Can read feather version 1"), { + feather_v1_file <- "files/ex_data_v1.feather" + + df <- read_feather(feather_v1_file) + expect_s3_class(df, "tbl") + + expect_equal( + attributes(df), + list( + names = c("a", "b", "d"), + class = c("tbl_df", "tbl", "data.frame"), + row.names = 1L + ) + ) +}) + +### IPC Stream +stream_file <- "files/ex_data.stream" + +test_that("Can read the file (parquet)", { + # We can read with no error, we assert metadata below + expect_error( + df <- read_ipc_stream(stream_file), + NA + ) +}) + +test_that("Can see the metadata (stream)", { + df <- read_ipc_stream(stream_file) + + expect_s3_class(df, "tbl") + + expect_equal( + attributes(df), + list( + names = letters[1:4], + row.names = 1L, + top_level = list( + field_one = 12, + field_two = "more stuff" + ), + class = c("tbl_df", "tbl", "data.frame") + ) + ) + + # column-level attributes + expect_equal(attributes(df$a), list(class = "special_string")) + expect_equal( + attributes(df$c), + list( + row.names = 1L, + names = c("c1", "c2", "c3"), + class = c("tbl_df", "tbl", "data.frame") + ) + ) +}) + diff --git a/r/extra-tests/write-files.R b/r/extra-tests/write-files.R index 4e13597e2a7..3e9d6cdd56f 100644 --- a/r/extra-tests/write-files.R +++ b/r/extra-tests/write-files.R @@ -7,9 +7,16 @@ if (!dir.exists("extra-tests/files")) { source("tests/testthat/helper-data.R") write_parquet(example_with_metadata, "extra-tests/files/ex_data.parquet") + for (comp in c("lz4", "uncompressed", "zstd")) { if(!codec_is_available(comp)) break - name <- paste0("extra-tests/files/ex_data.", comp, ".feather") + name <- paste0("extra-tests/files/ex_data_", comp, ".feather") write_feather(example_with_metadata, name, compression = comp) } + +example_with_metadata_v1 <- example_with_metadata +example_with_metadata_v1$c <- NULL +write_feather(example_with_metadata_v1, "extra-tests/files/ex_data_v1.feather", version = 1) + +write_ipc_stream(example_with_metadata, "extra-tests/files/ex_data.stream") From 71e74ea65721e62da8e39b4a1000d919a85ae0c0 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Thu, 7 Jan 2021 16:52:07 -0600 Subject: [PATCH 30/38] Helpers need some copying too --- dev/tasks/r/github.linux.version.compatibility.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/tasks/r/github.linux.version.compatibility.yml b/dev/tasks/r/github.linux.version.compatibility.yml index 8b2a4b5a1aa..2f64227eb8d 100644 --- a/dev/tasks/r/github.linux.version.compatibility.yml +++ b/dev/tasks/r/github.linux.version.compatibility.yml @@ -96,6 +96,7 @@ jobs: - name: Setup our testing directory, copy only the tests to it. run: | mkdir -p extra-tests/files + cp arrow/r/extra-tests/helper*.R extra-tests/ cp arrow/r/extra-tests/test-*.R extra-tests/ - name: Download artifacts uses: actions/download-artifact@v2 From 54d8d2d6ccbe2606c6a52dc59dfd885154c619e8 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Fri, 8 Jan 2021 11:41:24 -0600 Subject: [PATCH 31/38] Add backwards compatibility testing --- r/extra-tests/test-read-files.R | 1 + .../golden-files/data-arrow_1.0.1.parquet | Bin 0 -> 3603 bytes .../golden-files/data-arrow_2.0.0.parquet | Bin 0 -> 3965 bytes r/tests/testthat/parquets/data-arrow2.parquet | Bin 1136 -> 0 bytes .../testthat/test-backwards-compatibility.R | 90 ++++++++++++++++++ r/tests/testthat/test-parquet-compatibility.R | 45 --------- 6 files changed, 91 insertions(+), 45 deletions(-) create mode 100644 r/tests/testthat/golden-files/data-arrow_1.0.1.parquet create mode 100644 r/tests/testthat/golden-files/data-arrow_2.0.0.parquet delete mode 100644 r/tests/testthat/parquets/data-arrow2.parquet create mode 100644 r/tests/testthat/test-backwards-compatibility.R delete mode 100644 r/tests/testthat/test-parquet-compatibility.R diff --git a/r/extra-tests/test-read-files.R b/r/extra-tests/test-read-files.R index a6d206586bb..1722cb6e905 100644 --- a/r/extra-tests/test-read-files.R +++ b/r/extra-tests/test-read-files.R @@ -115,6 +115,7 @@ test_that("Can read the file (parquet)", { }) test_that("Can see the metadata (stream)", { + skip_if_version_less_than("2.0.0", "Version 1.0.1 can't read new version metadata.") df <- read_ipc_stream(stream_file) expect_s3_class(df, "tbl") diff --git a/r/tests/testthat/golden-files/data-arrow_1.0.1.parquet b/r/tests/testthat/golden-files/data-arrow_1.0.1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e1d589bf0992aae3cd5661b79b34dfc4a6f835c3 GIT binary patch literal 3603 zcmcInOK;*<6ux!{$=qf}Q8Y$IVi8puD^lly4bG!grCc5fjAIgr31K!PFboOd;Q=y0 zR@E%3YE*TZWq(2!U3FP?F;b8$_m+l33wB&htB;&q3sf6l5VFJQCIp z1YQWNA+#heAcVS|x`2c=A@r1rg`g0OY@&??5Q}sHiwEmGtUSl@NczA%;Gt&f@=K8h^}>A4}KR#qST|;rC0?f8!58 z`ZwLndA$&za*?ebMm9-5KXN-kvB~l^KaE zq)<34eQke7iGSf2>>(*aNFF})(Flo?M1$-R(jdtOlO+;U`XLpjU43SlwC}L8C`_Lv zWJtgdb`}J7!V)o8k{HwbaUR6!w5AWc<6JX4%B4(AH!fm_sYX26ENUkwh5ZAgq2&yH zlx$`-y;+Qc{7TPvC;QmBgFdam2W-k?-6&qcIhyR`hObIFt6UnrF=K^srKEO1J_H*L zV%RwiUCZUI%W_F>nZxcV*XYt*ir!4ifJ;ucbN%vZ4&r6y904CA4;aZ9>}1`5dAW!roCsX56WIC^Ka_w_5tw++Gz|4TL5;e)lzK?Uy{myqD_ar_hIa%-t zcZ9ny1WxeXWta`|2E#C&c3ohEyMq5AiiLGyeP^F}a0A354xn*!mxGaGS&ot3usgz5 zK=m=}0gIB zVz)*Q#OdU6ka z(j}JfH=RgQmWn5s?>3~j8%{92AlE>DEdSKC{8JA9l!w1py3GI9BL9$=KQF!I9sXVr zm%mrae3-uv%%62GR@H(JuCzk*!gJ-?9E5Sn1tu>FsSS1Ml4xB)Gtj zDkHA<&GWil6*d`R>pll>@;>tdpO(Pm&R|p+O<{d{3iaka=1gU($C;to_1f&*&joe@ z)~7so=}SIG>CNYON75IrbLst;twZTkYsmlihM$8Z!+ei{LKoSd_|F>QL%g!D$l-ln zjyyRyIN(*jkWV~sIX){=r!m4MrVaQNav?s4`gdU`6;rr+aGEhjoi8A;IthO|LeP z?RV>Dvud682fb#yu^Lq6O{?1PQ@~}*4r6e#Isg#kVgSBixWMx#yxFlX?6!N?jaYEO zSkR#xx|VK)?|1C_;QU&mz@a`K^s1-GKA{OPQ~_OT%57?y~ktmKkx2Psov}!78e?^t9e8U+Vd+qP5~` zX3_{Bk&dic#X_zb3~Q~zM9+3EXY<(I$Ah4s*7GU&EvHc@(@KpqdBDq)v{8y&R*Iuh zCKjF44vTiB(HZNp6xx{CD@*_@ZP=gdr(l}NuicbI^?NU9A^->;@gEtqi z(xwuDT;w{{lG?tC=ha2N#W3(aKT(i3`hvDr<^pQR$;DVF-SgUE?y_2XF4vMp;N_f! zG&?gi4vPb$jWy}u(R^KrH^z2eZ;ftiq5g31sU;ov^IRTRzDJP1|bLPI4BF z_sm9S1UXg4vQbizGuGTnUQ|BkmF=E;{2OcbLeA4=uMgSos~PU>ojA4p|Krz~K6R7! z584z8*c)Vg|JuBNKvy`1{)5|Wfhs}M!3`aqWfJ$XgT3M2p6=)eO+1I@0w2}K9z?1* z#3=q01jnR~(muAjvU6Xj=U40@-=sbPl1HcyJB_4TusVQcJcf;uMCnz!#O;(t6!K2U z060>A8ro=fKN`(Zr2>w$|Isbr!dIREOge_Dg}FW%Q-v~)9(}qjH%JouDfDNU>8GI* z_yM6Pljw+0X90EUKQ`K_{ndC>D~k{wf1W;N#IL&5-nSR^!6UQV-5u6@{br}VD?e5r d%a2}W$nnd=qkO=E|8_6^Fu&enm}B_E`#%}{L{tC( literal 0 HcmV?d00001 diff --git a/r/tests/testthat/parquets/data-arrow2.parquet b/r/tests/testthat/parquets/data-arrow2.parquet deleted file mode 100644 index 002a44fd69740ef022f6bb66c30a9e91298f12d9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1136 zcmb_c&2G~`5MCQMlof{x#gQ%LP$8&#NK4j3^$ z1|EO|60gKlaOB9DnN3jB9yqbm?Cj2b^Ude6>^g5SRZ;7z|3OWw$_qlaze$7;8SDv| z0=5R00(%O!4)zR;f~iE^RA1$=PEDz)tfpX8Q?NcmwpVTt*m#GOX-QezF#OxEaNhbY zWmQ=>6_V05C5msDDreG}pHk+Rv^CpW{}-R0gPw{iqXu*!M%HsC(Ng26)9QzqkzdOT zT9QfYSv~uI`+>+z8Ysc13DI+!G}3-ZoEE6DXRu<4vV+q8{ysHn;2-XQFEDC|ol&+& z;Q}zTKv|wP!%4XNX%zNba5GkNCLKU<`~Yv#=q4PGsbLnEY0Qnp-z(U?is|09i>Ewh z_RV?F=;n)dFSv8Oc)s*Az!+f5n&1x3iQR8>nn#uC)!?LaeG$|gpU>TP4A`pS^eW6r zfUj>t?i}%t>Vw*)G3!JH)Dh(G`&60BnaI6N6qys zOO4e>8V~c$iz@$WOE{s{UXjGGs18YS`E^*xw9E*zyb7e}0@C5Gyuw>dxJ zJmO_c!Y~&@hqG4|AQ(1Y#EzSo_mR*lVo>kk`>+yxFUxf%{ClFk9EPLM)7E4s8V)ya iTcdF|9&EB*bC>O$xx9XMeCq4jEPS5d;b&5Zf6X6_Isc0Q diff --git a/r/tests/testthat/test-backwards-compatibility.R b/r/tests/testthat/test-backwards-compatibility.R new file mode 100644 index 00000000000..227deaba1de --- /dev/null +++ b/r/tests/testthat/test-backwards-compatibility.R @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# To write a new version of a test file for a current version: +# write_parquet(example_with_metadata, test_path("golden-files/data-arrow_2.0.0.parquet")) + +# To write a new version of a test file for an old version, use docker(-compose) +# to setup a linux distribution and use RStudio's public package manager binary +# repo to install the old version. The following commands should be run at the +# root of the arrow repo directory and might need slight adjusments. +# R_ORG=rstudio R_IMAGE=r-base R_TAG=4.0-focal docker-compose build --no-cache r +# R_ORG=rstudio R_IMAGE=r-base R_TAG=4.0-focal docker-compose run r /bin/bash +# R +# options(repos = "https://packagemanager.rstudio.com/all/__linux__/focal/latest") +# remotes::install_version("arrow", version = "1.0.1") +# # get example data into the global env +# write_parquet(example_with_metadata, "arrow/r/tests/testthat/golden-files/data-arrow_1.0.1.parquet") +# quit()/exit + +test_that("reading a known Parquet file to dataframe with 2.0.0", { + pq_file <- test_path("golden-files/data-arrow_2.0.0.parquet") + + df <- read_parquet(pq_file) + expect_equal(df, example_with_metadata) + expect_identical(dim(df), c(1L, 4L)) + + expect_equal( + attributes(df), + list( + names = letters[1:4], + row.names = 1L, + top_level = list(field_one = 12, field_two = "more stuff"), + class = c("tbl_df", "tbl", "data.frame")) + ) + expect_equal(attributes(df$a), list(class = "special_string")) + expect_null(attributes(df$b)) + expect_equal( + attributes(df$c), + list( + row.names = 1L, + names = c("c1", "c2", "c3"), + class = c("tbl_df", "tbl", "data.frame") + ) + ) + expect_null(attributes(df$d)) +}) + +test_that("reading a known Parquet file to dataframe with 1.0.1", { + pq_file <- test_path("golden-files/data-arrow_1.0.1.parquet") + + df <- read_parquet(pq_file) + # 1.0.1 didn't save top-level metadata, so we need to remove it. + example_with_metadata_sans_toplevel <- example_with_metadata + attributes(example_with_metadata_sans_toplevel)$top_level <- NULL + expect_equal(df, example_with_metadata_sans_toplevel) + expect_identical(dim(df), c(1L, 4L)) + + expect_equal( + attributes(df), + list( + names = letters[1:4], + row.names = 1L, + class = c("tbl_df", "tbl", "data.frame")) + ) + expect_equal(attributes(df$a), list(class = "special_string")) + expect_null(attributes(df$b)) + expect_equal( + attributes(df$c), + list( + row.names = 1L, + names = c("c1", "c2", "c3"), + class = c("tbl_df", "tbl", "data.frame") + ) + ) + expect_null(attributes(df$d)) +}) diff --git a/r/tests/testthat/test-parquet-compatibility.R b/r/tests/testthat/test-parquet-compatibility.R deleted file mode 100644 index e715acc0602..00000000000 --- a/r/tests/testthat/test-parquet-compatibility.R +++ /dev/null @@ -1,45 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# TODO: skip mostly? Or use this for backwards compat? - -pq_file <- test_path("parquets/data-arrow2.parquet") - -test_that("reading a known Parquet file to dataframe", { - df <- read_parquet(pq_file) - expect_data_frame(df, data.frame(col1 = 1:10)) - expect_identical(dim(df), c(10L, 1L)) - - tab <- read_parquet(pq_file, as_data_frame = FALSE) - expect_s3_class(tab, "Table") - expect_equal( - # unserialize like .unserialize_arrow_r_metadata does (though we can't call - # it directly because it's not exported) - unserialize(charToRaw(tab$metadata$r)), - list( - attributes = list(class = "data.frame"), - columns = list(col1 = NULL) - ) - ) - - # test the workaround - tab$metadata$r <- NULL - df <- as.data.frame(tab) - - expect_data_frame(df, data.frame(col1 = 1:10)) - expect_identical(dim(df), c(10L, 1L)) -}) From 9871abc01d6fd4bf19c69a3fea7d62fb42f99359 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Fri, 8 Jan 2021 11:48:39 -0600 Subject: [PATCH 32/38] Add skips if snappy isn't available --- r/tests/testthat/test-backwards-compatibility.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/r/tests/testthat/test-backwards-compatibility.R b/r/tests/testthat/test-backwards-compatibility.R index 227deaba1de..ce3e8c15857 100644 --- a/r/tests/testthat/test-backwards-compatibility.R +++ b/r/tests/testthat/test-backwards-compatibility.R @@ -32,6 +32,7 @@ # quit()/exit test_that("reading a known Parquet file to dataframe with 2.0.0", { + skip_if_not_available("snappy") pq_file <- test_path("golden-files/data-arrow_2.0.0.parquet") df <- read_parquet(pq_file) @@ -60,6 +61,7 @@ test_that("reading a known Parquet file to dataframe with 2.0.0", { }) test_that("reading a known Parquet file to dataframe with 1.0.1", { + skip_if_not_available("snappy") pq_file <- test_path("golden-files/data-arrow_1.0.1.parquet") df <- read_parquet(pq_file) @@ -88,3 +90,5 @@ test_that("reading a known Parquet file to dataframe with 1.0.1", { ) expect_null(attributes(df$d)) }) + +# TODO: feather, streams(?) From 5cab1b242a7a9872b43ab21b25ffe995632db8eb Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Fri, 8 Jan 2021 12:19:40 -0600 Subject: [PATCH 33/38] Add feather files --- .../golden-files/data-arrow_1.0.1_lz4.feather | Bin 0 -> 2858 bytes .../data-arrow_1.0.1_uncompressed.feather | Bin 0 -> 2626 bytes .../data-arrow_1.0.1_zstd.feather | Bin 0 -> 2842 bytes .../golden-files/data-arrow_2.0.0_lz4.feather | Bin 0 -> 3162 bytes .../data-arrow_2.0.0_uncompressed.feather | Bin 0 -> 2930 bytes .../data-arrow_2.0.0_zstd.feather | Bin 0 -> 3146 bytes .../testthat/test-backwards-compatibility.R | 67 +++++++++++++++++- 7 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 r/tests/testthat/golden-files/data-arrow_1.0.1_lz4.feather create mode 100644 r/tests/testthat/golden-files/data-arrow_1.0.1_uncompressed.feather create mode 100644 r/tests/testthat/golden-files/data-arrow_1.0.1_zstd.feather create mode 100644 r/tests/testthat/golden-files/data-arrow_2.0.0_lz4.feather create mode 100644 r/tests/testthat/golden-files/data-arrow_2.0.0_uncompressed.feather create mode 100644 r/tests/testthat/golden-files/data-arrow_2.0.0_zstd.feather diff --git a/r/tests/testthat/golden-files/data-arrow_1.0.1_lz4.feather b/r/tests/testthat/golden-files/data-arrow_1.0.1_lz4.feather new file mode 100644 index 0000000000000000000000000000000000000000..f3a71435a6cea40e6e6a8599f745f8d80083dce6 GIT binary patch literal 2858 zcmeHJy>1gh5Z?GN*6KhmC@et2qH+-=-`N*C5{2C$r9eng5M_M69AL>lBcCl%q_FY~ zc>oG3o`Of9prFhnP*P#OZ-0DegVO{GB#*Vb-~8;%+}_NsdnYF^Ub#XnLErO2R0K(i zyP_nDq6`GuE3D)W*n*K?C2a= z&z@jyy3&)L-%?1=p2Dtl4<(e?<~OC=lF7R$90sGaWHuefZ=~DsGJ0Re!8l6R_V_X! z1xcd3GlJUTyUH6XN>w+QrmnqebhKJPll_oz^g|FhVqsNl0D3pJvUbdDIBCQOyT*hBBP#I zcU2VESF8tyFQbN0jm3N5b)5o$e*9c6S%>Es2ioS&6>t@Ueo*$JkI}+l?bcy)s_pW$ z$@;of@|-|9i?Aug#TL6ySvyaNkJ|3H+^!&3zR-irF~s;{eAa;ZEX+|;3Hg=qBjuF+ z0H^{h%J)^K71OJGMdr?aw)+b}#EcVQkPvG54Fbj?IXD&F`Hu1?@&)k_@)|c8av-U| z6Z@TKQ-_W=^pP=B(HKvEQvm)zkBqEl*867dn$<3rxAuLjFzYtn+AoY9^CW#xb2?ex z+J8<}3R&LjuQ&9Tw|aXJ4>ET9z}fSm>R3)33fr2C9lW=qX)Z~#F5qN zejvB8ACfM`s$aNn=!=w*EHHYoi(wo`yb=^`$qz--y;*;Jqu!Q0YEYAj4|ef-GGB#P z^-l}dsDlb-hdJO_*X*39nCl9ecV!%2n-^Exe1W(z_4h*Gox3|y-&hS(2)Uq0nIcHuSL{d>c7u`%DGH(t>&*g|>^1V*5=AP$A)i3S zm!wRQ522*OoHGx5*CK=O@d=G@oZJJ)x1GCV(j^UhOh1^Q7{s;x+(o~nkbt0oX= zueO%Az}Bq%x%XywiKAWbCPwNbAyv;$nl73mt<4Z>U zE2(WC&6FB7^Cek_1zsN+7|KT$q*%<*)yJ=P=3H_^lB z#`-hxrcVJtKYy)O?87UJBV+UA2DpR4ebDymJw}MZ-aUlPg|VyBrnuKvEw3oFvq}P` zt{<@bUbOR-`ey9@$n6Gly%+8vb4)Pq?4Ad}JPUI))Ifd>{E;f!egxEp73KRn)3&{< zdv)f{`|SK{K*Wp_V~`MPbqfLGkXq`wz4BxLT z99LdhUy@r{d(yU#vlaXIs-f2*>|_}j(7V0mfrDBKd^dNYqI+o z#kx@TuX)B`O=nq3=x9K|T8y(S<@eZ7dtlZxy)yR?v!zzEFR0`1&_Z{x^IG6|7uZp+ wFtdiPpELk}XAgJ^Tqe=JySKxi->o1MjAA0RR91 literal 0 HcmV?d00001 diff --git a/r/tests/testthat/golden-files/data-arrow_1.0.1_zstd.feather b/r/tests/testthat/golden-files/data-arrow_1.0.1_zstd.feather new file mode 100644 index 0000000000000000000000000000000000000000..056b26c1743ad10ed6fcf8401a8bccc4c8df9691 GIT binary patch literal 2842 zcmeHJKaUbo5FcQH&C6;i)L>!^CKi~SkzLq>ODvAYSNRDn z?ju-P+4w!K@B>&{UTLHFoA-xZRV|M2kXbC+lVx-1dZh)K|O zs!*A1aG-*<)H@I@tv@R=4;YK}HlB>xl2w z5PRDdp78vRYU`;o zv_y0|n+C_6^{HW~I5SU#%e3H^uM8gG6%QnPm>qL&t_V_gkg{RQo?Iz2`ic9l zj^_G;eMieSdKleUz5!YFDHzcApNj?e;R)7`vblB+zK+Fl(Dw8atAoY8+l0-LvMaGo zHr6|-FB00B!Zy*_2D^`0JCEptviq&JbLeF(%plhoVqNH%4eZ{$afAzDtsmNTU%PI=5B`XoMn??8%DmP+qF0kS-$UH*oT7&1 zpzFCq9Y{Rm?DZJWrsVW@q62-?Us!ey*n{s%_4}ai%I%$~?#S((DE|>`??g<9nZ^I% MPOPKYzWh)83q@Uv$^ZZW literal 0 HcmV?d00001 diff --git a/r/tests/testthat/golden-files/data-arrow_2.0.0_lz4.feather b/r/tests/testthat/golden-files/data-arrow_2.0.0_lz4.feather new file mode 100644 index 0000000000000000000000000000000000000000..b65da723466572ab42a22cde0274d63114afd932 GIT binary patch literal 3162 zcmeHKy>1gh5ME+Cu~r9i;lctWEUFYi@}2F(mMH87i2@--AQ~k5d_EIPwlDIXfzqN% zMNOGUNJY&54w$)A(=SFq%B8d ze=_U^Ge5f&y$Ph##5UxlX=AsPueOYW)852??N3;V0&H9hGp&ta!&BcIyOUlR&BwD> zzjoS`-oy<5MsPA%MEDjuE`0fs^n%G^I!kPm6RTa@tQLn`UmI>)hQZX2 zPSysB82q=<-0e{$|0LY(=c`1hliBD+J-qxTe=B3NJuXs zhkvy*=fNCrpK_v%q__k=(7c8GLZ%cbzsh?SpGZ03)rEM^@*Q!uNzW1Dqt?4ndOyX;{!k;%A-_!ds$d_Ak$~bF zpk@>?c6nt7@s^q2(OU8rL!WiL`7F5i@k|lUhp~5#u_ZF zl;DHv5Y?l7$Vi8Lq}1YlRISs-8FWwcRgG_J+|k$|!yEm+5oo-QH~LGhM{yFI^PEhE zH~OCv5mSaY@~e#8@J4P9;!a9$4|Mu`Nc6vX>JbF_`%fzU&O5M6V(Gav7(&azE>X*k zlzNSVj6D`RELbBiC@X>9ra;?+U5;lnpVF6MjXjdEvswOlquitm-k<`#KiK6cSj5}& z`zH;0qZVE;WtIhw-ZfB0^VOaV5ned7bz!ktmV|9Tn0EM1vF@$KfJ5_9=FNN4Scb zl1I3TXP~6Gci;sm5#gKluHzU2NI;i~);qKFv$L~1GZ~(rKYk{KSOKmqAzFfg#2v9G z8lnjdaLX#zZIG5;Kd3VfHU_UZ@nv2U_?KGF`;2)=9db`x**ziIKkfB;a@g#OD+3Hi4T#lmfRTwEDcCfqQW?34}rk7zb^P^FkEoSk{pBwcki2O8F z2CF+49^;UH1|#EThO=fkE^%^FnGr|1z+iKd{9ST3AMz-_7aOmGU!09w z;{L-gi1UPF$U1zcclzL;^a0PXwh5$kcRfwFoLx`*T??*ur(I7&LhX8*Q2of%SuQs| NhUsb7U<}oN?eEBawp0KB literal 0 HcmV?d00001 diff --git a/r/tests/testthat/golden-files/data-arrow_2.0.0_zstd.feather b/r/tests/testthat/golden-files/data-arrow_2.0.0_zstd.feather new file mode 100644 index 0000000000000000000000000000000000000000..39c829fda204647f17cc70cc25f340e75273e181 GIT binary patch literal 3146 zcmeHKy>1gh5T5uawpIs9k)R+U6eNlu1#x$_6I-H)8zd^E2m}R^xt!0$lI@FpXUHR@ zLX?y|f*W`aiaY=vloXUe_-23X^I``ipz}t1JG0-+?Cj6XIL(8D2ai3XbF|X}(He0R zbeUGENM%rHD}~9t1Gb>&H}gybM(6o5zKoSYf6XcE6UL^f2!Gf2s=LB#?A7aaQ4^0I z-n)&l;t5}VJxzO6;R(+d9%Q9bts*>E^y6Xl)UCQnI_h*V;b-8Q7zF(&VHMw~G(@Kx z_1dj?5ZP1d%UJj|EW<`>9#+fPYKSyGZuO!UQBMl7gVh-~v(j)jIgY|^&}${>use7@ z-)K#Qy&y@XL3`zi$Jk_$!pPw$#a{Thu=70;#=TL0Fx8!&m^-tWJ8`$N(A|be;(nAK zEp!wLFlfz6dAx3wm-4paELfGeQ6Gkg``>HD~FGU`ifN$#6=x z|7v9!2SdDlj)`>y#TmvO%{z!ML`sq4YhBM_kUDLQ-+r8*yGrKLHSk;LYzHAmN9cFa zS(jriMBjnzGbNw5Z6f3-tPs7Dc#cjllJkk)Yq_zK`^86T4>{s3@Ux7qO{~K{a$~;^ zku%EZt8yj>f7c~nlG$NR=MZ%Uwr&8Og8Y-pG9P_}Q9dhf4sGg; Date: Fri, 8 Jan 2021 13:46:57 -0600 Subject: [PATCH 34/38] Add apache licensing where it should be. --- r/extra-tests/helpers.R | 17 +++++++++++++++++ r/extra-tests/test-read-files.R | 17 +++++++++++++++++ r/extra-tests/write-files.R | 17 +++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/r/extra-tests/helpers.R b/r/extra-tests/helpers.R index 1fae757d8d9..a31b6a39336 100644 --- a/r/extra-tests/helpers.R +++ b/r/extra-tests/helpers.R @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + if_version <- function(version, op = `==`) { op(numeric_version(Sys.getenv("OLD_ARROW_VERSION", "0.0.0")), version) } diff --git a/r/extra-tests/test-read-files.R b/r/extra-tests/test-read-files.R index 1722cb6e905..e1950df6dcf 100644 --- a/r/extra-tests/test-read-files.R +++ b/r/extra-tests/test-read-files.R @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + library(arrow) library(testthat) diff --git a/r/extra-tests/write-files.R b/r/extra-tests/write-files.R index 3e9d6cdd56f..e0927ead4eb 100644 --- a/r/extra-tests/write-files.R +++ b/r/extra-tests/write-files.R @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + library(arrow) if (!dir.exists("extra-tests/files")) { From 2323e8f96f78d383f86c0ec580686114e348a46b Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Fri, 8 Jan 2021 18:52:13 -0600 Subject: [PATCH 35/38] PR comments, add 0.17.0 fixture that exhibits the case mis-match on codecs --- r/extra-tests/helpers.R | 2 +- r/extra-tests/test-read-files.R | 4 ++-- .../golden-files/data-arrow_0.17.0_lz4.feather | Bin 0 -> 1650 bytes .../data-arrow_0.17.0_uncompressed.feather | Bin 0 -> 1354 bytes .../golden-files/data-arrow_0.17.0_zstd.feather | Bin 0 -> 1626 bytes r/tests/testthat/test-backwards-compatibility.R | 12 ++++++++++++ 6 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 r/tests/testthat/golden-files/data-arrow_0.17.0_lz4.feather create mode 100644 r/tests/testthat/golden-files/data-arrow_0.17.0_uncompressed.feather create mode 100644 r/tests/testthat/golden-files/data-arrow_0.17.0_zstd.feather diff --git a/r/extra-tests/helpers.R b/r/extra-tests/helpers.R index a31b6a39336..61b7da4ec25 100644 --- a/r/extra-tests/helpers.R +++ b/r/extra-tests/helpers.R @@ -16,7 +16,7 @@ # under the License. if_version <- function(version, op = `==`) { - op(numeric_version(Sys.getenv("OLD_ARROW_VERSION", "0.0.0")), version) + op(packageVersion("arrow"), version) } skip_if_version_less_than <- function(version, msg) { diff --git a/r/extra-tests/test-read-files.R b/r/extra-tests/test-read-files.R index e1950df6dcf..90efce3d791 100644 --- a/r/extra-tests/test-read-files.R +++ b/r/extra-tests/test-read-files.R @@ -29,7 +29,7 @@ test_that("Can read the file (parquet)", { }) ### Parquet -test_that("Can see the metadata (feather)", { +test_that("Can see the metadata (parquet)", { skip_if_version_less_than("2.0.0", "Version 1.0.1 can't read new version metadata.") df <- read_parquet(pq_file) @@ -104,7 +104,7 @@ for (comp in c("lz4", "uncompressed", "zstd")) { }) } -test_that(paste0("Can read feather version 1"), { +test_that("Can read feather version 1", { feather_v1_file <- "files/ex_data_v1.feather" df <- read_feather(feather_v1_file) diff --git a/r/tests/testthat/golden-files/data-arrow_0.17.0_lz4.feather b/r/tests/testthat/golden-files/data-arrow_0.17.0_lz4.feather new file mode 100644 index 0000000000000000000000000000000000000000..d91acd0cc9e18c81e24bcb12bbd779217871b81d GIT binary patch literal 1650 zcmeHHu}T9$5S`=_4IxSji=aUds}yOXR$`Hqf+C0@*q9hk2$)NV3KkZJ-;fVTVdYo& z2^M~UpJ8QuZ+AvQLaelNV0PxsyxHBG-Mz-~@zH5bWDM!2M2bSmOF-4}t3^9EaW8E`NU3bHG}tRaW;@i`uYDwoiz=PV83m!RYX&?a;T%2g`hoOoV9 z&g5K=E}oV&;_YpbhdI3GN#2&oljqIsZHTy=3w_`o+t62?Glb7?vF{M{B?nzVOt1s; zFQ7%R7+-WdEovWAS?--ZwfYP^`mAd~vCO!H#mV!s4Bz*@h`E*-3-goZeW zktf2QxxT@!LmNET-RQm_4LaQ@zHMEfw|m|GAiBBf^kU!dgEMwJkM;Z`fNexkgx+L2l_hK>JX3AKhT|2~yBYlWWciW4u513r_?!K3I?}rTD*#h1mUuOY`Z-Z^x!&rfG g9-dXA2Nv?dL&E6;MC|`R$Ikrje{O_6yMI#c3wpJmBme*a literal 0 HcmV?d00001 diff --git a/r/tests/testthat/golden-files/data-arrow_0.17.0_uncompressed.feather b/r/tests/testthat/golden-files/data-arrow_0.17.0_uncompressed.feather new file mode 100644 index 0000000000000000000000000000000000000000..0198024ec747ee9854b2295a71e59ba31455192f GIT binary patch literal 1354 zcmeHHyH3ME5FE#GVns%BflvyHlqpj1DkvzNE>uVn5-kvdMB^YNIts)$wCuh4rMZ1lF9I_rVbdVrrM2li#5T$&!!&4E^ zXU#juk;{<7J}gK!iB%(x?BlblNW~R>MrP9rg|gL8nO&q>>}ke*9md#G@AG{pFQG?J^Z`bX07_GpP7G$ zhQ5!|R%WAgmo;k)8&Bt`+d6FgK_Dk#9M+2&8#ewfQU!~VZt3G%z&?=$rtyMO2x#-Mp^ literal 0 HcmV?d00001 diff --git a/r/tests/testthat/golden-files/data-arrow_0.17.0_zstd.feather b/r/tests/testthat/golden-files/data-arrow_0.17.0_zstd.feather new file mode 100644 index 0000000000000000000000000000000000000000..f6788231c8a2ecec805760df8cf8966f425e3e25 GIT binary patch literal 1626 zcmeHHy-EW?5T1BBJtT%JR8A0yT?9d*79th~Y^;PRC^j172?28nQ6Yte_yiVt01GQC z8{Z>^4`OMf@%wgnB*er@I|pWWe!iLAo4MIyskF0S5b1&%DUnb}8V@kYg8+XLI%9|-y?u1MfXXR&(PAdrkRvcL9{3o%3g#(!VD>D| zBc?akt+9{D2<(*=k?SG2$6nmB$b-dwyIUJ}lM8j=9V_6D%bA1BZ_)1%)Fl9q!X~%@ z@h9LADB8!&O+#15l+U}frW&7yM454AFplX)IGi|3S;)5bA#!DD^gL5H(*p*AxEUQQ zJHncox?!IK7unbKxYdZ8)moffmd}qW^;)ADUtCn{iM`L^?%o!A9l0Fr^(+$ezU~-E z`cD|~+cM)UIk&S`q&2&Jt*R^j;LBr$VY=d54nOVoyS{kU75~)dS6%&`KELYfe|7$; z9^4q@-n!8B*E)Mj%0Dl?=YpbDI4q7-G=2}auO~5G1LI)dq!TpI?^_>N6-{1$L8O`_ zadSu Date: Mon, 11 Jan 2021 09:42:04 -0600 Subject: [PATCH 36/38] Add skip for upcoming compatibility change. --- r/tests/testthat/test-backwards-compatibility.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/r/tests/testthat/test-backwards-compatibility.R b/r/tests/testthat/test-backwards-compatibility.R index 50a72968fb2..d089d392c67 100644 --- a/r/tests/testthat/test-backwards-compatibility.R +++ b/r/tests/testthat/test-backwards-compatibility.R @@ -161,10 +161,9 @@ for (comp in c("lz4", "uncompressed", "zstd")) { if (comp %in% c("lz4", "zstd")) { # there is a case mis-match with versions 0.17.0 and before for the codec names - expect_error(df <- read_feather(feather_file), "Unrecognized compression type:") - } else { - expect_error(df <- read_feather(feather_file), NA) + skip("ARROW-11163 will re-enable this compatibility") } + expect_error(df <- read_feather(feather_file), NA) }) } From 5c9fe3fefe69cc5f5af1179897ad64027274762c Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Mon, 11 Jan 2021 09:50:48 -0600 Subject: [PATCH 37/38] Remove the skip now that ARROW-11163 is merged --- r/tests/testthat/test-backwards-compatibility.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/r/tests/testthat/test-backwards-compatibility.R b/r/tests/testthat/test-backwards-compatibility.R index d089d392c67..b9babbf744e 100644 --- a/r/tests/testthat/test-backwards-compatibility.R +++ b/r/tests/testthat/test-backwards-compatibility.R @@ -159,10 +159,6 @@ for (comp in c("lz4", "uncompressed", "zstd")) { skip_if_not_available(comp) feather_file <- test_path(paste0("golden-files/data-arrow_0.17.0_", comp,".feather")) - if (comp %in% c("lz4", "zstd")) { - # there is a case mis-match with versions 0.17.0 and before for the codec names - skip("ARROW-11163 will re-enable this compatibility") - } expect_error(df <- read_feather(feather_file), NA) }) } From e134e6053863ecae868a676aaf4b1158a94176b5 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Mon, 11 Jan 2021 13:01:20 -0600 Subject: [PATCH 38/38] Make a helper, test some (but not all) of the 0.17.0 reading. --- .../testthat/test-backwards-compatibility.R | 116 ++++-------------- 1 file changed, 25 insertions(+), 91 deletions(-) diff --git a/r/tests/testthat/test-backwards-compatibility.R b/r/tests/testthat/test-backwards-compatibility.R index b9babbf744e..73c25cdceb7 100644 --- a/r/tests/testthat/test-backwards-compatibility.R +++ b/r/tests/testthat/test-backwards-compatibility.R @@ -31,33 +31,25 @@ # write_parquet(example_with_metadata, "arrow/r/tests/testthat/golden-files/data-arrow_1.0.1.parquet") # quit()/exit +expect_identical_with_metadata <- function(object, expected, ..., top_level = TRUE) { + attrs_to_keep <- c("names", "class", "row.names") + if (!top_level) { + # remove not-tbl and not-data.frame attributes + for (attribute in names(attributes(expected))) { + if (attribute %in% attrs_to_keep) next + attributes(expected)[[attribute]] <- NULL + } + } + expect_identical(object, expected, ...) +} + test_that("reading a known Parquet file to dataframe with 2.0.0", { skip_if_not_available("snappy") pq_file <- test_path("golden-files/data-arrow_2.0.0.parquet") df <- read_parquet(pq_file) - expect_equal(df, example_with_metadata) - expect_identical(dim(df), c(1L, 4L)) - - expect_equal( - attributes(df), - list( - names = letters[1:4], - row.names = 1L, - top_level = list(field_one = 12, field_two = "more stuff"), - class = c("tbl_df", "tbl", "data.frame")) - ) - expect_equal(attributes(df$a), list(class = "special_string")) - expect_null(attributes(df$b)) - expect_equal( - attributes(df$c), - list( - row.names = 1L, - names = c("c1", "c2", "c3"), - class = c("tbl_df", "tbl", "data.frame") - ) - ) - expect_null(attributes(df$d)) + # this is equivalent to `expect_identical()` + expect_identical_with_metadata(df, example_with_metadata) }) test_that("reading a known Parquet file to dataframe with 1.0.1", { @@ -66,29 +58,7 @@ test_that("reading a known Parquet file to dataframe with 1.0.1", { df <- read_parquet(pq_file) # 1.0.1 didn't save top-level metadata, so we need to remove it. - example_with_metadata_sans_toplevel <- example_with_metadata - attributes(example_with_metadata_sans_toplevel)$top_level <- NULL - expect_equal(df, example_with_metadata_sans_toplevel) - expect_identical(dim(df), c(1L, 4L)) - - expect_equal( - attributes(df), - list( - names = letters[1:4], - row.names = 1L, - class = c("tbl_df", "tbl", "data.frame")) - ) - expect_equal(attributes(df$a), list(class = "special_string")) - expect_null(attributes(df$b)) - expect_equal( - attributes(df$c), - list( - row.names = 1L, - names = c("c1", "c2", "c3"), - class = c("tbl_df", "tbl", "data.frame") - ) - ) - expect_null(attributes(df$d)) + expect_identical_with_metadata(df, example_with_metadata, top_level = FALSE) }) for (comp in c("lz4", "uncompressed", "zstd")) { @@ -100,28 +70,7 @@ for (comp in c("lz4", "uncompressed", "zstd")) { feather_file <- test_path(paste0("golden-files/data-arrow_2.0.0_", comp,".feather")) df <- read_feather(feather_file) - expect_equal(df, example_with_metadata) - expect_identical(dim(df), c(1L, 4L)) - - expect_equal( - attributes(df), - list( - names = letters[1:4], - row.names = 1L, - top_level = list(field_one = 12, field_two = "more stuff"), - class = c("tbl_df", "tbl", "data.frame")) - ) - expect_equal(attributes(df$a), list(class = "special_string")) - expect_null(attributes(df$b)) - expect_equal( - attributes(df$c), - list( - row.names = 1L, - names = c("c1", "c2", "c3"), - class = c("tbl_df", "tbl", "data.frame") - ) - ) - expect_null(attributes(df$d)) + expect_identical_with_metadata(df, example_with_metadata) }) test_that("reading a known Feather file to dataframe with 1.0.1", { @@ -130,36 +79,21 @@ for (comp in c("lz4", "uncompressed", "zstd")) { df <- read_feather(feather_file) # 1.0.1 didn't save top-level metadata, so we need to remove it. - example_with_metadata_sans_toplevel <- example_with_metadata - attributes(example_with_metadata_sans_toplevel)$top_level <- NULL - expect_equal(df, example_with_metadata_sans_toplevel) - expect_identical(dim(df), c(1L, 4L)) - - expect_equal( - attributes(df), - list( - names = letters[1:4], - row.names = 1L, - class = c("tbl_df", "tbl", "data.frame")) - ) - expect_equal(attributes(df$a), list(class = "special_string")) - expect_null(attributes(df$b)) - expect_equal( - attributes(df$c), - list( - row.names = 1L, - names = c("c1", "c2", "c3"), - class = c("tbl_df", "tbl", "data.frame") - ) - ) - expect_null(attributes(df$d)) + expect_identical_with_metadata(df, example_with_metadata, top_level = FALSE) }) test_that("reading a known Feather file to dataframe with 0.17.0", { skip_if_not_available(comp) feather_file <- test_path(paste0("golden-files/data-arrow_0.17.0_", comp,".feather")) - expect_error(df <- read_feather(feather_file), NA) + df <- read_feather(feather_file) + # the metadata from 0.17.0 doesn't have the top level, the special class is + # not maintained and the embedded tibble's attributes are read in a wrong + # order. Since this is prior to 1.0.0 punting on checking the attributes + # though classes are always checked, so that must be removed before checking. + example_with_metadata_sans_special_class <- example_with_metadata + example_with_metadata_sans_special_class$a <- unclass(example_with_metadata_sans_special_class$a) + expect_equal(df, example_with_metadata_sans_special_class, check.attributes = FALSE) }) }