apache · jonkeane · Jul 27, 2024 · Jul 21, 2024 · Jul 23, 2024 · Jul 23, 2024
@@ -138,7 +138,13 @@ inline R_xlen_t r_string_size(SEXP s) {
 }  // namespace unsafe
 
 inline SEXP utf8_strings(SEXP x) {
-  return cpp11::unwind_protect([x] {
+  return cpp11::unwind_protect([&] {
+    // ensure that x is not actually altrep first this also ensures that
+    // x is not altrep even after it is materialized
+    bool was_altrep = ALTREP(x);
+    if (was_altrep) {
+      x = PROTECT(Rf_duplicate(x));
+    }
     R_xlen_t n = XLENGTH(x);
 
     // if `x` is an altrep of some sort, this will
@@ -152,6 +158,9 @@ inline SEXP utf8_strings(SEXP x) {
         SET_STRING_ELT(x, i, Rf_mkCharCE(Rf_translateCharUTF8(s), CE_UTF8));
       }
     }
+    if (was_altrep) {
+      UNPROTECT(1);
+    }
     return x;
   });
 }

@@ -738,5 +738,22 @@ test_that("read_csv2_arrow correctly parses comma decimals", {
   tf <- tempfile()
   writeLines("x;y\n1,2;c", con = tf)
   expect_equal(read_csv2_arrow(tf), tibble(x = 1.2, y = "c"))
+})
+
+test_that("altrep columns can roundtrip to table", {
+  tf <- tempfile()
+  on.exit(unlink(tf))
+  write.csv(tbl, tf, row.names = FALSE)
+
+  # read in, some columns will be altrep by default
+  new_df <- read_csv_arrow(tf)
+  expect_equal(tbl, as_tibble(arrow_table(new_df)))
+
+  # but also if we materialize the vector
+  # this could also be accomplished with printing
+  new_df <- read_csv_arrow(tf)
+  test_arrow_altrep_force_materialize(new_df$chr)
 
+  # we should still be able to turn this into a table
+  expect_equal(tbl, as_tibble(arrow_table(new_df)))
 })