From 754e559a04f09ddbc588d1514924d87a72d4a33f Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 28 Sep 2020 13:07:24 -0400 Subject: [PATCH 01/34] ARROW-9782: [C++][Dataset] More configurable Dataset writing --- cpp/src/arrow/dataset/file_base.cc | 239 +++++++++++++------ cpp/src/arrow/dataset/file_base.h | 85 +++++-- cpp/src/arrow/dataset/file_csv.cc | 16 ++ cpp/src/arrow/dataset/file_csv.h | 8 +- cpp/src/arrow/dataset/file_ipc.cc | 46 +++- cpp/src/arrow/dataset/file_ipc.h | 43 +++- cpp/src/arrow/dataset/file_ipc_test.cc | 9 +- cpp/src/arrow/dataset/file_parquet.cc | 83 ++++--- cpp/src/arrow/dataset/file_parquet.h | 48 +++- cpp/src/arrow/dataset/file_parquet_test.cc | 32 ++- cpp/src/arrow/dataset/partition.cc | 18 +- cpp/src/arrow/dataset/partition.h | 14 +- cpp/src/arrow/dataset/partition_test.cc | 2 +- cpp/src/arrow/dataset/test_util.h | 196 +++++++-------- cpp/src/arrow/dataset/type_fwd.h | 7 + cpp/src/arrow/util/mutex.cc | 10 + cpp/src/arrow/util/mutex.h | 29 +++ python/pyarrow/_dataset.pyx | 133 ++++------- python/pyarrow/dataset.py | 16 +- python/pyarrow/includes/libarrow_dataset.pxd | 25 +- python/pyarrow/tests/test_dataset.py | 86 +++---- 21 files changed, 738 insertions(+), 407 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index 1e47b15ee43..90d270a4b30 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -31,6 +31,7 @@ #include "arrow/io/memory.h" #include "arrow/util/iterator.h" #include "arrow/util/logging.h" +#include "arrow/util/mutex.h" #include "arrow/util/task_group.h" namespace arrow { @@ -143,91 +144,193 @@ FragmentIterator FileSystemDataset::GetFragmentsImpl( return MakeVectorIterator(std::move(fragments)); } -struct WriteTask { - Status Execute(); +Status FileWriter::Write(RecordBatchReader* batches) { + for (std::shared_ptr batch;;) { + RETURN_NOT_OK(batches->ReadNext(&batch)); + if (batch == nullptr) break; + RETURN_NOT_OK(Write(batch)); + } + return Status::OK(); +} - /// The basename of files written by this WriteTask. 
Extensions - /// are derived from format - std::string basename; +struct NextBasenameGenerator { + static Result Make(const std::string& basename_template) { + if (basename_template.find(fs::internal::kSep) != std::string::npos) { + return Status::Invalid("basename_template contained '/'"); + } + size_t token_start = basename_template.find(token()); + if (token_start == std::string::npos) { + return Status::Invalid("basename_template did not contain '{i}'"); + } + return NextBasenameGenerator{basename_template, 0, token_start, + token_start + token().size()}; + } - /// The partitioning with which paths will be generated - std::shared_ptr partitioning; + static const std::string& token() { + static const std::string token = "{i}"; + return token; + } - /// The format in which fragments will be written - std::shared_ptr format; + const std::string& template_; + size_t i_, token_start_, token_end_; - /// The FileSystem and base directory into which fragments will be written - std::shared_ptr filesystem; - std::string base_dir; + std::string operator()() { + return template_.substr(0, token_start_) + std::to_string(i_++) + + template_.substr(token_end_); + } +}; - /// Batches to be written - std::shared_ptr batches; +using MutexedWriter = util::Mutexed>; - /// An Expression already satisfied by every batch to be written - std::shared_ptr partition_expression; -}; +struct WriterSet { + WriterSet(NextBasenameGenerator next_basename, + const FileSystemDatasetWriteOptions& write_options) + : next_basename_(std::move(next_basename)), + base_dir_(fs::internal::EnsureTrailingSlash(write_options.base_dir)), + write_options_(write_options) {} + + Result> Get(const Expression& partition_expression, + const std::shared_ptr& schema) { + ARROW_ASSIGN_OR_RAISE(auto part_segments, + write_options_.partitioning->Format(partition_expression)); + std::string dir = base_dir_ + part_segments; -Status WriteTask::Execute() { - std::unordered_map path_to_batches; - - // TODO(bkietz) these calls to Partition() should be scattered across a TaskGroup - for (auto maybe_batch : IteratorFromReader(batches)) { - ARROW_ASSIGN_OR_RAISE(auto batch, maybe_batch); - ARROW_ASSIGN_OR_RAISE(auto partitioned_batches, partitioning->Partition(batch)); - for (auto&& partitioned_batch : partitioned_batches) { - AndExpression expr(std::move(partitioned_batch.partition_expression), - partition_expression); - ARROW_ASSIGN_OR_RAISE(std::string path, partitioning->Format(expr)); - path = fs::internal::EnsureLeadingSlash(path); - path_to_batches[path].push_back(std::move(partitioned_batch.batch)); + auto lock = dir_to_writer_.Lock(); + + auto it_success = + dir_to_writer_->emplace(std::move(dir), std::make_shared()); + std::shared_ptr writer = it_success.first->second; + + if (!it_success.second) { + return writer; } + // FIXME Flush writers when at capacity for open files. 
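+    // The freshly inserted writer is locked before the map lock is released, so
+    // concurrent callers for the same directory (which receive the same shared_ptr
+    // through the early return above) cannot write through it until the output
+    // file has been opened and the FileWriter constructed below.
+    // Note that `dir` was moved into the map by emplace, so it is rebuilt from
+    // base_dir_ + part_segments before creating the directory.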
+ + std::string basename = next_basename_(); + auto writer_lock = writer->Lock(); + lock.Unlock(); + + dir = base_dir_ + part_segments; + RETURN_NOT_OK(write_options_.filesystem->CreateDir(dir)); + ARROW_ASSIGN_OR_RAISE(auto destination, + write_options_.filesystem->OpenOutputStream( + fs::internal::ConcatAbstractPath(dir, basename))); + ARROW_ASSIGN_OR_RAISE( + **writer, write_options_.format()->MakeWriter(std::move(destination), schema, + write_options_.file_write_options)); + return writer; } - for (auto&& path_batches : path_to_batches) { - auto dir = base_dir + path_batches.first; - RETURN_NOT_OK(filesystem->CreateDir(dir, /*recursive=*/true)); - - auto path = fs::internal::ConcatAbstractPath(dir, basename); - ARROW_ASSIGN_OR_RAISE(auto destination, filesystem->OpenOutputStream(path)); + Status FinishAll(internal::TaskGroup* task_group) { + for (const auto& dir_writer : *dir_to_writer_) { + task_group->Append([&] { + std::shared_ptr writer = **dir_writer.second; + return writer->Finish(); + }); + } - DCHECK(!path_batches.second.empty()); - ARROW_ASSIGN_OR_RAISE(auto reader, - RecordBatchReader::Make(std::move(path_batches.second))); - RETURN_NOT_OK(format->WriteFragment(reader.get(), destination.get())); + return Status::OK(); } - return Status::OK(); -} + // There should only be a single writer open for each partition directory at a time + util::Mutexed>> + dir_to_writer_; + NextBasenameGenerator next_basename_; + std::string base_dir_; + const FileSystemDatasetWriteOptions& write_options_; +}; Status FileSystemDataset::Write(std::shared_ptr schema, - std::shared_ptr format, - std::shared_ptr filesystem, - std::string base_dir, - std::shared_ptr partitioning, - std::shared_ptr scan_context, - FragmentIterator fragment_it) { - auto task_group = scan_context->TaskGroup(); - - base_dir = std::string(fs::internal::RemoveTrailingSlash(base_dir)); - - int i = 0; - for (auto maybe_fragment : fragment_it) { - ARROW_ASSIGN_OR_RAISE(auto fragment, maybe_fragment); - auto task = std::make_shared(); - - task->basename = "dat_" + std::to_string(i++) + "." + format->type_name(); - task->partition_expression = fragment->partition_expression(); - task->format = format; - task->filesystem = filesystem; - task->base_dir = base_dir; - task->partitioning = partitioning; - - // make a record batch reader which yields from a fragment - ARROW_ASSIGN_OR_RAISE(task->batches, FragmentRecordBatchReader::Make( - std::move(fragment), schema, scan_context)); - task_group->Append([task] { return task->Execute(); }); + const FileSystemDatasetWriteOptions& write_options, + std::shared_ptr scanner) { + auto task_group = scanner->context()->TaskGroup(); + + // Things we'll un-lazy for the sake of simplicity, with the tradeoff they represent: + // + // - Fragment iteration. Keeping this lazy would allow us to start partitioning/writing + // any fragments we have before waiting for discovery to complete. This isn't + // currently implemented for FileSystemDataset anyway: ARROW-8613 + // + // - ScanTask iteration. Keeping this lazy would save some unnecessary blocking when + // writing Fragments which produce scan tasks slowly. No Fragments do this. + // + // NB: neither of these will have any impact whatsoever on the common case of writing + // an in-memory table to disk. 
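+
+  // The write proceeds in three stages: every ScanTask is collected up front, one
+  // write task per ScanTask is run in a sub task group (each task partitions its
+  // batches, hands the groups to per-directory writers from WriterSet, and retries
+  // pending groups with TryLock so it never blocks on a writer held by another
+  // task), and finally all writers are closed once the sub group has finished.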
+ ARROW_ASSIGN_OR_RAISE(FragmentVector fragments, scanner->GetFragments().ToVector()); + ScanTaskVector scan_tasks; + std::vector fragment_for_task; + + for (const auto& fragment : fragments) { + ARROW_ASSIGN_OR_RAISE( + auto scan_task_it, + Scanner(fragment, scanner->options(), scanner->context()).Scan()); + for (auto maybe_scan_task : scan_task_it) { + ARROW_ASSIGN_OR_RAISE(auto scan_task, maybe_scan_task); + scan_tasks.push_back(std::move(scan_task)); + fragment_for_task.push_back(fragment.get()); + } + } + + ARROW_ASSIGN_OR_RAISE(auto next_basename, + NextBasenameGenerator::Make(write_options.basename_template)); + WriterSet writers(std::move(next_basename), write_options); + + auto write_task_group = task_group->MakeSubGroup(); + auto fragment_for_task_it = fragment_for_task.begin(); + for (const auto& scan_task : scan_tasks) { + const Fragment* fragment = *fragment_for_task_it++; + + write_task_group->Append([&, scan_task, fragment] { + ARROW_ASSIGN_OR_RAISE(auto batches, scan_task->Execute()); + + for (auto maybe_batch : batches) { + ARROW_ASSIGN_OR_RAISE(auto batch, maybe_batch); + ARROW_ASSIGN_OR_RAISE(auto groups, write_options.partitioning->Partition(batch)); + batch.reset(); // drop to hopefully conserve memory + + struct PendingWrite { + std::shared_ptr writer; + std::shared_ptr batch; + + Result TryWrite() { + if (auto lock = writer->TryLock()) { + RETURN_NOT_OK(writer->get()->Write(batch)); + return true; + } + return false; + } + }; + std::vector pending_writes(groups.batches.size()); + + for (size_t i = 0; i < groups.batches.size(); ++i) { + AndExpression partition_expression(std::move(groups.expressions[i]), + fragment->partition_expression()); + + ARROW_ASSIGN_OR_RAISE(auto writer, writers.Get(partition_expression, + groups.batches[i]->schema())); + + pending_writes[i] = {writer, std::move(groups.batches[i])}; + } + + for (size_t i = 0; !pending_writes.empty(); ++i) { + if (i >= pending_writes.size()) { + i = 0; + } + + ARROW_ASSIGN_OR_RAISE(auto success, pending_writes[i].TryWrite()); + if (success) { + std::swap(pending_writes.back(), pending_writes[i]); + pending_writes.pop_back(); + } + } + } + + return Status::OK(); + }); } + RETURN_NOT_OK(write_task_group->Finish()); + RETURN_NOT_OK(writers.FinishAll(task_group.get())); return task_group->Finish(); } diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h index 957c876de72..e8c84f4afb6 100644 --- a/cpp/src/arrow/dataset/file_base.h +++ b/cpp/src/arrow/dataset/file_base.h @@ -128,6 +128,9 @@ class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this IsSupported(const FileSource& source) const = 0; @@ -151,10 +154,11 @@ class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this> MakeFragment( FileSource source, std::shared_ptr physical_schema = NULLPTR); - /// \brief Write a fragment. - /// FIXME(bkietz) make this pure virtual - virtual Status WriteFragment(RecordBatchReader* batches, - io::OutputStream* destination) const = 0; + virtual Result> MakeWriter( + std::shared_ptr destination, std::shared_ptr schema, + std::shared_ptr options) const = 0; + + virtual std::shared_ptr DefaultWriteOptions() = 0; }; /// \brief A Fragment that is stored in a file with a known format @@ -210,19 +214,9 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset { std::vector> fragments); /// \brief Write a dataset. - /// - /// \param[in] schema Schema of written dataset. - /// \param[in] format FileFormat with which fragments will be written. 
- /// \param[in] filesystem FileSystem into which the dataset will be written. - /// \param[in] base_dir Root directory into which the dataset will be written. - /// \param[in] partitioning Partitioning used to generate fragment paths. - /// \param[in] scan_context Resource pool used to scan and write fragments. - /// \param[in] fragments Fragments to be written to disk. - static Status Write(std::shared_ptr schema, std::shared_ptr format, - std::shared_ptr filesystem, std::string base_dir, - std::shared_ptr partitioning, - std::shared_ptr scan_context, - FragmentIterator fragments); + static Status Write(std::shared_ptr schema, + const FileSystemDatasetWriteOptions& write_options, + std::shared_ptr scanner); /// \brief Return the type name of the dataset. std::string type_name() const override { return "filesystem"; } @@ -256,5 +250,62 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset { std::vector> fragments_; }; +class ARROW_DS_EXPORT FileWriteOptions { + public: + virtual ~FileWriteOptions() = default; + + const std::shared_ptr& format() const { return format_; } + + protected: + explicit FileWriteOptions(std::shared_ptr format) + : format_(std::move(format)) {} + + std::shared_ptr format_; +}; + +class ARROW_DS_EXPORT FileWriter { + public: + virtual ~FileWriter() = default; + + virtual Status Write(const std::shared_ptr& batch) = 0; + + Status Write(RecordBatchReader* batches); + + virtual Status Finish() = 0; + + const std::shared_ptr& format() const { return options_->format(); } + const std::shared_ptr& schema() const { return schema_; } + const std::shared_ptr& options() const { return options_; } + + protected: + FileWriter(std::shared_ptr schema, std::shared_ptr options) + : schema_(std::move(schema)), options_(std::move(options)) {} + + std::shared_ptr schema_; + std::shared_ptr options_; +}; + +struct ARROW_DS_EXPORT FileSystemDatasetWriteOptions { + /// Options for individual fragment writing. + std::shared_ptr file_write_options; + + /// FileSystem into which a dataset will be written. + std::shared_ptr filesystem; + + /// Root directory into which the dataset will be written. + std::string base_dir; + + /// Partitioning used to generate fragment paths. + std::shared_ptr partitioning; + + /// Template string used to generate fragment basenames. + /// {i} will be replaced by an auto incremented integer. 
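+  /// For example, a basename_template of "dat_{i}" will produce basenames
+  /// "dat_0", "dat_1", ...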
+ std::string basename_template; + + const std::shared_ptr& format() const { + return file_write_options->format(); + } +}; + } // namespace dataset } // namespace arrow diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc index 3b474b428d9..f889b12fddd 100644 --- a/cpp/src/arrow/dataset/file_csv.cc +++ b/cpp/src/arrow/dataset/file_csv.cc @@ -157,6 +157,22 @@ class CsvScanTask : public ScanTask { FileSource source_; }; +bool CsvFileFormat::Equals(const FileFormat& format) const { + if (type_name() != format.type_name()) return false; + + const auto& other_parse_options = + checked_cast(format).parse_options; + + return parse_options.delimiter == other_parse_options.delimiter && + parse_options.quoting == other_parse_options.quoting && + parse_options.quote_char == other_parse_options.quote_char && + parse_options.double_quote == other_parse_options.double_quote && + parse_options.escaping == other_parse_options.escaping && + parse_options.escape_char == other_parse_options.escape_char && + parse_options.newlines_in_values == other_parse_options.newlines_in_values && + parse_options.ignore_empty_lines == other_parse_options.ignore_empty_lines; +} + Result CsvFileFormat::IsSupported(const FileSource& source) const { RETURN_NOT_OK(source.Open().status()); return OpenReader(source, *this).ok(); diff --git a/cpp/src/arrow/dataset/file_csv.h b/cpp/src/arrow/dataset/file_csv.h index 022517f4585..df5593459e3 100644 --- a/cpp/src/arrow/dataset/file_csv.h +++ b/cpp/src/arrow/dataset/file_csv.h @@ -37,6 +37,8 @@ class ARROW_DS_EXPORT CsvFileFormat : public FileFormat { std::string type_name() const override { return "csv"; } + bool Equals(const FileFormat& other) const override; + Result IsSupported(const FileSource& source) const override; /// \brief Return the schema of the file if possible. 
@@ -47,9 +49,13 @@ class ARROW_DS_EXPORT CsvFileFormat : public FileFormat { std::shared_ptr context, FileFragment* fragment) const override; - Status WriteFragment(RecordBatchReader*, io::OutputStream*) const override { + Result> MakeWriter( + std::shared_ptr destination, std::shared_ptr schema, + std::shared_ptr options) const override { return Status::NotImplemented("writing fragment of CsvFileFormat"); } + + std::shared_ptr DefaultWriteOptions() override { return NULLPTR; } }; } // namespace dataset diff --git a/cpp/src/arrow/dataset/file_ipc.cc b/cpp/src/arrow/dataset/file_ipc.cc index b849295bba5..9c7d643f9ac 100644 --- a/cpp/src/arrow/dataset/file_ipc.cc +++ b/cpp/src/arrow/dataset/file_ipc.cc @@ -27,9 +27,13 @@ #include "arrow/dataset/scanner.h" #include "arrow/ipc/reader.h" #include "arrow/ipc/writer.h" +#include "arrow/util/checked_cast.h" #include "arrow/util/iterator.h" namespace arrow { + +using internal::checked_pointer_cast; + namespace dataset { static inline ipc::IpcReadOptions default_read_options() { @@ -159,18 +163,44 @@ Result IpcFileFormat::ScanFile(std::shared_ptr op fragment->source()); } -Status IpcFileFormat::WriteFragment(RecordBatchReader* batches, - io::OutputStream* destination) const { - ARROW_ASSIGN_OR_RAISE(auto writer, ipc::MakeFileWriter(destination, batches->schema())); +/// +/// IpcFileWriter, IpcFileWriteOptions +/// + +std::shared_ptr IpcFileFormat::DefaultWriteOptions() { + std::shared_ptr options( + new IpcFileWriteOptions(shared_from_this())); + + options->ipc_options = + std::make_shared(ipc::IpcWriteOptions::Defaults()); + return options; +} - for (;;) { - ARROW_ASSIGN_OR_RAISE(auto batch, batches->Next()); - if (batch == nullptr) break; - RETURN_NOT_OK(writer->WriteRecordBatch(*batch)); +Result> IpcFileFormat::MakeWriter( + std::shared_ptr destination, std::shared_ptr schema, + std::shared_ptr options) const { + if (!Equals(*options->format())) { + return Status::TypeError("Mismatching format/write options."); } - return writer->Close(); + auto ipc_options = checked_pointer_cast(options); + + ARROW_ASSIGN_OR_RAISE(auto writer, ipc::MakeFileWriter(destination, schema)); + return std::shared_ptr( + new IpcFileWriter(std::move(writer), std::move(schema), std::move(ipc_options))); } +IpcFileWriter::IpcFileWriter(std::shared_ptr writer, + std::shared_ptr schema, + std::shared_ptr options) + : FileWriter(std::move(schema), std::move(options)), + batch_writer_(std::move(writer)) {} + +Status IpcFileWriter::Write(const std::shared_ptr& batch) { + return batch_writer_->WriteRecordBatch(*batch); +} + +Status IpcFileWriter::Finish() { return batch_writer_->Close(); } + } // namespace dataset } // namespace arrow diff --git a/cpp/src/arrow/dataset/file_ipc.h b/cpp/src/arrow/dataset/file_ipc.h index be11c71a95a..50650bfedb0 100644 --- a/cpp/src/arrow/dataset/file_ipc.h +++ b/cpp/src/arrow/dataset/file_ipc.h @@ -28,6 +28,12 @@ #include "arrow/result.h" namespace arrow { +namespace ipc { + +class RecordBatchWriter; +struct IpcWriteOptions; + +} // namespace ipc namespace dataset { /// \brief A FileFormat implementation that reads from and writes to Ipc files @@ -35,6 +41,10 @@ class ARROW_DS_EXPORT IpcFileFormat : public FileFormat { public: std::string type_name() const override { return "ipc"; } + bool Equals(const FileFormat& other) const override { + return type_name() == other.type_name(); + } + bool splittable() const override { return true; } Result IsSupported(const FileSource& source) const override; @@ -47,8 +57,37 @@ class ARROW_DS_EXPORT 
IpcFileFormat : public FileFormat { std::shared_ptr context, FileFragment* fragment) const override; - Status WriteFragment(RecordBatchReader* batches, - io::OutputStream* destination) const override; + Result> MakeWriter( + std::shared_ptr destination, std::shared_ptr schema, + std::shared_ptr options) const override; + + std::shared_ptr DefaultWriteOptions() override; +}; + +class ARROW_DS_EXPORT IpcFileWriteOptions : public FileWriteOptions { + public: + std::shared_ptr ipc_options; + + protected: + using FileWriteOptions::FileWriteOptions; + + friend class IpcFileFormat; +}; + +class ARROW_DS_EXPORT IpcFileWriter : public FileWriter { + public: + Status Write(const std::shared_ptr& batch) override; + + Status Finish() override; + + private: + IpcFileWriter(std::shared_ptr writer, + std::shared_ptr schema, + std::shared_ptr options); + + std::shared_ptr batch_writer_; + + friend class IpcFileFormat; }; } // namespace dataset diff --git a/cpp/src/arrow/dataset/file_ipc_test.cc b/cpp/src/arrow/dataset/file_ipc_test.cc index 41ac5a9b7b0..808bae568b4 100644 --- a/cpp/src/arrow/dataset/file_ipc_test.cc +++ b/cpp/src/arrow/dataset/file_ipc_test.cc @@ -156,7 +156,10 @@ TEST_F(TestIpcFileFormat, WriteRecordBatchReader) { EXPECT_OK_AND_ASSIGN(auto sink, GetFileSink()); - ASSERT_OK(format_->WriteFragment(reader.get(), sink.get())); + auto options = format_->DefaultWriteOptions(); + EXPECT_OK_AND_ASSIGN(auto writer, format_->MakeWriter(sink, reader->schema(), options)); + ASSERT_OK(writer->Write(reader.get())); + ASSERT_OK(writer->Finish()); EXPECT_OK_AND_ASSIGN(auto written, sink->Finish()); @@ -168,7 +171,9 @@ class TestIpcFileSystemDataset : public testing::Test, public: void SetUp() override { MakeSourceDataset(); - format_ = std::make_shared(); + auto ipc_format = std::make_shared(); + format_ = ipc_format; + SetWriteOptions(ipc_format->DefaultWriteOptions()); } }; diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc index 29ed18817d9..ced75c652c4 100644 --- a/cpp/src/arrow/dataset/file_parquet.cc +++ b/cpp/src/arrow/dataset/file_parquet.cc @@ -42,6 +42,7 @@ namespace arrow { using internal::checked_cast; +using internal::checked_pointer_cast; namespace dataset { @@ -285,12 +286,20 @@ class ParquetScanTaskIterator { size_t idx_ = 0; }; -ParquetFileFormat::ParquetFileFormat() - : writer_properties(parquet::default_writer_properties()), - arrow_writer_properties(parquet::default_arrow_writer_properties()) {} +bool ParquetFileFormat::Equals(const FileFormat& other) const { + if (other.type_name() != type_name()) return false; -ParquetFileFormat::ParquetFileFormat(const parquet::ReaderProperties& reader_properties) - : ParquetFileFormat() { + const auto& other_reader_options = + checked_cast(other).reader_options; + + // FIXME implement comparison for decryption options + // FIXME extract these to scan time options so comparison is unnecessary + return reader_options.use_buffered_stream == other_reader_options.use_buffered_stream && + reader_options.buffer_size == other_reader_options.buffer_size && + reader_options.dict_columns == other_reader_options.dict_columns; +} + +ParquetFileFormat::ParquetFileFormat(const parquet::ReaderProperties& reader_properties) { reader_options.use_buffered_stream = reader_properties.is_buffered_stream_enabled(); reader_options.buffer_size = reader_properties.buffer_size(); reader_options.file_decryption_properties = @@ -352,28 +361,6 @@ static inline bool RowGroupInfosAreComplete(const std::vector& inf [](const RowGroupInfo& 
i) { return i.HasStatistics(); }); } -Status ParquetFileFormat::WriteFragment(RecordBatchReader* batches, - io::OutputStream* destination) const { - using parquet::arrow::FileWriter; - - std::shared_ptr shared_destination{destination, - [](io::OutputStream*) {}}; - - std::unique_ptr writer; - RETURN_NOT_OK(FileWriter::Open(*batches->schema(), default_memory_pool(), - shared_destination, writer_properties, - arrow_writer_properties, &writer)); - - for (;;) { - ARROW_ASSIGN_OR_RAISE(auto batch, batches->Next()); - if (batch == nullptr) break; - ARROW_ASSIGN_OR_RAISE(auto table, Table::FromRecordBatches(batch->schema(), {batch})); - RETURN_NOT_OK(writer->WriteTable(*table, batch->num_rows())); - } - - return writer->Close(); -} - Result ParquetFileFormat::ScanFile(std::shared_ptr options, std::shared_ptr context, FileFragment* fragment) const { @@ -440,6 +427,48 @@ Result> ParquetFileFormat::MakeFragment( std::move(physical_schema), {})); } +/// +/// ParquetFileWriter, ParquetFileWriteOptions +/// + +std::shared_ptr ParquetFileFormat::DefaultWriteOptions() { + std::shared_ptr options( + new ParquetFileWriteOptions(shared_from_this())); + options->writer_properties = parquet::default_writer_properties(); + options->arrow_writer_properties = parquet::default_arrow_writer_properties(); + return options; +} + +Result> ParquetFileFormat::MakeWriter( + std::shared_ptr destination, std::shared_ptr schema, + std::shared_ptr options) const { + if (!Equals(*options->format())) { + return Status::TypeError("Mismatching format/write options"); + } + + auto parquet_options = checked_pointer_cast(options); + + std::unique_ptr parquet_writer; + RETURN_NOT_OK(parquet::arrow::FileWriter::Open( + *schema, default_memory_pool(), destination, parquet_options->writer_properties, + parquet_options->arrow_writer_properties, &parquet_writer)); + + return std::shared_ptr( + new ParquetFileWriter(std::move(parquet_writer), std::move(parquet_options))); +} + +ParquetFileWriter::ParquetFileWriter(std::shared_ptr writer, + std::shared_ptr options) + : FileWriter(writer->schema(), std::move(options)), + parquet_writer_(std::move(writer)) {} + +Status ParquetFileWriter::Write(const std::shared_ptr& batch) { + ARROW_ASSIGN_OR_RAISE(auto table, Table::FromRecordBatches(batch->schema(), {batch})); + return parquet_writer_->WriteTable(*table, batch->num_rows()); +} + +Status ParquetFileWriter::Finish() { return parquet_writer_->Close(); } + /// /// RowGroupInfo /// diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h index 939fdc53687..25235128257 100644 --- a/cpp/src/arrow/dataset/file_parquet.h +++ b/cpp/src/arrow/dataset/file_parquet.h @@ -41,11 +41,13 @@ class FileEncryptionProperties; class ReaderProperties; class ArrowReaderProperties; + class WriterProperties; class ArrowWriterProperties; namespace arrow { class FileReader; +class FileWriter; } // namespace arrow } // namespace parquet @@ -57,7 +59,7 @@ class RowGroupInfo; /// \brief A FileFormat implementation that reads from Parquet files class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat { public: - ParquetFileFormat(); + ParquetFileFormat() = default; /// Convenience constructor which copies properties from a parquet::ReaderProperties. /// memory_pool will be ignored. 
@@ -67,6 +69,8 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat { bool splittable() const override { return true; } + bool Equals(const FileFormat& other) const override; + struct ReaderOptions { /// \defgroup parquet-file-format-reader-properties properties which correspond to /// members of parquet::ReaderProperties. @@ -99,10 +103,6 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat { bool enable_parallel_column_conversion = false; } reader_options; - std::shared_ptr writer_properties; - - std::shared_ptr arrow_writer_properties; - Result IsSupported(const FileSource& source) const override; /// \brief Return the schema of the file if possible. @@ -130,8 +130,11 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat { Result> GetReader( const FileSource& source, ScanOptions* = NULLPTR, ScanContext* = NULLPTR) const; - Status WriteFragment(RecordBatchReader* batches, - io::OutputStream* destination) const override; + Result> MakeWriter( + std::shared_ptr destination, std::shared_ptr schema, + std::shared_ptr options) const override; + + std::shared_ptr DefaultWriteOptions() override; }; /// \brief Represents a parquet's RowGroup with extra information. @@ -248,6 +251,37 @@ class ARROW_DS_EXPORT ParquetFileFragment : public FileFragment { friend class ParquetFileFormat; }; +class ARROW_DS_EXPORT ParquetFileWriteOptions : public FileWriteOptions { + public: + std::shared_ptr writer_properties; + + std::shared_ptr arrow_writer_properties; + + protected: + using FileWriteOptions::FileWriteOptions; + + friend class ParquetFileFormat; +}; + +class ARROW_DS_EXPORT ParquetFileWriter : public FileWriter { + public: + const std::shared_ptr& parquet_writer() const { + return parquet_writer_; + } + + Status Write(const std::shared_ptr& batch) override; + + Status Finish() override; + + private: + ParquetFileWriter(std::shared_ptr writer, + std::shared_ptr options); + + std::shared_ptr parquet_writer_; + + friend class ParquetFileFormat; +}; + struct ParquetFactoryOptions { // Either an explicit Partitioning or a PartitioningFactory to discover one. 
// diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc index d0a22e98382..ec47259d30a 100644 --- a/cpp/src/arrow/dataset/file_parquet_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_test.cc @@ -48,7 +48,6 @@ using parquet::default_writer_properties; using parquet::WriterProperties; using parquet::CreateOutputStream; -using parquet::arrow::FileWriter; using parquet::arrow::WriteTable; using testing::Pointee; @@ -57,7 +56,7 @@ using internal::checked_pointer_cast; class ArrowParquetWriterMixin : public ::testing::Test { public: - Status WriteRecordBatch(const RecordBatch& batch, FileWriter* writer) { + Status WriteRecordBatch(const RecordBatch& batch, parquet::arrow::FileWriter* writer) { auto schema = batch.schema(); auto size = batch.num_rows(); @@ -75,7 +74,8 @@ class ArrowParquetWriterMixin : public ::testing::Test { return Status::OK(); } - Status WriteRecordBatchReader(RecordBatchReader* reader, FileWriter* writer) { + Status WriteRecordBatchReader(RecordBatchReader* reader, + parquet::arrow::FileWriter* writer) { auto schema = reader->schema(); if (!schema->Equals(*writer->schema(), false)) { @@ -96,9 +96,9 @@ class ArrowParquetWriterMixin : public ::testing::Test { const std::shared_ptr& properties = default_writer_properties(), const std::shared_ptr& arrow_properties = default_arrow_writer_properties()) { - std::unique_ptr writer; - RETURN_NOT_OK(FileWriter::Open(*reader->schema(), pool, sink, properties, - arrow_properties, &writer)); + std::unique_ptr writer; + RETURN_NOT_OK(parquet::arrow::FileWriter::Open( + *reader->schema(), pool, sink, properties, arrow_properties, &writer)); RETURN_NOT_OK(WriteRecordBatchReader(reader, writer.get())); return writer->Close(); } @@ -554,7 +554,10 @@ TEST_F(TestParquetFileFormat, WriteRecordBatchReader) { EXPECT_OK_AND_ASSIGN(auto sink, GetFileSink()); - ASSERT_OK(format_->WriteFragment(reader.get(), sink.get())); + auto options = format_->DefaultWriteOptions(); + EXPECT_OK_AND_ASSIGN(auto writer, format_->MakeWriter(sink, reader->schema(), options)); + ASSERT_OK(writer->Write(reader.get())); + ASSERT_OK(writer->Finish()); EXPECT_OK_AND_ASSIGN(auto written, sink->Finish()); @@ -572,16 +575,21 @@ TEST_F(TestParquetFileFormat, WriteRecordBatchReaderCustomOptions) { EXPECT_OK_AND_ASSIGN(auto sink, GetFileSink()); - format_->writer_properties = parquet::WriterProperties::Builder() + auto options = + checked_pointer_cast(format_->DefaultWriteOptions()); + options->writer_properties = parquet::WriterProperties::Builder() .created_by("TestParquetFileFormat") ->disable_statistics() ->build(); - format_->arrow_writer_properties = parquet::ArrowWriterProperties::Builder() + options->arrow_writer_properties = parquet::ArrowWriterProperties::Builder() .coerce_timestamps(coerce_timestamps_to) ->build(); - ASSERT_OK(format_->WriteFragment(reader.get(), sink.get())); + EXPECT_OK_AND_ASSIGN(auto writer, format_->MakeWriter(sink, reader->schema(), options)); + ASSERT_OK(writer->Write(reader.get())); + ASSERT_OK(writer->Finish()); + EXPECT_OK_AND_ASSIGN(auto written, sink->Finish()); EXPECT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(FileSource{written})); @@ -596,7 +604,9 @@ class TestParquetFileSystemDataset : public WriteFileSystemDatasetMixin, void SetUp() override { MakeSourceDataset(); check_metadata_ = false; - format_ = std::make_shared(); + auto parquet_format = std::make_shared(); + format_ = parquet_format; + SetWriteOptions(parquet_format->DefaultWriteOptions()); } }; diff --git 
a/cpp/src/arrow/dataset/partition.cc b/cpp/src/arrow/dataset/partition.cc index a47ba849cfc..159e0ac0331 100644 --- a/cpp/src/arrow/dataset/partition.cc +++ b/cpp/src/arrow/dataset/partition.cc @@ -69,9 +69,9 @@ std::shared_ptr Partitioning::Default() { " Partitioning"); } - Result> Partition( + Result Partition( const std::shared_ptr& batch) const override { - return std::vector{{batch, scalar(true)}}; + return PartitionedBatches{{batch}, {scalar(true)}}; } }; @@ -139,7 +139,7 @@ inline std::shared_ptr ConjunctionFromGroupingRow(Scalar* row) { return and_(std::move(equality_expressions)); } -Result> KeyValuePartitioning::Partition( +Result KeyValuePartitioning::Partition( const std::shared_ptr& batch) const { FieldVector by_fields; ArrayVector by_columns; @@ -158,7 +158,7 @@ Result> KeyValuePartitioning::Partit if (by_fields.empty()) { // no fields to group by; return the whole batch - return std::vector{{batch, scalar(true)}}; + return PartitionedBatches{{batch}, {scalar(true)}}; } ARROW_ASSIGN_OR_RAISE(auto by, @@ -168,13 +168,13 @@ Result> KeyValuePartitioning::Partit checked_pointer_cast(groupings_and_values->GetFieldByName("groupings")); auto unique_rows = groupings_and_values->GetFieldByName("values"); - ARROW_ASSIGN_OR_RAISE(auto grouped_batches, ApplyGroupings(*groupings, rest)); + PartitionedBatches out; + ARROW_ASSIGN_OR_RAISE(out.batches, ApplyGroupings(*groupings, rest)); + out.expressions.resize(out.batches.size()); - std::vector out(grouped_batches.size()); - for (size_t i = 0; i < out.size(); ++i) { + for (size_t i = 0; i < out.batches.size(); ++i) { ARROW_ASSIGN_OR_RAISE(auto row, unique_rows->GetScalar(i)); - out[i].partition_expression = ConjunctionFromGroupingRow(row.get()); - out[i].batch = std::move(grouped_batches[i]); + out.expressions[i] = ConjunctionFromGroupingRow(row.get()); } return out; } diff --git a/cpp/src/arrow/dataset/partition.h b/cpp/src/arrow/dataset/partition.h index 6228999b41d..165fcfb5248 100644 --- a/cpp/src/arrow/dataset/partition.h +++ b/cpp/src/arrow/dataset/partition.h @@ -60,12 +60,12 @@ class ARROW_DS_EXPORT Partitioning { virtual std::string type_name() const = 0; /// \brief If the input batch shares any fields with this partitioning, - /// produce slices of the batch which satisfy mutually exclusive Expressions. - struct PartitionedBatch { - std::shared_ptr batch; - std::shared_ptr partition_expression; + /// produce sub-batches which satisfy mutually exclusive Expressions. 
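+  /// For example, partitioning on "year" yields one sub-batch per distinct year
+  /// value, each paired with an expression such as ("year" == 2018).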
+ struct PartitionedBatches { + RecordBatchVector batches; + ExpressionVector expressions; }; - virtual Result> Partition( + virtual Result Partition( const std::shared_ptr& batch) const = 0; /// \brief Parse a path into a partition expression @@ -133,7 +133,7 @@ class ARROW_DS_EXPORT KeyValuePartitioning : public Partitioning { static Status SetDefaultValuesFromKeys(const Expression& expr, RecordBatchProjector* projector); - Result> Partition( + Result Partition( const std::shared_ptr& batch) const override; Result> Parse(const std::string& path) const override; @@ -240,7 +240,7 @@ class ARROW_DS_EXPORT FunctionPartitioning : public Partitioning { return Status::NotImplemented("formatting paths from ", type_name(), " Partitioning"); } - Result> Partition( + Result Partition( const std::shared_ptr& batch) const override { return Status::NotImplemented("partitioning batches from ", type_name(), " Partitioning"); diff --git a/cpp/src/arrow/dataset/partition_test.cc b/cpp/src/arrow/dataset/partition_test.cc index ef92a99a967..e9ea2539e89 100644 --- a/cpp/src/arrow/dataset/partition_test.cc +++ b/cpp/src/arrow/dataset/partition_test.cc @@ -435,7 +435,7 @@ class RangePartitioning : public Partitioning { } Result Format(const Expression&) const override { return ""; } - Result> Partition( + Result Partition( const std::shared_ptr&) const override { return Status::OK(); } diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h index fcecf46552a..7822f52fe82 100644 --- a/cpp/src/arrow/dataset/test_util.h +++ b/cpp/src/arrow/dataset/test_util.h @@ -199,6 +199,11 @@ class DummyFileFormat : public FileFormat { std::string type_name() const override { return "dummy"; } + bool Equals(const FileFormat& other) const override { + return type_name() == other.type_name() && + schema_->Equals(checked_cast(other).schema_); + } + Result IsSupported(const FileSource& source) const override { return true; } Result> Inspect(const FileSource& source) const override { @@ -212,10 +217,14 @@ class DummyFileFormat : public FileFormat { return MakeEmptyIterator>(); } - Status WriteFragment(RecordBatchReader*, io::OutputStream*) const override { + Result> MakeWriter( + std::shared_ptr destination, std::shared_ptr schema, + std::shared_ptr options) const override { return Status::NotImplemented("writing fragment of DummyFileFormat"); } + std::shared_ptr DefaultWriteOptions() override { return nullptr; } + protected: std::shared_ptr schema_; }; @@ -230,6 +239,8 @@ class JSONRecordBatchFileFormat : public FileFormat { explicit JSONRecordBatchFileFormat(SchemaResolver resolver) : resolver_(std::move(resolver)) {} + bool Equals(const FileFormat& other) const override { return this == &other; } + std::string type_name() const override { return "json_record_batch"; } /// \brief Return true if the given file extension @@ -255,10 +266,14 @@ class JSONRecordBatchFileFormat : public FileFormat { std::move(context)); } - Status WriteFragment(RecordBatchReader*, io::OutputStream*) const override { + Result> MakeWriter( + std::shared_ptr destination, std::shared_ptr schema, + std::shared_ptr options) const override { return Status::NotImplemented("writing fragment of JSONRecordBatchFileFormat"); } + std::shared_ptr DefaultWriteOptions() override { return nullptr; } + protected: SchemaResolver resolver_; }; @@ -560,46 +575,54 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin { ASSERT_OK_AND_ASSIGN(auto factory, FileSystemDatasetFactory::Make(fs_, s, source_format, options)); 
ASSERT_OK_AND_ASSIGN(dataset_, factory->Finish()); + + scan_options_ = ScanOptions::Make(source_schema_); } - void TestWriteWithIdenticalPartitioningSchema() { - auto desired_partitioning = std::make_shared( - SchemaFromColumnNames(source_schema_, {"year", "month"})); + void SetWriteOptions(std::shared_ptr file_write_options) { + write_options_.file_write_options = file_write_options; + write_options_.filesystem = fs_; + write_options_.base_dir = "new_root/"; + write_options_.basename_template = "dat_{i}"; + } - ASSERT_OK(FileSystemDataset::Write( - source_schema_, format_, fs_, "new_root/", desired_partitioning, - std::make_shared(), dataset_->GetFragments())); + void DoWrite(std::shared_ptr desired_partitioning) { + write_options_.partitioning = desired_partitioning; + auto scanner = std::make_shared(dataset_, scan_options_, scan_context_); + ASSERT_OK(FileSystemDataset::Write(source_schema_, write_options_, scanner)); + // re-discover the written dataset fs::FileSelector s; s.recursive = true; s.base_dir = "/new_root"; - FileSystemFactoryOptions options; - options.partitioning = desired_partitioning; - ASSERT_OK_AND_ASSIGN(auto factory, - FileSystemDatasetFactory::Make(fs_, s, format_, options)); + FileSystemFactoryOptions factory_options; + factory_options.partitioning = desired_partitioning; + ASSERT_OK_AND_ASSIGN( + auto factory, FileSystemDatasetFactory::Make(fs_, s, format_, factory_options)); ASSERT_OK_AND_ASSIGN(written_, factory->Finish()); + } + + void TestWriteWithIdenticalPartitioningSchema() { + DoWrite(std::make_shared( + SchemaFromColumnNames(source_schema_, {"year", "month"}))); - expected_files_["/new_root/2018/1/dat_0." + format_->type_name()] = R"([ + expected_files_["/new_root/2018/1/dat_0"] = R"([ {"region": "NY", "model": "3", "sales": 742.0, "country": "US"}, {"region": "NY", "model": "S", "sales": 304.125, "country": "US"}, - {"region": "NY", "model": "Y", "sales": 27.5, "country": "US"} - ])"; - expected_files_["/new_root/2018/1/dat_1." + format_->type_name()] = R"([ + {"region": "NY", "model": "Y", "sales": 27.5, "country": "US"}, {"region": "QC", "model": "3", "sales": 512, "country": "CA"}, {"region": "QC", "model": "S", "sales": 978, "country": "CA"}, {"region": "NY", "model": "X", "sales": 136.25, "country": "US"}, {"region": "QC", "model": "X", "sales": 1.0, "country": "CA"}, {"region": "QC", "model": "Y", "sales": 69, "country": "CA"} ])"; - expected_files_["/new_root/2019/1/dat_2." + format_->type_name()] = R"([ + expected_files_["/new_root/2019/1/dat_1"] = R"([ {"region": "CA", "model": "3", "sales": 273.5, "country": "US"}, {"region": "CA", "model": "S", "sales": 13, "country": "US"}, {"region": "CA", "model": "X", "sales": 54, "country": "US"}, {"region": "QC", "model": "S", "sales": 10, "country": "CA"}, - {"region": "CA", "model": "Y", "sales": 21, "country": "US"} - ])"; - expected_files_["/new_root/2019/1/dat_3." 
+ format_->type_name()] = R"([ + {"region": "CA", "model": "Y", "sales": 21, "country": "US"}, {"region": "QC", "model": "3", "sales": 152.25, "country": "CA"}, {"region": "QC", "model": "X", "sales": 42, "country": "CA"}, {"region": "QC", "model": "Y", "sales": 37, "country": "CA"} @@ -611,52 +634,32 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin { } void TestWriteWithUnrelatedPartitioningSchema() { - auto desired_partitioning = std::make_shared( - SchemaFromColumnNames(source_schema_, {"country", "region"})); - - ASSERT_OK(FileSystemDataset::Write( - source_schema_, format_, fs_, "new_root/", desired_partitioning, - std::make_shared(), dataset_->GetFragments())); - - fs::FileSelector s; - s.recursive = true; - s.base_dir = "/new_root"; - - FileSystemFactoryOptions options; - options.partitioning = desired_partitioning; - ASSERT_OK_AND_ASSIGN(auto factory, - FileSystemDatasetFactory::Make(fs_, s, format_, options)); - ASSERT_OK_AND_ASSIGN(written_, factory->Finish()); + DoWrite(std::make_shared( + SchemaFromColumnNames(source_schema_, {"country", "region"}))); // XXX first thing a user will be annoyed by: we don't support left // padding the month field with 0. - expected_files_["/new_root/US/NY/dat_0." + format_->type_name()] = R"([ + expected_files_["/new_root/US/NY/dat_0"] = R"([ {"year": 2018, "month": 1, "model": "3", "sales": 742.0}, {"year": 2018, "month": 1, "model": "S", "sales": 304.125}, - {"year": 2018, "month": 1, "model": "Y", "sales": 27.5} - ])"; - expected_files_["/new_root/US/NY/dat_1." + format_->type_name()] = R"([ + {"year": 2018, "month": 1, "model": "Y", "sales": 27.5}, {"year": 2018, "month": 1, "model": "X", "sales": 136.25} ])"; - expected_files_["/new_root/CA/QC/dat_1." + format_->type_name()] = R"([ + expected_files_["/new_root/CA/QC/dat_1"] = R"([ {"year": 2018, "month": 1, "model": "3", "sales": 512}, {"year": 2018, "month": 1, "model": "S", "sales": 978}, {"year": 2018, "month": 1, "model": "X", "sales": 1.0}, - {"year": 2018, "month": 1, "model": "Y", "sales": 69} + {"year": 2018, "month": 1, "model": "Y", "sales": 69}, + {"year": 2019, "month": 1, "model": "S", "sales": 10}, + {"year": 2019, "month": 1, "model": "3", "sales": 152.25}, + {"year": 2019, "month": 1, "model": "X", "sales": 42}, + {"year": 2019, "month": 1, "model": "Y", "sales": 37} ])"; - expected_files_["/new_root/US/CA/dat_2." + format_->type_name()] = R"([ + expected_files_["/new_root/US/CA/dat_2"] = R"([ {"year": 2019, "month": 1, "model": "3", "sales": 273.5}, {"year": 2019, "month": 1, "model": "S", "sales": 13}, {"year": 2019, "month": 1, "model": "X", "sales": 54}, {"year": 2019, "month": 1, "model": "Y", "sales": 21} - ])"; - expected_files_["/new_root/CA/QC/dat_2." + format_->type_name()] = R"([ - {"year": 2019, "month": 1, "model": "S", "sales": 10} - ])"; - expected_files_["/new_root/CA/QC/dat_3." 
+ format_->type_name()] = R"([ - {"year": 2019, "month": 1, "model": "3", "sales": 152.25}, - {"year": 2019, "month": 1, "model": "X", "sales": 42}, - {"year": 2019, "month": 1, "model": "Y", "sales": 37} ])"; expected_physical_schema_ = SchemaFromColumnNames(source_schema_, {"model", "sales", "year", "month"}); @@ -665,49 +668,31 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin { } void TestWriteWithSupersetPartitioningSchema() { - auto desired_partitioning = std::make_shared( - SchemaFromColumnNames(source_schema_, {"year", "month", "country", "region"})); - - ASSERT_OK(FileSystemDataset::Write( - source_schema_, format_, fs_, "new_root/", desired_partitioning, - std::make_shared(), dataset_->GetFragments())); - - fs::FileSelector s; - s.recursive = true; - s.base_dir = "/new_root"; - - FileSystemFactoryOptions options; - options.partitioning = desired_partitioning; - ASSERT_OK_AND_ASSIGN(auto factory, - FileSystemDatasetFactory::Make(fs_, s, format_, options)); - ASSERT_OK_AND_ASSIGN(written_, factory->Finish()); + DoWrite(std::make_shared( + SchemaFromColumnNames(source_schema_, {"year", "month", "country", "region"}))); // XXX first thing a user will be annoyed by: we don't support left // padding the month field with 0. - expected_files_["/new_root/2018/1/US/NY/dat_0." + format_->type_name()] = R"([ + expected_files_["/new_root/2018/1/US/NY/dat_0"] = R"([ {"model": "3", "sales": 742.0}, {"model": "S", "sales": 304.125}, - {"model": "Y", "sales": 27.5} - ])"; - expected_files_["/new_root/2018/1/US/NY/dat_1." + format_->type_name()] = R"([ + {"model": "Y", "sales": 27.5}, {"model": "X", "sales": 136.25} ])"; - expected_files_["/new_root/2018/1/CA/QC/dat_1." + format_->type_name()] = R"([ + expected_files_["/new_root/2018/1/CA/QC/dat_1"] = R"([ {"model": "3", "sales": 512}, {"model": "S", "sales": 978}, {"model": "X", "sales": 1.0}, {"model": "Y", "sales": 69} ])"; - expected_files_["/new_root/2019/1/US/CA/dat_2." + format_->type_name()] = R"([ + expected_files_["/new_root/2019/1/US/CA/dat_2"] = R"([ {"model": "3", "sales": 273.5}, {"model": "S", "sales": 13}, {"model": "X", "sales": 54}, {"model": "Y", "sales": 21} ])"; - expected_files_["/new_root/2019/1/CA/QC/dat_2." + format_->type_name()] = R"([ - {"model": "S", "sales": 10} - ])"; - expected_files_["/new_root/2019/1/CA/QC/dat_3." + format_->type_name()] = R"([ + expected_files_["/new_root/2019/1/CA/QC/dat_3"] = R"([ + {"model": "S", "sales": 10}, {"model": "3", "sales": 152.25}, {"model": "X", "sales": 42}, {"model": "Y", "sales": 37} @@ -718,43 +703,23 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin { } void TestWriteWithEmptyPartitioningSchema() { - auto desired_partitioning = std::make_shared( - SchemaFromColumnNames(source_schema_, {})); - - ASSERT_OK(FileSystemDataset::Write( - source_schema_, format_, fs_, "new_root/", desired_partitioning, - std::make_shared(), dataset_->GetFragments())); + DoWrite(std::make_shared( + SchemaFromColumnNames(source_schema_, {}))); - fs::FileSelector s; - s.recursive = true; - s.base_dir = "/new_root"; - - FileSystemFactoryOptions options; - options.partitioning = desired_partitioning; - ASSERT_OK_AND_ASSIGN(auto factory, - FileSystemDatasetFactory::Make(fs_, s, format_, options)); - ASSERT_OK_AND_ASSIGN(written_, factory->Finish()); - - expected_files_["/new_root/dat_0." 
+ format_->type_name()] = R"([ + expected_files_["/new_root/dat_0"] = R"([ {"country": "US", "region": "NY", "year": 2018, "month": 1, "model": "3", "sales": 742.0}, {"country": "US", "region": "NY", "year": 2018, "month": 1, "model": "S", "sales": 304.125}, - {"country": "US", "region": "NY", "year": 2018, "month": 1, "model": "Y", "sales": 27.5} - ])"; - expected_files_["/new_root/dat_1." + format_->type_name()] = R"([ + {"country": "US", "region": "NY", "year": 2018, "month": 1, "model": "Y", "sales": 27.5}, {"country": "CA", "region": "QC", "year": 2018, "month": 1, "model": "3", "sales": 512}, {"country": "CA", "region": "QC", "year": 2018, "month": 1, "model": "S", "sales": 978}, {"country": "US", "region": "NY", "year": 2018, "month": 1, "model": "X", "sales": 136.25}, {"country": "CA", "region": "QC", "year": 2018, "month": 1, "model": "X", "sales": 1.0}, - {"country": "CA", "region": "QC", "year": 2018, "month": 1, "model": "Y", "sales": 69} - ])"; - expected_files_["/new_root/dat_2." + format_->type_name()] = R"([ + {"country": "CA", "region": "QC", "year": 2018, "month": 1, "model": "Y", "sales": 69}, {"country": "US", "region": "CA", "year": 2019, "month": 1, "model": "3", "sales": 273.5}, {"country": "US", "region": "CA", "year": 2019, "month": 1, "model": "S", "sales": 13}, {"country": "US", "region": "CA", "year": 2019, "month": 1, "model": "X", "sales": 54}, {"country": "CA", "region": "QC", "year": 2019, "month": 1, "model": "S", "sales": 10}, - {"country": "US", "region": "CA", "year": 2019, "month": 1, "model": "Y", "sales": 21} - ])"; - expected_files_["/new_root/dat_3." + format_->type_name()] = R"([ + {"country": "US", "region": "CA", "year": 2019, "month": 1, "model": "Y", "sales": 21}, {"country": "CA", "region": "QC", "year": 2019, "month": 1, "model": "3", "sales": 152.25}, {"country": "CA", "region": "QC", "year": 2019, "month": 1, "model": "X", "sales": 42}, {"country": "CA", "region": "QC", "year": 2019, "month": 1, "model": "Y", "sales": 37} @@ -765,12 +730,14 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin { } void AssertWrittenAsExpected() { - std::vector files; + std::unordered_set expected_paths, actual_paths; for (const auto& file_contents : expected_files_) { - files.push_back(file_contents.first); + expected_paths.insert(file_contents.first); + } + for (auto path : checked_pointer_cast(written_)->files()) { + actual_paths.insert(std::move(path)); } - EXPECT_THAT(checked_pointer_cast(written_)->files(), - testing::UnorderedElementsAreArray(files)); + EXPECT_THAT(actual_paths, testing::UnorderedElementsAreArray(expected_paths)); for (auto maybe_fragment : written_->GetFragments()) { ASSERT_OK_AND_ASSIGN(auto fragment, maybe_fragment); @@ -779,12 +746,13 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin { AssertSchemaEqual(*expected_physical_schema_, *actual_physical_schema, check_metadata_); - AssertSchemaEqual(*expected_physical_schema_, *actual_physical_schema); - const auto& path = checked_pointer_cast(fragment)->source().path(); - auto expected_struct = ArrayFromJSON(struct_(expected_physical_schema_->fields()), - {expected_files_[path]}); + auto file_contents = expected_files_.find(path); + if (file_contents == expected_files_.end()) { + // file wasn't expected to be written at all; nothing to compare with + continue; + } ASSERT_OK_AND_ASSIGN(auto scanner, ScannerBuilder(actual_physical_schema, fragment, std::make_shared()) @@ -799,6 +767,9 @@ class WriteFileSystemDatasetMixin : public 
MakeFileSystemDatasetMixin { ASSERT_OK_AND_ASSIGN(actual_struct, batch->ToStructArray()); } + auto expected_struct = ArrayFromJSON(struct_(expected_physical_schema_->fields()), + {file_contents->second}); + AssertArraysEqual(*expected_struct, *actual_struct, /*verbose=*/true); } } @@ -809,6 +780,9 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin { PathAndContent expected_files_; std::shared_ptr expected_physical_schema_; std::shared_ptr written_; + FileSystemDatasetWriteOptions write_options_; + std::shared_ptr scan_options_; + std::shared_ptr scan_context_ = std::make_shared(); }; } // namespace dataset diff --git a/cpp/src/arrow/dataset/type_fwd.h b/cpp/src/arrow/dataset/type_fwd.h index b7604f6bddd..e329b072ed7 100644 --- a/cpp/src/arrow/dataset/type_fwd.h +++ b/cpp/src/arrow/dataset/type_fwd.h @@ -48,16 +48,23 @@ using FragmentVector = std::vector>; class FileSource; class FileFormat; class FileFragment; +class FileWriter; +class FileWriteOptions; class FileSystemDataset; +struct FileSystemDatasetWriteOptions; class InMemoryDataset; class CsvFileFormat; class IpcFileFormat; +class IpcFileWriter; +class IpcFileWriteOptions; class ParquetFileFormat; class ParquetFileFragment; +class ParquetFileWriter; +class ParquetFileWriteOptions; class Expression; using ExpressionVector = std::vector>; diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index fa900fd164b..e1b7700ec1b 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -19,6 +19,8 @@ #include +#include "arrow/util/logging.h" + namespace arrow { namespace util { @@ -29,7 +31,14 @@ struct Mutex::Impl { Mutex::Guard::Guard(Mutex* locked) : locked_(locked, [](Mutex* locked) { locked->impl_->mutex_.unlock(); }) {} +void Mutex::Guard::Unlock() { + if (locked_) { + locked_->impl_->mutex_.unlock(); + } +} + Mutex::Guard Mutex::TryLock() { + DCHECK_NE(impl_, nullptr); if (impl_->mutex_.try_lock()) { return Guard{this}; } @@ -37,6 +46,7 @@ Mutex::Guard Mutex::TryLock() { } Mutex::Guard Mutex::Lock() { + DCHECK_NE(impl_, nullptr); impl_->mutex_.lock(); return Guard{this}; } diff --git a/cpp/src/arrow/util/mutex.h b/cpp/src/arrow/util/mutex.h index a0365b710fb..8a430266431 100644 --- a/cpp/src/arrow/util/mutex.h +++ b/cpp/src/arrow/util/mutex.h @@ -31,6 +31,8 @@ namespace util { class ARROW_EXPORT Mutex { public: Mutex(); + Mutex(Mutex&&) = default; + Mutex& operator=(Mutex&&) = default; /// A Guard is falsy if a lock could not be acquired. 
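+  ///
+  /// A minimal usage sketch:
+  ///
+  ///   util::Mutex mutex;
+  ///   if (auto guard = mutex.TryLock()) {
+  ///     // lock acquired; it is released when guard goes out of scope
+  ///   }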
class Guard { @@ -41,6 +43,8 @@ class ARROW_EXPORT Mutex { explicit operator bool() const { return bool(locked_); } + void Unlock(); + private: explicit Guard(Mutex* locked); @@ -56,5 +60,30 @@ class ARROW_EXPORT Mutex { std::unique_ptr impl_; }; +/// A trivial Mutex, T pair +template +class Mutexed : Mutex { + public: + Mutexed() = default; + Mutexed(Mutexed&&) = default; + Mutexed& operator=(Mutexed&&) = default; + explicit Mutexed(T obj) : obj_(std::move(obj)) {} + + using Mutex::Lock; + using Mutex::TryLock; + + T& operator*() { return obj_; } + const T& operator*() const { return obj_; } + + T* operator->() { return &obj_; } + const T* operator->() const { return &obj_; } + + T& get() { return obj_; } + const T& get() const { return obj_; } + + private: + T obj_; +}; + } // namespace util } // namespace arrow diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index a8198b66d70..922d7b05bea 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -606,6 +606,29 @@ cdef shared_ptr[CExpression] _insert_implicit_casts(Expression filter, ) +cdef class FileWriteOptions(_Weakrefable): + + cdef: + shared_ptr[CFileWriteOptions] wrapped + CFileWriteOptions* options + + def __init__(self): + pass + + cdef void init(self, const shared_ptr[CFileWriteOptions]& sp): + self.wrapped = sp + self.options = sp.get() + + @staticmethod + cdef wrap(const shared_ptr[CFileWriteOptions]& sp): + cdef FileWriteOptions self = FileWriteOptions() + self.init(sp) + return self + + cdef inline shared_ptr[CFileWriteOptions] unwrap(self): + return self.wrapped + + cdef class FileFormat(_Weakrefable): cdef: @@ -662,6 +685,10 @@ cdef class FileFormat(_Weakrefable): nullptr)) return Fragment.wrap(move(c_fragment)) + @property + def default_write_options(self): + return FileWriteOptions.wrap(self.format.DefaultWriteOptions()) + def __eq__(self, other): try: return self.equals(other) @@ -1058,14 +1085,11 @@ cdef class ParquetFileFormat(FileFormat): cdef: CParquetFileFormat* parquet_format - object _write_options - def __init__(self, read_options=None, dict write_options=None): + def __init__(self, read_options=None): cdef: shared_ptr[CParquetFileFormat] wrapped CParquetFileFormatReaderOptions* options - shared_ptr[ArrowWriterProperties] arrow_properties - shared_ptr[WriterProperties] properties # Read options @@ -1087,33 +1111,6 @@ cdef class ParquetFileFormat(FileFormat): for column in read_options.dictionary_columns: options.dict_columns.insert(tobytes(column)) - # Write options - - self._write_options = {} - if write_options is not None: - self._write_options = write_options - properties = _create_writer_properties( - use_dictionary=write_options.get("use_dictionary", True), - compression=write_options.get("compression", "snappy"), - version=write_options.get("version", "1.0"), - write_statistics=write_options.get("write_statistics", None), - data_page_size=write_options.get("data_page_size", None), - compression_level=write_options.get("compression_level", None), - use_byte_stream_split=write_options.get( - "use_byte_stream_split", False), - data_page_version=write_options.get("data_page_version", "1.0") - ) - arrow_properties = _create_arrow_writer_properties( - use_deprecated_int96_timestamps=write_options.get( - "use_deprecated_int96_timestamps", False), - coerce_timestamps=write_options.get("coerce_timestamps", None), - allow_truncated_timestamps=write_options.get( - "allow_truncated_timestamps", False), - writer_engine_version="V2" - ) - wrapped.get().writer_properties = 
properties - wrapped.get().arrow_writer_properties = arrow_properties - self.init( wrapped) cdef void init(self, const shared_ptr[CFileFormat]& sp): @@ -1133,18 +1130,13 @@ cdef class ParquetFileFormat(FileFormat): enable_parallel_column_conversion=enable ) - @property - def write_options(self): - return self._write_options - def equals(self, ParquetFileFormat other): return ( - self.read_options.equals(other.read_options) and - self.write_options == other.write_options + self.read_options.equals(other.read_options) ) def __reduce__(self): - return ParquetFileFormat, (self.read_options, self.write_options) + return ParquetFileFormat, (self.read_options, ) def make_fragment(self, file, filesystem=None, Expression partition_expression=None, row_groups=None): @@ -2122,7 +2114,7 @@ def _get_partition_keys(Expression partition_expression): def _filesystemdataset_write( - data, object base_dir, Schema schema not None, + data, object base_dir, str basename_template, Schema schema not None, FileFormat format not None, FileSystem filesystem not None, Partitioning partitioning not None, bint use_threads=True, ): @@ -2130,39 +2122,27 @@ def _filesystemdataset_write( CFileSystemDataset.Write wrapper """ cdef: - c_string c_base_dir shared_ptr[CSchema] c_schema - shared_ptr[CFileFormat] c_format - shared_ptr[CFileSystem] c_filesystem - shared_ptr[CPartitioning] c_partitioning - shared_ptr[CScanContext] c_context - # to create iterator of InMemory fragments + CFileSystemDatasetWriteOptions c_options + shared_ptr[CScanner] c_scanner + + FileWriteOptions file_options vector[shared_ptr[CRecordBatch]] c_batches - shared_ptr[CFragment] c_fragment - vector[shared_ptr[CFragment]] c_fragment_vector - c_base_dir = tobytes(_stringify_path(base_dir)) + file_options = format.default_write_options + c_schema = pyarrow_unwrap_schema(schema) - c_format = format.unwrap() - c_filesystem = filesystem.unwrap() - c_partitioning = partitioning.unwrap() - c_context = _build_scan_context(use_threads=use_threads) + + c_options.file_write_options = file_options.unwrap() + c_options.filesystem = filesystem.unwrap() + c_options.base_dir = tobytes(_stringify_path(base_dir)) + c_options.partitioning = partitioning.unwrap() + c_options.basename_template = tobytes(basename_template) if isinstance(data, Dataset): - with nogil: - check_status( - CFileSystemDataset.Write( - c_schema, - c_format, - c_filesystem, - c_base_dir, - c_partitioning, - c_context, - ( data).dataset.GetFragments() - ) - ) + scanner = data._scanner(use_threads=use_threads) else: - # data is list of batches/tables, one element per fragment + # data is list of batches/tables for table in data: if isinstance(table, Table): for batch in table.to_batches(): @@ -2170,20 +2150,11 @@ def _filesystemdataset_write( else: c_batches.push_back(( table).sp_batch) - c_fragment = shared_ptr[CFragment]( - new CInMemoryFragment(c_batches, _true.unwrap())) - c_batches.clear() - c_fragment_vector.push_back(c_fragment) + data = Fragment.wrap(shared_ptr[CFragment]( + new CInMemoryFragment(move(c_batches), _true.unwrap()))) - with nogil: - check_status( - CFileSystemDataset.Write( - c_schema, - c_format, - c_filesystem, - c_base_dir, - c_partitioning, - c_context, - MakeVectorIterator(move(c_fragment_vector)) - ) - ) + scanner = Scanner.from_fragment(data, schema, use_threads=use_threads) + + c_scanner = ( scanner).unwrap() + with nogil: + check_status(CFileSystemDataset.Write(c_schema, c_options, c_scanner)) diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py index 
19f3c549d56..702a50f1f35 100644 --- a/python/pyarrow/dataset.py +++ b/python/pyarrow/dataset.py @@ -694,7 +694,8 @@ def _ensure_write_partitioning(scheme): return scheme -def write_dataset(data, base_dir, format=None, partitioning=None, schema=None, +def write_dataset(data, base_dir, basename_template, format=None, + partitioning=None, schema=None, filesystem=None, use_threads=True): """ Write a dataset to a given format and partitioning. @@ -703,13 +704,13 @@ def write_dataset(data, base_dir, format=None, partitioning=None, schema=None, ---------- data : Dataset, Table/RecordBatch, or list of Table/RecordBatch The data to write. This can be a Dataset instance or - in-memory Arrow data. A Table or RecordBatch is written as a - single fragment (resulting in a single file, or multiple files if - split according to the `partitioning`). If you have a Table consisting - of multiple record batches, you can pass ``table.to_batches()`` to - handle each record batch as a separate fragment. + in-memory Arrow data. base_dir : str The root directory where to write the dataset. + basename_template : str + A template string used to generate basenames of written data files. + The token '{i}' will be replaced with an automatically incremented + integer. format : FileFormat or str The format in which to write the dataset. Currently supported: "ipc"/"feather". If a FileSystemDataset is being written and `format` @@ -750,5 +751,6 @@ def write_dataset(data, base_dir, format=None, partitioning=None, schema=None, filesystem, _ = _ensure_fs(filesystem) _filesystemdataset_write( - data, base_dir, schema, format, filesystem, partitioning, use_threads, + data, base_dir, basename_template, schema, format, + filesystem, partitioning, use_threads, ) diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd index a1f7cd4aa4b..5128a8ea0b6 100644 --- a/python/pyarrow/includes/libarrow_dataset.pxd +++ b/python/pyarrow/includes/libarrow_dataset.pxd @@ -211,6 +211,10 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: # the generated C++ is compiled). CFileSource(...) 
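(Reviewer note, not part of the diff: the user-facing effect of the `pyarrow.dataset.write_dataset` change above is that `basename_template` becomes a required argument, with the '{i}' token replaced by an incrementing integer per written file, mirroring NextBasenameGenerator on the C++ side. A minimal sketch of the intended call as of this point in the series; the table contents and the output path are illustrative only and do not come from the patch:

    import pyarrow as pa
    import pyarrow.dataset as ds

    # Illustrative data and output directory (not taken from the patch).
    table = pa.table({"part": ["a", "a", "b"], "value": [1, 2, 3]})

    ds.write_dataset(
        table, "/tmp/demo_dataset",
        basename_template="dat_{i}.feather",  # '{i}' -> 0, 1, 2, ... per written file
        format="feather",
        partitioning=ds.partitioning(pa.schema([("part", pa.string())]),
                                     flavor="hive"),
    )

This matches the calls exercised by the updated tests in test_dataset.py further below.)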
+ cdef cppclass CFileWriteOptions \ + "arrow::dataset::FileWriteOptions": + pass + cdef cppclass CFileFormat "arrow::dataset::FileFormat": c_string type_name() const CResult[shared_ptr[CSchema]] Inspect(const CFileSource&) const @@ -218,6 +222,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: CFileSource source, shared_ptr[CExpression] partition_expression, shared_ptr[CSchema] physical_schema) + shared_ptr[CFileWriteOptions] DefaultWriteOptions() cdef cppclass CFileFragment "arrow::dataset::FileFragment"( CFragment): @@ -237,6 +242,10 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: @staticmethod vector[CRowGroupInfo] FromIdentifiers(vector[int]) + cdef cppclass CParquetFileWriteOptions \ + "arrow::dataset::ParquetFileWriteOptions": + pass + cdef cppclass CParquetFileFragment "arrow::dataset::ParquetFileFragment"( CFileFragment): const vector[CRowGroupInfo]& row_groups() const @@ -244,6 +253,14 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: shared_ptr[CExpression] predicate) CStatus EnsureCompleteMetadata() + cdef cppclass CFileSystemDatasetWriteOptions \ + "arrow::dataset::FileSystemDatasetWriteOptions": + shared_ptr[CFileWriteOptions] file_write_options + shared_ptr[CFileSystem] filesystem + c_string base_dir + shared_ptr[CPartitioning] partitioning + c_string basename_template + cdef cppclass CFileSystemDataset \ "arrow::dataset::FileSystemDataset"(CDataset): @staticmethod @@ -257,12 +274,8 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: @staticmethod CStatus Write( shared_ptr[CSchema] schema, - shared_ptr[CFileFormat] format, - shared_ptr[CFileSystem] filesystem, - c_string base_dir, - shared_ptr[CPartitioning] partitioning, - shared_ptr[CScanContext] scan_context, - CFragmentIterator fragments) + const CFileSystemDatasetWriteOptions& write_options, + shared_ptr[CScanner] scanner) c_string type() vector[c_string] files() diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index 13549746755..128c58c2550 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -510,9 +510,6 @@ def test_file_format_pickling(): read_options={ 'use_buffered_stream': True, 'buffer_size': 4096, - }, - write_options={ - 'version': '2.0' } ) ] @@ -2155,7 +2152,8 @@ def _check_dataset_roundtrip(dataset, base_dir, expected_files, base_dir_path = base_dir_path or base_dir ds.write_dataset( - dataset, base_dir, format="feather", partitioning=partitioning + dataset, base_dir, basename_template='dat_{i}.feather', + format="feather", partitioning=partitioning, use_threads=False ) # check that all files are present @@ -2165,6 +2163,8 @@ def _check_dataset_roundtrip(dataset, base_dir, expected_files, # check that reading back in as dataset gives the same result dataset2 = ds.dataset( base_dir_path, format="feather", partitioning=partitioning) + print(dataset.to_table().to_pandas()) + print(dataset2.to_table().to_pandas()) assert dataset2.to_table().equals(dataset.to_table()) @@ -2178,12 +2178,12 @@ def test_write_dataset(tempdir): # full string path target = tempdir / 'single-file-target' - expected_files = [target / "dat_0.ipc"] + expected_files = [target / "dat_0.feather"] _check_dataset_roundtrip(dataset, str(target), expected_files, target) # pathlib path object target = tempdir / 'single-file-target2' - expected_files = [target / "dat_0.ipc"] + expected_files = [target / "dat_0.feather"] _check_dataset_roundtrip(dataset, target, 
expected_files, target) # TODO @@ -2200,7 +2200,7 @@ def test_write_dataset(tempdir): dataset = ds.dataset(directory) target = tempdir / 'single-directory-target' - expected_files = [target / "dat_0.ipc", target / "dat_1.ipc"] + expected_files = [target / "dat_0.feather"] _check_dataset_roundtrip(dataset, str(target), expected_files, target) @@ -2215,8 +2215,8 @@ def test_write_dataset_partitioned(tempdir): # hive partitioning target = tempdir / 'partitioned-hive-target' expected_paths = [ - target / "part=a", target / "part=a" / "dat_0.ipc", - target / "part=b", target / "part=b" / "dat_1.ipc" + target / "part=a", target / "part=a" / "dat_0.feather", + target / "part=b", target / "part=b" / "dat_1.feather" ] partitioning_schema = ds.partitioning( pa.schema([("part", pa.string())]), flavor="hive") @@ -2227,8 +2227,8 @@ def test_write_dataset_partitioned(tempdir): # directory partitioning target = tempdir / 'partitioned-dir-target' expected_paths = [ - target / "a", target / "a" / "dat_0.ipc", - target / "b", target / "b" / "dat_1.ipc" + target / "a", target / "a" / "dat_0.feather", + target / "b", target / "b" / "dat_1.feather" ] partitioning_schema = ds.partitioning( pa.schema([("part", pa.string())])) @@ -2249,20 +2249,15 @@ def test_write_dataset_use_threads(tempdir): target1 = tempdir / 'partitioned1' ds.write_dataset( - dataset, target1, format="feather", partitioning=partitioning, - use_threads=True + dataset, target1, basename_template='dat_{i}.feather', + format="feather", partitioning=partitioning, use_threads=True ) target2 = tempdir / 'partitioned2' ds.write_dataset( - dataset, target2, format="feather", partitioning=partitioning, - use_threads=False + dataset, target2, basename_template='dat_{i}.feather', + format="feather", partitioning=partitioning, use_threads=False ) - # check that all files are the same in both cases - paths1 = [p.relative_to(target1) for p in target1.rglob("*")] - paths2 = [p.relative_to(target2) for p in target2.rglob("*")] - assert set(paths1) == set(paths2) - # check that reading in gives same result result1 = ds.dataset(target1, format="feather", partitioning=partitioning) result2 = ds.dataset(target2, format="feather", partitioning=partitioning) @@ -2276,10 +2271,11 @@ def test_write_table(tempdir): ], names=["f1", "f2", "part"]) base_dir = tempdir / 'single' - ds.write_dataset(table, base_dir, format="feather") + ds.write_dataset(table, base_dir, + basename_template='dat_{i}.feather', format="feather") # check that all files are present file_paths = list(base_dir.rglob("*")) - expected_paths = [base_dir / "dat_0.ipc"] + expected_paths = [base_dir / "dat_0.feather"] assert set(file_paths) == set(expected_paths) # check Table roundtrip result = ds.dataset(base_dir, format="ipc").to_table() @@ -2289,12 +2285,13 @@ def test_write_table(tempdir): base_dir = tempdir / 'partitioned' partitioning = ds.partitioning( pa.schema([("part", pa.string())]), flavor="hive") - ds.write_dataset( - table, base_dir, format="feather", partitioning=partitioning) + ds.write_dataset(table, base_dir, + basename_template='dat_{i}.feather', + format="feather", partitioning=partitioning) file_paths = list(base_dir.rglob("*")) expected_paths = [ - base_dir / "part=a", base_dir / "part=a" / "dat_0.ipc", - base_dir / "part=b", base_dir / "part=b" / "dat_0.ipc" + base_dir / "part=a", base_dir / "part=a" / "dat_0.feather", + base_dir / "part=b", base_dir / "part=b" / "dat_1.feather" ] assert set(file_paths) == set(expected_paths) result = ds.dataset(base_dir, format="ipc", 
partitioning=partitioning) @@ -2310,28 +2307,32 @@ def test_write_table_multiple_fragments(tempdir): # Table with multiple batches written as single Fragment by default base_dir = tempdir / 'single' - ds.write_dataset(table, base_dir, format="feather") - assert set(base_dir.rglob("*")) == set([base_dir / "dat_0.ipc"]) + ds.write_dataset(table, base_dir, basename_template='dat_{i}.feather', + format="feather") + assert set(base_dir.rglob("*")) == set([base_dir / "dat_0.feather"]) assert ds.dataset(base_dir, format="ipc").to_table().equals(table) # Same for single-element list of Table base_dir = tempdir / 'single-list' - ds.write_dataset([table], base_dir, format="feather") - assert set(base_dir.rglob("*")) == set([base_dir / "dat_0.ipc"]) + ds.write_dataset([table], base_dir, basename_template='dat_{i}.feather', + format="feather") + assert set(base_dir.rglob("*")) == set([base_dir / "dat_0.feather"]) assert ds.dataset(base_dir, format="ipc").to_table().equals(table) # Provide list of batches to write multiple fragments base_dir = tempdir / 'multiple' - ds.write_dataset(table.to_batches(), base_dir, format="feather") + ds.write_dataset(table.to_batches(), base_dir, + basename_template='dat_{i}.feather', format="feather") assert set(base_dir.rglob("*")) == set( - [base_dir / "dat_0.ipc", base_dir / "dat_1.ipc"]) + [base_dir / "dat_0.feather"]) assert ds.dataset(base_dir, format="ipc").to_table().equals(table) # Provide list of tables to write multiple fragments base_dir = tempdir / 'multiple-table' - ds.write_dataset([table, table], base_dir, format="feather") + ds.write_dataset([table, table], base_dir, + basename_template='dat_{i}.feather', format="feather") assert set(base_dir.rglob("*")) == set( - [base_dir / "dat_0.ipc", base_dir / "dat_1.ipc"]) + [base_dir / "dat_0.feather"]) assert ds.dataset(base_dir, format="ipc").to_table().equals( pa.concat_tables([table]*2) ) @@ -2349,7 +2350,8 @@ def test_write_dataset_parquet(tempdir): # using default "parquet" format string base_dir = tempdir / 'parquet_dataset' - ds.write_dataset(table, base_dir, format="parquet") + ds.write_dataset(table, base_dir, + basename_template='dat_{i}.parquet', format="parquet") # check that all files are present file_paths = list(base_dir.rglob("*")) expected_paths = [base_dir / "dat_0.parquet"] @@ -2359,10 +2361,10 @@ def test_write_dataset_parquet(tempdir): assert result.equals(table) # using custom options / format object - - for version in ["1.0", "2.0"]: - format = ds.ParquetFileFormat(write_options=dict(version=version)) - base_dir = tempdir / 'parquet_dataset_version{0}'.format(version) - ds.write_dataset(table, base_dir, format=format) - meta = pq.read_metadata(base_dir / "dat_0.parquet") - assert meta.format_version == version + if False: + for version in ["1.0", "2.0"]: + format = ds.ParquetFileFormat(write_options=dict(version=version)) + base_dir = tempdir / 'parquet_dataset_version{0}'.format(version) + ds.write_dataset(table, base_dir, format=format) + meta = pq.read_metadata(base_dir / "dat_0.parquet") + assert meta.format_version == version From e2c11992a8fffffd15072f381b8da85bb9aa9d7d Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Tue, 29 Sep 2020 16:23:55 -0700 Subject: [PATCH 02/34] Minimal hacking to get the R tests passing --- r/R/arrowExports.R | 8 ++++++-- r/R/dataset-scan.R | 9 ++++++++- r/R/dataset-write.R | 36 +++++++++++++++------------------ r/R/dataset.R | 10 +-------- r/man/Dataset.Rd | 3 --- r/man/write_dataset.Rd | 10 ++++----- r/src/arrowExports.cpp | 26 
+++++++++++++++++++----- r/src/dataset.cpp | 24 ++++++++++++++-------- r/tests/testthat/test-dataset.R | 4 ++-- 9 files changed, 75 insertions(+), 55 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index c059c761efe..af5b71bd436 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -464,12 +464,16 @@ dataset___Scanner__Scan <- function(scanner){ .Call(`_arrow_dataset___Scanner__Scan` , scanner) } +dataset___Scanner__schema <- function(sc){ + .Call(`_arrow_dataset___Scanner__schema` , sc) +} + dataset___ScanTask__get_batches <- function(scan_task){ .Call(`_arrow_dataset___ScanTask__get_batches` , scan_task) } -dataset___Dataset__Write <- function(ds, schema, format, filesystem, path, partitioning){ - invisible(.Call(`_arrow_dataset___Dataset__Write` , ds, schema, format, filesystem, path, partitioning)) +dataset___Dataset__Write <- function(scanner, schema, format, filesystem, path, partitioning){ + invisible(.Call(`_arrow_dataset___Dataset__Write` , scanner, schema, format, filesystem, path, partitioning)) } shared_ptr_is_null <- function(xp){ diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R index 4aabd2c2bbf..ab542fee8f9 100644 --- a/r/R/dataset-scan.R +++ b/r/R/dataset-scan.R @@ -57,6 +57,9 @@ Scanner <- R6Class("Scanner", inherit = ArrowObject, public = list( ToTable = function() shared_ptr(Table, dataset___Scanner__ToTable(self)), Scan = function() map(dataset___Scanner__Scan(self), shared_ptr, class = ScanTask) + ), + active = list( + schema = function() shared_ptr(Schema, dataset___Scanner__schema(self)) ) ) Scanner$create <- function(dataset, @@ -65,7 +68,7 @@ Scanner$create <- function(dataset, use_threads = option_use_threads(), batch_size = NULL, ...) { - if (inherits(dataset, "arrow_dplyr_query") && inherits(dataset$.data, "Dataset")) { + if (inherits(dataset, "arrow_dplyr_query")) { return(Scanner$create( dataset$.data, dataset$selected_columns, @@ -74,7 +77,11 @@ Scanner$create <- function(dataset, ... )) } + if (inherits(dataset, c("data.frame", "RecordBatch", "Table"))) { + dataset <- InMemoryDataset$create(dataset) + } assert_is(dataset, "Dataset") + scanner_builder <- dataset$NewScan() if (use_threads) { scanner_builder$UseThreads() diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R index 8cfd8dec527..b449186a9ea 100644 --- a/r/R/dataset-write.R +++ b/r/R/dataset-write.R @@ -26,18 +26,18 @@ #' `schema` and `partitioning` will be taken from the result of any `select()` #' and `group_by()` operations done on the dataset. Note that `filter()` queries #' are not currently supported, and `select`-ed columns may not be renamed. -#' @param path string path to a directory to write to (directory will be +#' @param path string path or URI to a directory to write to (directory will be #' created if it does not exist) #' @param format file format to write the dataset to. Currently supported #' formats are "feather" (aka "ipc") and "parquet". Default is to write to the #' same format as `dataset`. -#' @param schema [Schema] containing a subset of columns, possibly reordered, -#' in `dataset`. Default is `dataset$schema`, i.e. all columns. #' @param partitioning `Partitioning` or a character vector of columns to #' use as partition keys (to be written as path segments). Default is to #' use the current `group_by()` columns. #' @param hive_style logical: write partition segments as Hive-style #' (`key1=value1/key2=value2/file.ext`) or as just bare values. Default is `TRUE`. 
+#' @param filesystem A [FileSystem] where the dataset should be written if it is a +#' string file path; default is the local file system #' @param ... additional format-specific arguments. For available Parquet #' options, see [write_parquet()]. #' @return The input `dataset`, invisibly @@ -45,12 +45,11 @@ write_dataset <- function(dataset, path, format = dataset$format$type, - schema = dataset$schema, partitioning = dplyr::group_vars(dataset), hive_style = TRUE, + filesystem = NULL, ...) { if (inherits(dataset, "arrow_dplyr_query")) { - force(partitioning) # get the group_vars before we drop the object # Check for a filter if (!isTRUE(dataset$filtered_rows)) { # TODO: @@ -63,23 +62,18 @@ write_dataset <- function(dataset, stop("Renaming columns when writing a dataset is not yet supported", call. = FALSE) } dataset <- ensure_group_vars(dataset) - schema <- dataset$.data$schema[dataset$selected_columns] } - dataset <- dataset$.data } - if (inherits(dataset, c("data.frame", "RecordBatch", "Table"))) { - force(partitioning) # get the group_vars before we replace the object - if (inherits(dataset, "grouped_df")) { - # Drop the grouping metadata before writing; we've already consumed it - # now to construct `partitioning` and don't want it in the metadata$r - dataset <- dplyr::ungroup(dataset) - } - dataset <- InMemoryDataset$create(dataset) - } - if (!inherits(dataset, "Dataset")) { - stop("'dataset' must be a Dataset, not ", class(dataset)[1], call. = FALSE) + if (inherits(dataset, "grouped_df")) { + force(partitioning) + # Drop the grouping metadata before writing; we've already consumed it + # now to construct `partitioning` and don't want it in the metadata$r + dataset <- dplyr::ungroup(dataset) } + scanner <- Scanner$create(dataset) + schema <- scanner$schema + if (!inherits(format, "FileFormat")) { if (identical(format, "parquet")) { # We have to do some special massaging of properties @@ -93,12 +87,14 @@ write_dataset <- function(dataset, if (!inherits(partitioning, "Partitioning")) { # TODO: tidyselect? - partition_schema <- dataset$schema[partitioning] + partition_schema <- schema[partitioning] if (isTRUE(hive_style)) { partitioning <- HivePartitioning$create(partition_schema) } else { partitioning <- DirectoryPartitioning$create(partition_schema) } } - dataset$write(path, format = format, partitioning = partitioning, schema = schema, ...) + path_and_fs <- get_path_and_filesystem(path, filesystem) + + dataset___Dataset__Write(scanner, schema, path = path_and_fs$path, format = format, partitioning = partitioning, filesystem = path_and_fs$fs) } diff --git a/r/R/dataset.R b/r/R/dataset.R index 7661c33292e..7b1d6609295 100644 --- a/r/R/dataset.R +++ b/r/R/dataset.R @@ -133,9 +133,6 @@ open_dataset <- function(sources, #' may also replace the dataset's schema by using `ds$schema <- new_schema`. #' This method currently supports only adding, removing, or reordering #' fields in the schema: you cannot alter or cast the field types. -#' - `$write(path, filesystem, schema, format, partitioning, ...)`: writes the -#' dataset to `path` in the `format` file format, partitioned by `partitioning`, -#' and invisibly returns `self`. See [write_dataset()]. 
#' #' `FileSystemDataset` has the following methods: #' - `$files`: Active binding, returns the files of the `FileSystemDataset` @@ -162,12 +159,7 @@ Dataset <- R6Class("Dataset", inherit = ArrowObject, # Start a new scan of the data # @return A [ScannerBuilder] NewScan = function() unique_ptr(ScannerBuilder, dataset___Dataset__NewScan(self)), - ToString = function() self$schema$ToString(), - write = function(path, filesystem = NULL, schema = self$schema, format, partitioning, ...) { - path_and_fs <- get_path_and_filesystem(path, filesystem) - dataset___Dataset__Write(self, schema, format, path_and_fs$fs, path_and_fs$path, partitioning) - invisible(self) - } + ToString = function() self$schema$ToString() ), active = list( schema = function(schema) { diff --git a/r/man/Dataset.Rd b/r/man/Dataset.Rd index 056b7d529e2..3c9a314195d 100644 --- a/r/man/Dataset.Rd +++ b/r/man/Dataset.Rd @@ -61,9 +61,6 @@ A \code{Dataset} has the following methods: may also replace the dataset's schema by using \code{ds$schema <- new_schema}. This method currently supports only adding, removing, or reordering fields in the schema: you cannot alter or cast the field types. -\item \verb{$write(path, filesystem, schema, format, partitioning, ...)}: writes the -dataset to \code{path} in the \code{format} file format, partitioned by \code{partitioning}, -and invisibly returns \code{self}. See \code{\link[=write_dataset]{write_dataset()}}. } \code{FileSystemDataset} has the following methods: diff --git a/r/man/write_dataset.Rd b/r/man/write_dataset.Rd index bb020b6ce68..8133b38479d 100644 --- a/r/man/write_dataset.Rd +++ b/r/man/write_dataset.Rd @@ -8,9 +8,9 @@ write_dataset( dataset, path, format = dataset$format$type, - schema = dataset$schema, partitioning = dplyr::group_vars(dataset), hive_style = TRUE, + filesystem = NULL, ... ) } @@ -21,16 +21,13 @@ write_dataset( and \code{group_by()} operations done on the dataset. Note that \code{filter()} queries are not currently supported, and \code{select}-ed columns may not be renamed.} -\item{path}{string path to a directory to write to (directory will be +\item{path}{string path or URI to a directory to write to (directory will be created if it does not exist)} \item{format}{file format to write the dataset to. Currently supported formats are "feather" (aka "ipc") and "parquet". Default is to write to the same format as \code{dataset}.} -\item{schema}{\link{Schema} containing a subset of columns, possibly reordered, -in \code{dataset}. Default is \code{dataset$schema}, i.e. all columns.} - \item{partitioning}{\code{Partitioning} or a character vector of columns to use as partition keys (to be written as path segments). Default is to use the current \code{group_by()} columns.} @@ -38,6 +35,9 @@ use the current \code{group_by()} columns.} \item{hive_style}{logical: write partition segments as Hive-style (\code{key1=value1/key2=value2/file.ext}) or as just bare values. Default is \code{TRUE}.} +\item{filesystem}{A \link{FileSystem} where the dataset should be written if it is a +string file path; default is the local file system} + \item{...}{additional format-specific arguments. 
For available Parquet options, see \code{\link[=write_parquet]{write_parquet()}}.} } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 999917188b1..2b4d0723879 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1821,6 +1821,21 @@ extern "C" SEXP _arrow_dataset___Scanner__Scan(SEXP scanner_sexp){ } #endif +// dataset.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr dataset___Scanner__schema(const std::shared_ptr& sc); +extern "C" SEXP _arrow_dataset___Scanner__schema(SEXP sc_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type sc(sc_sexp); + return cpp11::as_sexp(dataset___Scanner__schema(sc)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_dataset___Scanner__schema(SEXP sc_sexp){ + Rf_error("Cannot call dataset___Scanner__schema(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // dataset.cpp #if defined(ARROW_R_WITH_ARROW) std::vector> dataset___ScanTask__get_batches(const std::shared_ptr& scan_task); @@ -1838,21 +1853,21 @@ extern "C" SEXP _arrow_dataset___ScanTask__get_batches(SEXP scan_task_sexp){ // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -void dataset___Dataset__Write(const std::shared_ptr& ds, const std::shared_ptr& schema, const std::shared_ptr& format, const std::shared_ptr& filesystem, std::string path, const std::shared_ptr& partitioning); -extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP ds_sexp, SEXP schema_sexp, SEXP format_sexp, SEXP filesystem_sexp, SEXP path_sexp, SEXP partitioning_sexp){ +void dataset___Dataset__Write(const std::shared_ptr& scanner, const std::shared_ptr& schema, const std::shared_ptr& format, const std::shared_ptr& filesystem, std::string path, const std::shared_ptr& partitioning); +extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP scanner_sexp, SEXP schema_sexp, SEXP format_sexp, SEXP filesystem_sexp, SEXP path_sexp, SEXP partitioning_sexp){ BEGIN_CPP11 - arrow::r::Input&>::type ds(ds_sexp); + arrow::r::Input&>::type scanner(scanner_sexp); arrow::r::Input&>::type schema(schema_sexp); arrow::r::Input&>::type format(format_sexp); arrow::r::Input&>::type filesystem(filesystem_sexp); arrow::r::Input::type path(path_sexp); arrow::r::Input&>::type partitioning(partitioning_sexp); - dataset___Dataset__Write(ds, schema, format, filesystem, path, partitioning); + dataset___Dataset__Write(scanner, schema, format, filesystem, path, partitioning); return R_NilValue; END_CPP11 } #else -extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP ds_sexp, SEXP schema_sexp, SEXP format_sexp, SEXP filesystem_sexp, SEXP path_sexp, SEXP partitioning_sexp){ +extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP scanner_sexp, SEXP schema_sexp, SEXP format_sexp, SEXP filesystem_sexp, SEXP path_sexp, SEXP partitioning_sexp){ Rf_error("Cannot call dataset___Dataset__Write(). Please use arrow::install_arrow() to install required runtime libraries. 
"); } #endif @@ -6241,6 +6256,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1}, { "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2}, { "_arrow_dataset___Scanner__Scan", (DL_FUNC) &_arrow_dataset___Scanner__Scan, 1}, + { "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1}, { "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, { "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6}, { "_arrow_shared_ptr_is_null", (DL_FUNC) &_arrow_shared_ptr_is_null, 1}, diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp index c041e114d2a..349fde47db2 100644 --- a/r/src/dataset.cpp +++ b/r/src/dataset.cpp @@ -192,8 +192,8 @@ std::shared_ptr dataset___ParquetFileFormat__MakeWrite( const std::shared_ptr& arrow_props) { auto fmt = std::make_shared(); - fmt->writer_properties = writer_props; - fmt->arrow_writer_properties = arrow_props; + // fmt->writer_properties = writer_props; + // fmt->arrow_writer_properties = arrow_props; return fmt; } @@ -316,6 +316,12 @@ std::vector> dataset___Scanner__Scan( return out; } +// [[arrow::export]] +std::shared_ptr dataset___Scanner__schema( + const std::shared_ptr& sc) { + return sc->schema(); +} + // [[arrow::export]] std::vector> dataset___ScanTask__get_batches( const std::shared_ptr& scan_task) { @@ -331,17 +337,19 @@ std::vector> dataset___ScanTask__get_batches } // [[arrow::export]] -void dataset___Dataset__Write(const std::shared_ptr& ds, +void dataset___Dataset__Write(const std::shared_ptr& scanner, const std::shared_ptr& schema, const std::shared_ptr& format, const std::shared_ptr& filesystem, std::string path, const std::shared_ptr& partitioning) { - auto frags = ds->GetFragments(); - auto ctx = std::make_shared(); - ctx->use_threads = true; - StopIfNotOk(ds::FileSystemDataset::Write(schema, format, filesystem, path, partitioning, - ctx, std::move(frags))); + ds::FileSystemDatasetWriteOptions opts; + opts.file_write_options = format->DefaultWriteOptions(); + opts.filesystem = filesystem; + opts.base_dir = path; + opts.partitioning = partitioning; + opts.basename_template = "dat_{i}.feather"; + StopIfNotOk(ds::FileSystemDataset::Write(schema, opts, scanner)); return; } diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 327d0394771..5c18a8c793d 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -870,7 +870,7 @@ test_that("Dataset writing: no partitioning", { dst_dir <- tempfile() write_dataset(ds, dst_dir, format = "feather", partitioning = NULL) expect_true(dir.exists(dst_dir)) - expect_true(length(dir(dst_dir)) > 1) + expect_true(length(dir(dst_dir)) > 0) }) test_that("Dataset writing: from data.frame", { @@ -965,7 +965,7 @@ test_that("Writing a dataset: Parquet format options", { }) test_that("Dataset writing: unsupported features/input validation", { - expect_error(write_dataset(4), "'dataset' must be a Dataset") + expect_error(write_dataset(4), 'dataset must be a "Dataset"') ds <- open_dataset(hive_dir) From 9eea1bdb6318b26d7ef30538b32b5212f1744c7c Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 29 Sep 2020 20:29:08 -0400 Subject: [PATCH 03/34] fix Scanner splitting --- cpp/src/arrow/dataset/file_base.cc | 10 +++++++--- cpp/src/arrow/util/mutex.h | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.cc 
b/cpp/src/arrow/dataset/file_base.cc index 90d270a4b30..8611a32ac32 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -260,10 +260,14 @@ Status FileSystemDataset::Write(std::shared_ptr schema, ScanTaskVector scan_tasks; std::vector fragment_for_task; + // Avoid contention with multithreaded readers + auto context = std::make_shared(*scanner->context()); + context->use_threads = false; + for (const auto& fragment : fragments) { - ARROW_ASSIGN_OR_RAISE( - auto scan_task_it, - Scanner(fragment, scanner->options(), scanner->context()).Scan()); + auto options = std::make_shared(*scanner->options()); + ARROW_ASSIGN_OR_RAISE(auto scan_task_it, + Scanner(fragment, std::move(options), context).Scan()); for (auto maybe_scan_task : scan_task_it) { ARROW_ASSIGN_OR_RAISE(auto scan_task, maybe_scan_task); scan_tasks.push_back(std::move(scan_task)); diff --git a/cpp/src/arrow/util/mutex.h b/cpp/src/arrow/util/mutex.h index 8a430266431..7e72bacc69a 100644 --- a/cpp/src/arrow/util/mutex.h +++ b/cpp/src/arrow/util/mutex.h @@ -35,7 +35,7 @@ class ARROW_EXPORT Mutex { Mutex& operator=(Mutex&&) = default; /// A Guard is falsy if a lock could not be acquired. - class Guard { + class ARROW_EXPORT Guard { public: Guard() : locked_(NULLPTR, [](Mutex* mutex) {}) {} Guard(Guard&&) = default; From c263f6c8fd3ffc3e112ea09e9c51a9f6eaaa4e7c Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 29 Sep 2020 20:44:39 -0400 Subject: [PATCH 04/34] remove debug print()s --- python/pyarrow/tests/test_dataset.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index 128c58c2550..c6081c1087c 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -2163,8 +2163,6 @@ def _check_dataset_roundtrip(dataset, base_dir, expected_files, # check that reading back in as dataset gives the same result dataset2 = ds.dataset( base_dir_path, format="feather", partitioning=partitioning) - print(dataset.to_table().to_pandas()) - print(dataset2.to_table().to_pandas()) assert dataset2.to_table().equals(dataset.to_table()) From e70dd9d94622d8b96cdbc3237f2bd0f7c6fb86e4 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 30 Sep 2020 17:13:16 -0400 Subject: [PATCH 05/34] don't double unlock std::mutex --- cpp/src/arrow/util/mutex.cc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index e1b7700ec1b..9fc7d1c11c1 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -24,22 +24,24 @@ namespace arrow { namespace util { -struct Mutex::Impl { - std::mutex mutex_; -}; +struct Mutex::Impl : std::mutex {}; Mutex::Guard::Guard(Mutex* locked) - : locked_(locked, [](Mutex* locked) { locked->impl_->mutex_.unlock(); }) {} + : locked_(locked, [](Mutex* locked) { + DCHECK(!locked->impl_->try_lock()); + locked->impl_->unlock(); + }) {} void Mutex::Guard::Unlock() { if (locked_) { - locked_->impl_->mutex_.unlock(); + auto locked = std::move(locked_); + DCHECK_EQ(locked_, nullptr); } } Mutex::Guard Mutex::TryLock() { DCHECK_NE(impl_, nullptr); - if (impl_->mutex_.try_lock()) { + if (impl_->try_lock()) { return Guard{this}; } return Guard{}; @@ -47,7 +49,7 @@ Mutex::Guard Mutex::TryLock() { Mutex::Guard Mutex::Lock() { DCHECK_NE(impl_, nullptr); - impl_->mutex_.lock(); + impl_->lock(); return Guard{this}; } From fa97c5264e8eaea01fcaf2f3e5402ccf6d513c6c Mon Sep 17 00:00:00 2001 From: 
Benjamin Kietzman Date: Thu, 1 Oct 2020 10:49:26 -0400 Subject: [PATCH 06/34] extract a helper for single-lookup insertion into maps --- cpp/src/arrow/dataset/file_base.cc | 52 +++++++++++++----------- cpp/src/arrow/util/map.h | 64 ++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 24 deletions(-) create mode 100644 cpp/src/arrow/util/map.h diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index 8611a32ac32..a36ff9d411b 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -31,6 +31,7 @@ #include "arrow/io/memory.h" #include "arrow/util/iterator.h" #include "arrow/util/logging.h" +#include "arrow/util/map.h" #include "arrow/util/mutex.h" #include "arrow/util/task_group.h" @@ -195,34 +196,37 @@ struct WriterSet { write_options_.partitioning->Format(partition_expression)); std::string dir = base_dir_ + part_segments; - auto lock = dir_to_writer_.Lock(); + util::Mutex::Guard writer_lock; - auto it_success = - dir_to_writer_->emplace(std::move(dir), std::make_shared()); - std::shared_ptr writer = it_success.first->second; + auto set_lock = mutex_.Lock(); - if (!it_success.second) { - return writer; + auto writer = + internal::GetOrInsertGenerated(&dir_to_writer_, dir, [&](const std::string&) { + auto writer = std::make_shared(); + writer_lock = writer->Lock(); + return writer; + })->second; + + if (writer_lock) { + // NB: next_basename_() must be invoked with the set_lock held + auto path = fs::internal::ConcatAbstractPath(dir, next_basename_()); + set_lock.Unlock(); + + RETURN_NOT_OK(write_options_.filesystem->CreateDir(dir)); + + ARROW_ASSIGN_OR_RAISE(auto destination, + write_options_.filesystem->OpenOutputStream(path)); + + ARROW_ASSIGN_OR_RAISE(**writer, write_options_.format()->MakeWriter( + std::move(destination), schema, + write_options_.file_write_options)); } - // FIXME Flush writers when at capacity for open files. 
- - std::string basename = next_basename_(); - auto writer_lock = writer->Lock(); - lock.Unlock(); - - dir = base_dir_ + part_segments; - RETURN_NOT_OK(write_options_.filesystem->CreateDir(dir)); - ARROW_ASSIGN_OR_RAISE(auto destination, - write_options_.filesystem->OpenOutputStream( - fs::internal::ConcatAbstractPath(dir, basename))); - ARROW_ASSIGN_OR_RAISE( - **writer, write_options_.format()->MakeWriter(std::move(destination), schema, - write_options_.file_write_options)); + return writer; } Status FinishAll(internal::TaskGroup* task_group) { - for (const auto& dir_writer : *dir_to_writer_) { + for (const auto& dir_writer : dir_to_writer_) { task_group->Append([&] { std::shared_ptr writer = **dir_writer.second; return writer->Finish(); @@ -233,8 +237,8 @@ struct WriterSet { } // There should only be a single writer open for each partition directory at a time - util::Mutexed>> - dir_to_writer_; + util::Mutex mutex_; + std::unordered_map> dir_to_writer_; NextBasenameGenerator next_basename_; std::string base_dir_; const FileSystemDatasetWriteOptions& write_options_; @@ -313,7 +317,7 @@ Status FileSystemDataset::Write(std::shared_ptr schema, ARROW_ASSIGN_OR_RAISE(auto writer, writers.Get(partition_expression, groups.batches[i]->schema())); - pending_writes[i] = {writer, std::move(groups.batches[i])}; + pending_writes[i] = {std::move(writer), std::move(groups.batches[i])}; } for (size_t i = 0; !pending_writes.empty(); ++i) { diff --git a/cpp/src/arrow/util/map.h b/cpp/src/arrow/util/map.h new file mode 100644 index 00000000000..3872e972bd9 --- /dev/null +++ b/cpp/src/arrow/util/map.h @@ -0,0 +1,64 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "arrow/result.h" + +namespace arrow { +namespace internal { + +/// Helper providing single-lookup conditional insertion into std::map or +/// std::unordered_map. If `key` exists in the container, an iterator to that pair +/// will be returned. If `key` does not exist in the container, `gen(key)` will be +/// invoked and its return value inserted. 
+template +auto GetOrInsertGenerated(Map* map, Key&& key, Gen&& gen) + -> decltype(map->begin()->second = gen(map->begin()->first), map->begin()) { + decltype(gen(map->begin()->first)) placeholder; + + auto it_success = map->emplace(std::forward(key), std::move(placeholder)); + if (it_success.second) { + // insertion of placeholder succeeded, overwrite it with gen() + const auto& inserted_key = it_success.first->first; + auto* value = &it_success.first->second; + *value = gen(inserted_key); + } + return it_success.first; +} + +template +auto GetOrInsertGenerated(Map* map, Key&& key, Gen&& gen) + -> Resultbegin()->second = gen(map->begin()->first).ValueOrDie(), + map->begin())> { + decltype(gen(map->begin()->first).ValueOrDie()) placeholder; + + auto it_success = map->emplace(std::forward(key), std::move(placeholder)); + if (it_success.second) { + // insertion of placeholder succeeded, overwrite it with gen() + const auto& inserted_key = it_success.first->first; + auto* value = &it_success.first->second; + ARROW_ASSIGN_OR_RAISE(*value, gen(inserted_key)); + } + return it_success.first; +} + +} // namespace internal +} // namespace arrow + From 0815d6e8ba385d5a33f80dbc969608f99c076f18 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 1 Oct 2020 14:20:24 -0400 Subject: [PATCH 07/34] add a python binding for custom parquet write properties --- cpp/src/arrow/dataset/file_base.h | 2 + python/pyarrow/_dataset.pyx | 201 ++++++++++++++++++- python/pyarrow/dataset.py | 10 +- python/pyarrow/includes/libarrow_dataset.pxd | 13 +- python/pyarrow/tests/test_dataset.py | 17 +- 5 files changed, 218 insertions(+), 25 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h index e8c84f4afb6..03b96004af6 100644 --- a/cpp/src/arrow/dataset/file_base.h +++ b/cpp/src/arrow/dataset/file_base.h @@ -256,6 +256,8 @@ class ARROW_DS_EXPORT FileWriteOptions { const std::shared_ptr& format() const { return format_; } + std::string type_name() const { return format_->type_name(); } + protected: explicit FileWriteOptions(std::shared_ptr format) : format_(std::move(format)) {} diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index 922d7b05bea..77cefcd8d46 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -613,7 +613,7 @@ cdef class FileWriteOptions(_Weakrefable): CFileWriteOptions* options def __init__(self): - pass + _forbid_instantiation(self.__class__) cdef void init(self, const shared_ptr[CFileWriteOptions]& sp): self.wrapped = sp @@ -621,7 +621,18 @@ cdef class FileWriteOptions(_Weakrefable): @staticmethod cdef wrap(const shared_ptr[CFileWriteOptions]& sp): - cdef FileWriteOptions self = FileWriteOptions() + type_name = frombytes(sp.get().type_name()) + + classes = { + 'ipc': IpcFileWriteOptions, + 'parquet': ParquetFileWriteOptions, + } + + class_ = classes.get(type_name, None) + if class_ is None: + raise TypeError(type_name) + + cdef FileWriteOptions self = class_.__new__(class_) self.init(sp) return self @@ -685,8 +696,7 @@ cdef class FileFormat(_Weakrefable): nullptr)) return Fragment.wrap(move(c_fragment)) - @property - def default_write_options(self): + def make_write_options(self): return FileWriteOptions.wrap(self.format.DefaultWriteOptions()) def __eq__(self, other): @@ -1081,6 +1091,164 @@ cdef class ParquetReadOptions(_Weakrefable): return False +cdef class ParquetFileWriteOptions(FileWriteOptions): + + cdef: + CParquetFileWriteOptions* parquet_options + object _properties + + def update(self, **kwargs): + 
cdef CParquetFileWriteOptions* opts = self.parquet_options + + arrow_fields = { + "use_deprecated_int96_timestamps", + "coerce_timestamps", + "allow_truncated_timestamps", + } + + update = False + update_arrow = False + for name, value in kwargs.items(): + assert name in self._properties + self._properties[name] = value + if name in arrow_fields: + update_arrow = True + else: + update = True + + if update: + opts.writer_properties = _create_writer_properties( + use_dictionary=self.use_dictionary, + compression=self.compression, + version=self.version, + write_statistics=self.write_statistics, + data_page_size=self.data_page_size, + compression_level=self.compression_level, + use_byte_stream_split=self.use_byte_stream_split, + data_page_version=self.data_page_version, + ) + + if update_arrow: + opts.arrow_writer_properties = _create_arrow_writer_properties( + use_deprecated_int96_timestamps=( + self.use_deprecated_int96_timestamps + ), + coerce_timestamps=self.coerce_timestamps, + allow_truncated_timestamps=self.allow_truncated_timestamps, + writer_engine_version=self.writer_engine_version, + ) + + @property + def use_dictionary(self): + return self._properties['use_dictionary'] + + @use_dictionary.setter + def use_dictionary(self, use_dictionary): + self.update(use_dictionary=use_dictionary) + + @property + def compression(self): + return self._properties['compression'] + + @compression.setter + def compression(self, compression): + self.update(compression=compression) + + @property + def version(self): + return self._properties['version'] + + @version.setter + def version(self, version): + self.update(version=version) + + @property + def write_statistics(self): + return self._properties['write_statistics'] + + @write_statistics.setter + def write_statistics(self, write_statistics): + self.update(write_statistics=write_statistics) + + @property + def data_page_size(self): + return self._properties['data_page_size'] + + @data_page_size.setter + def data_page_size(self, data_page_size): + self.update(data_page_size=data_page_size) + + @property + def compression_level(self): + return self._properties['compression_level'] + + @compression_level.setter + def compression_level(self, compression_level): + self.update(compression_level=compression_level) + + @property + def use_byte_stream_split(self): + return self._properties['use_byte_stream_split'] + + @use_byte_stream_split.setter + def use_byte_stream_split(self, use_byte_stream_split): + self.update(use_byte_stream_split=use_byte_stream_split) + + @property + def data_page_version(self): + return self._properties['data_page_version'] + + @data_page_version.setter + def data_page_version(self, data_page_version): + self.update(data_page_version=data_page_version) + + @property + def use_deprecated_int96_timestamps(self): + return self._properties['use_deprecated_int96_timestamps'] + + @use_deprecated_int96_timestamps.setter + def use_deprecated_int96_timestamps(self, use_deprecated_int96_timestamps): + self.update( + use_deprecated_int96_timestamps=use_deprecated_int96_timestamps) + + @property + def coerce_timestamps(self): + return self._properties['coerce_timestamps'] + + @coerce_timestamps.setter + def coerce_timestamps(self, coerce_timestamps): + self.update(coerce_timestamps=coerce_timestamps) + + @property + def allow_truncated_timestamps(self): + return self._properties['allow_truncated_timestamps'] + + @allow_truncated_timestamps.setter + def allow_truncated_timestamps(self, allow_truncated_timestamps): + 
self.update(allow_truncated_timestamps=allow_truncated_timestamps) + + @property + def writer_engine_version(self): + return 'V2' + + cdef void init(self, const shared_ptr[CFileWriteOptions]& sp): + FileWriteOptions.init(self, sp) + self.parquet_options = sp.get() + self._properties = dict( + use_dictionary=True, + compression="snappy", + version="1.0", + write_statistics=None, + data_page_size=None, + compression_level=None, + use_byte_stream_split=False, + data_page_version="1.0", + use_deprecated_int96_timestamps=False, + coerce_timestamps=None, + allow_truncated_timestamps=False, + ) + + cdef class ParquetFileFormat(FileFormat): cdef: @@ -1121,15 +1289,21 @@ cdef class ParquetFileFormat(FileFormat): def read_options(self): cdef CParquetFileFormatReaderOptions* options options = &self.parquet_format.reader_options - enable = options.enable_parallel_column_conversion return ParquetReadOptions( use_buffered_stream=options.use_buffered_stream, buffer_size=options.buffer_size, dictionary_columns={frombytes(col) for col in options.dict_columns}, - enable_parallel_column_conversion=enable + enable_parallel_column_conversion=( + options.enable_parallel_column_conversion + ) ) + def make_write_options(self, **kwargs): + opts = FileFormat.make_write_options(self) + ( opts).update(**kwargs) + return opts + def equals(self, ParquetFileFormat other): return ( self.read_options.equals(other.read_options) @@ -1162,6 +1336,12 @@ cdef class ParquetFileFormat(FileFormat): return Fragment.wrap(move(c_fragment)) +cdef class IpcFileWriteOptions(FileWriteOptions): + + def __init__(self): + _forbid_instantiation(self.__class__) + + cdef class IpcFileFormat(FileFormat): def __init__(self): @@ -1187,6 +1367,9 @@ cdef class CsvFileFormat(FileFormat): FileFormat.init(self, sp) self.csv_format = sp.get() + def make_write_options(self): + raise NotImplemented("writing CSV datasets") + @property def parse_options(self): return ParseOptions.wrap(self.csv_format.parse_options) @@ -2116,7 +2299,8 @@ def _get_partition_keys(Expression partition_expression): def _filesystemdataset_write( data, object base_dir, str basename_template, Schema schema not None, FileFormat format not None, FileSystem filesystem not None, - Partitioning partitioning not None, bint use_threads=True, + Partitioning partitioning not None, FileWriteOptions file_options not None, + bint use_threads, ): """ CFileSystemDataset.Write wrapper @@ -2126,11 +2310,8 @@ def _filesystemdataset_write( CFileSystemDatasetWriteOptions c_options shared_ptr[CScanner] c_scanner - FileWriteOptions file_options vector[shared_ptr[CRecordBatch]] c_batches - file_options = format.default_write_options - c_schema = pyarrow_unwrap_schema(schema) c_options.file_write_options = file_options.unwrap() diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py index 702a50f1f35..574fad9d325 100644 --- a/python/pyarrow/dataset.py +++ b/python/pyarrow/dataset.py @@ -32,13 +32,16 @@ FileSystemDataset, FileSystemDatasetFactory, FileSystemFactoryOptions, + FileWriteOptions, Fragment, HivePartitioning, IpcFileFormat, + IpcFileWriteOptions, ParquetDatasetFactory, ParquetFactoryOptions, ParquetFileFormat, ParquetFileFragment, + ParquetFileWriteOptions, ParquetReadOptions, Partitioning, PartitioningFactory, @@ -696,7 +699,7 @@ def _ensure_write_partitioning(scheme): def write_dataset(data, base_dir, basename_template, format=None, partitioning=None, schema=None, - filesystem=None, use_threads=True): + filesystem=None, file_options=None, use_threads=True): """ Write a dataset to 
a given format and partitioning. @@ -742,6 +745,9 @@ def write_dataset(data, base_dir, basename_template, format=None, ) format = _ensure_format(format) + if file_options is None: + file_options = format.make_write_options() + partitioning = _ensure_write_partitioning(partitioning) if filesystem is None: @@ -752,5 +758,5 @@ def write_dataset(data, base_dir, basename_template, format=None, _filesystemdataset_write( data, base_dir, basename_template, schema, format, - filesystem, partitioning, use_threads, + filesystem, partitioning, file_options, use_threads, ) diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd index 5128a8ea0b6..2e5c9c72eb4 100644 --- a/python/pyarrow/includes/libarrow_dataset.pxd +++ b/python/pyarrow/includes/libarrow_dataset.pxd @@ -213,7 +213,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: cdef cppclass CFileWriteOptions \ "arrow::dataset::FileWriteOptions": - pass + c_string type_name() const cdef cppclass CFileFormat "arrow::dataset::FileFormat": c_string type_name() const @@ -243,8 +243,9 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: vector[CRowGroupInfo] FromIdentifiers(vector[int]) cdef cppclass CParquetFileWriteOptions \ - "arrow::dataset::ParquetFileWriteOptions": - pass + "arrow::dataset::ParquetFileWriteOptions"(CFileWriteOptions): + shared_ptr[WriterProperties] writer_properties + shared_ptr[ArrowWriterProperties] arrow_writer_properties cdef cppclass CParquetFileFragment "arrow::dataset::ParquetFileFragment"( CFileFragment): @@ -292,14 +293,16 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: cdef cppclass CParquetFileFormat "arrow::dataset::ParquetFileFormat"( CFileFormat): CParquetFileFormatReaderOptions reader_options - shared_ptr[WriterProperties] writer_properties - shared_ptr[ArrowWriterProperties] arrow_writer_properties CResult[shared_ptr[CFileFragment]] MakeFragment( CFileSource source, shared_ptr[CExpression] partition_expression, vector[CRowGroupInfo] row_groups, shared_ptr[CSchema] physical_schema) + cdef cppclass CIpcFileWriteOptions \ + "arrow::dataset::IpcFileWriteOptions"(CFileWriteOptions): + pass + cdef cppclass CIpcFileFormat "arrow::dataset::IpcFileFormat"( CFileFormat): pass diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index c6081c1087c..a7e46ae4ce1 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -2358,11 +2358,12 @@ def test_write_dataset_parquet(tempdir): result = ds.dataset(base_dir, format="parquet").to_table() assert result.equals(table) - # using custom options / format object - if False: - for version in ["1.0", "2.0"]: - format = ds.ParquetFileFormat(write_options=dict(version=version)) - base_dir = tempdir / 'parquet_dataset_version{0}'.format(version) - ds.write_dataset(table, base_dir, format=format) - meta = pq.read_metadata(base_dir / "dat_0.parquet") - assert meta.format_version == version + # using custom options + for version in ["1.0", "2.0"]: + format = ds.ParquetFileFormat() + opts = format.make_write_options(version=version) + base_dir = tempdir / 'parquet_dataset_version{0}'.format(version) + ds.write_dataset(table, base_dir, basename_template='dat_{i}.parquet', + format=format, file_options=opts) + meta = pq.read_metadata(base_dir / "dat_0.parquet") + assert meta.format_version == version From 2830bd0d746b8d55fce658a7e7c8c229c79321a4 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: 
Thu, 1 Oct 2020 16:27:55 -0400 Subject: [PATCH 08/34] remove unused schema parameter --- cpp/src/arrow/dataset/file_base.cc | 3 +-- cpp/src/arrow/dataset/file_base.h | 3 +-- cpp/src/arrow/dataset/test_util.h | 2 +- python/pyarrow/_dataset.pyx | 6 +----- python/pyarrow/includes/libarrow_dataset.pxd | 1 - r/man/FileWriteOptions.Rd | 9 +++++++++ 6 files changed, 13 insertions(+), 11 deletions(-) create mode 100644 r/man/FileWriteOptions.Rd diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index a36ff9d411b..9c121bca659 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -244,8 +244,7 @@ struct WriterSet { const FileSystemDatasetWriteOptions& write_options_; }; -Status FileSystemDataset::Write(std::shared_ptr schema, - const FileSystemDatasetWriteOptions& write_options, +Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_options, std::shared_ptr scanner) { auto task_group = scanner->context()->TaskGroup(); diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h index 03b96004af6..11f5a608a91 100644 --- a/cpp/src/arrow/dataset/file_base.h +++ b/cpp/src/arrow/dataset/file_base.h @@ -214,8 +214,7 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset { std::vector> fragments); /// \brief Write a dataset. - static Status Write(std::shared_ptr schema, - const FileSystemDatasetWriteOptions& write_options, + static Status Write(const FileSystemDatasetWriteOptions& write_options, std::shared_ptr scanner); /// \brief Return the type name of the dataset. diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h index 7822f52fe82..7251c39bffd 100644 --- a/cpp/src/arrow/dataset/test_util.h +++ b/cpp/src/arrow/dataset/test_util.h @@ -589,7 +589,7 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin { void DoWrite(std::shared_ptr desired_partitioning) { write_options_.partitioning = desired_partitioning; auto scanner = std::make_shared(dataset_, scan_options_, scan_context_); - ASSERT_OK(FileSystemDataset::Write(source_schema_, write_options_, scanner)); + ASSERT_OK(FileSystemDataset::Write(write_options_, scanner)); // re-discover the written dataset fs::FileSelector s; diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index 77cefcd8d46..847b4b0d71e 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -2306,14 +2306,10 @@ def _filesystemdataset_write( CFileSystemDataset.Write wrapper """ cdef: - shared_ptr[CSchema] c_schema CFileSystemDatasetWriteOptions c_options shared_ptr[CScanner] c_scanner - vector[shared_ptr[CRecordBatch]] c_batches - c_schema = pyarrow_unwrap_schema(schema) - c_options.file_write_options = file_options.unwrap() c_options.filesystem = filesystem.unwrap() c_options.base_dir = tobytes(_stringify_path(base_dir)) @@ -2338,4 +2334,4 @@ def _filesystemdataset_write( c_scanner = ( scanner).unwrap() with nogil: - check_status(CFileSystemDataset.Write(c_schema, c_options, c_scanner)) + check_status(CFileSystemDataset.Write(c_options, c_scanner)) diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd index 2e5c9c72eb4..3b5b70a37ba 100644 --- a/python/pyarrow/includes/libarrow_dataset.pxd +++ b/python/pyarrow/includes/libarrow_dataset.pxd @@ -274,7 +274,6 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: @staticmethod CStatus Write( - shared_ptr[CSchema] schema, const CFileSystemDatasetWriteOptions& 
write_options, shared_ptr[CScanner] scanner) diff --git a/r/man/FileWriteOptions.Rd b/r/man/FileWriteOptions.Rd new file mode 100644 index 00000000000..5c4d04456b4 --- /dev/null +++ b/r/man/FileWriteOptions.Rd @@ -0,0 +1,9 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataset-write.R +\name{FileWriteOptions} +\alias{FileWriteOptions} +\alias{ParquetFileWriteOptions} +\title{Format-specific write options} +\description{ +A \code{FileWriteOptions} holds write options specific to a \code{FileFormat}. +} From 9b8b8fce4ac4d89e63f69f9996b892f7e6ddfe02 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 1 Oct 2020 16:28:09 -0400 Subject: [PATCH 09/34] repair parquet write options in R --- r/NAMESPACE | 1 + r/R/arrowExports.R | 20 +++++++---- r/R/dataset-format.R | 20 +++++------ r/R/dataset-write.R | 54 ++++++++++++++++++++++++------ r/R/parquet.R | 1 + r/src/arrowExports.cpp | 76 ++++++++++++++++++++++++++++++------------ r/src/dataset.cpp | 47 +++++++++++++++----------- 7 files changed, 150 insertions(+), 69 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index a46f72ea2e6..c54acf7a824 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -159,6 +159,7 @@ export(MessageType) export(MetadataVersion) export(ParquetFileFormat) export(ParquetFileReader) +export(ParquetFileWriteOptions) export(ParquetFileWriter) export(ParquetReaderProperties) export(ParquetVersionType) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index af5b71bd436..932d95396e7 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -396,12 +396,20 @@ dataset___FileFormat__type_name <- function(format){ .Call(`_arrow_dataset___FileFormat__type_name` , format) } -dataset___ParquetFileFormat__MakeRead <- function(use_buffered_stream, buffer_size, dict_columns){ - .Call(`_arrow_dataset___ParquetFileFormat__MakeRead` , use_buffered_stream, buffer_size, dict_columns) +dataset___FileFormat__DefaultWriteOptions <- function(fmt){ + .Call(`_arrow_dataset___FileFormat__DefaultWriteOptions` , fmt) } -dataset___ParquetFileFormat__MakeWrite <- function(writer_props, arrow_props){ - .Call(`_arrow_dataset___ParquetFileFormat__MakeWrite` , writer_props, arrow_props) +dataset___ParquetFileFormat__Make <- function(use_buffered_stream, buffer_size, dict_columns){ + .Call(`_arrow_dataset___ParquetFileFormat__Make` , use_buffered_stream, buffer_size, dict_columns) +} + +dataset___FileWriteOptions__type_name <- function(options){ + .Call(`_arrow_dataset___FileWriteOptions__type_name` , options) +} + +dataset___ParquetFileWriteOptions__update <- function(options, writer_props, arrow_writer_props){ + invisible(.Call(`_arrow_dataset___ParquetFileWriteOptions__update` , options, writer_props, arrow_writer_props)) } dataset___IpcFileFormat__Make <- function(){ @@ -472,8 +480,8 @@ dataset___ScanTask__get_batches <- function(scan_task){ .Call(`_arrow_dataset___ScanTask__get_batches` , scan_task) } -dataset___Dataset__Write <- function(scanner, schema, format, filesystem, path, partitioning){ - invisible(.Call(`_arrow_dataset___Dataset__Write` , scanner, schema, format, filesystem, path, partitioning)) +dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner){ + invisible(.Call(`_arrow_dataset___Dataset__Write` , file_write_options, filesystem, base_dir, partitioning, basename_template, scanner)) } shared_ptr_is_null <- function(xp){ diff --git a/r/R/dataset-format.R b/r/R/dataset-format.R index 46e7ec4e649..c98e7b5d44e 100644 --- 
a/r/R/dataset-format.R +++ b/r/R/dataset-format.R @@ -61,6 +61,11 @@ FileFormat <- R6Class("FileFormat", inherit = ArrowObject, } else { self } + }, + make_write_options = function(...) { + options <- shared_ptr(FileWriteOptions, dataset___FileFormat__DefaultWriteOptions(self)) + options$update(...) + options } ), active = list( @@ -91,18 +96,9 @@ FileFormat$create <- function(format, ...) { ParquetFileFormat <- R6Class("ParquetFileFormat", inherit = FileFormat) ParquetFileFormat$create <- function(use_buffered_stream = FALSE, buffer_size = 8196, - dict_columns = character(0), - writer_properties = NULL, - arrow_writer_properties = NULL) { - if (is.null(writer_properties) && is.null(arrow_writer_properties)) { - shared_ptr(ParquetFileFormat, dataset___ParquetFileFormat__MakeRead( - use_buffered_stream, buffer_size, dict_columns)) - } else { - writer_properties = writer_properties %||% ParquetWriterProperties$create() - arrow_writer_properties = arrow_writer_properties %||% ParquetArrowWriterProperties$create() - shared_ptr(ParquetFileFormat, dataset___ParquetFileFormat__MakeWrite( - writer_properties, arrow_writer_properties)) - } + dict_columns = character(0)) { + shared_ptr(ParquetFileFormat, dataset___ParquetFileFormat__Make( + use_buffered_stream, buffer_size, dict_columns)) } #' @usage NULL diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R index b449186a9ea..abc116ef126 100644 --- a/r/R/dataset-write.R +++ b/r/R/dataset-write.R @@ -44,8 +44,9 @@ #' @export write_dataset <- function(dataset, path, - format = dataset$format$type, + format = dataset$format, partitioning = dplyr::group_vars(dataset), + basename_template = paste("dat_{i}", format$type, sep="."), hive_style = TRUE, filesystem = NULL, ...) { @@ -75,14 +76,12 @@ write_dataset <- function(dataset, schema <- scanner$schema if (!inherits(format, "FileFormat")) { - if (identical(format, "parquet")) { - # We have to do some special massaging of properties - writer_props <- ParquetWriterProperties$create(dataset, ...) - arrow_writer_props <- ParquetArrowWriterProperties$create(...) - format <- ParquetFileFormat$create(writer_properties = writer_props, arrow_writer_properties = arrow_writer_props) - } else { - format <- FileFormat$create(format, ...) - } + format <- FileFormat$create(format) + } + if (inherits(format, "ParquetFileFormat")) { + options <- format$make_write_options(table=dataset, ...) + } else { + options <- format$make_write_options(...) } if (!inherits(partitioning, "Partitioning")) { @@ -96,5 +95,40 @@ write_dataset <- function(dataset, } path_and_fs <- get_path_and_filesystem(path, filesystem) - dataset___Dataset__Write(scanner, schema, path = path_and_fs$path, format = format, partitioning = partitioning, filesystem = path_and_fs$fs) + dataset___Dataset__Write(options, path_and_fs$fs, path_and_fs$path, + partitioning, basename_template, scanner) } + +#' Format-specific write options +#' +#' @description +#' A `FileWriteOptions` holds write options specific to a `FileFormat`. +FileWriteOptions <- R6Class("FileWriteOptions", inherit = ArrowObject, + public = list( + ..dispatch = function() { + type <- self$type + if (type == "parquet") { + shared_ptr(ParquetFileWriteOptions, self$pointer()) + } else { + self + } + }, + update = function(...) 
{ + type <- self$type + if (type == "parquet") { + dataset___ParquetFileWriteOptions__update(self, + ParquetWriterProperties$create(...), + ParquetArrowWriterProperties$create(...)) + } + } + ), + active = list( + type = function() dataset___FileWriteOptions__type_name(self) + ) +) + +#' @usage NULL +#' @format NULL +#' @rdname FileWriteOptions +#' @export +ParquetFileWriteOptions <- R6Class("ParquetFileWriteOptions", inherit = FileWriteOptions) diff --git a/r/R/parquet.R b/r/R/parquet.R index c95154e6693..15d87c15bf3 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -336,6 +336,7 @@ ParquetWriterProperties$create <- function(table, parquet___WriterProperties___Builder__create() ) if (!is.null(version)) { + print(version) builder$set_version(version) } if (!is.null(compression)) { diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 2b4d0723879..1c7ab14e816 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1558,34 +1558,66 @@ extern "C" SEXP _arrow_dataset___FileFormat__type_name(SEXP format_sexp){ // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr dataset___ParquetFileFormat__MakeRead(bool use_buffered_stream, int64_t buffer_size, cpp11::strings dict_columns); -extern "C" SEXP _arrow_dataset___ParquetFileFormat__MakeRead(SEXP use_buffered_stream_sexp, SEXP buffer_size_sexp, SEXP dict_columns_sexp){ +std::shared_ptr dataset___FileFormat__DefaultWriteOptions(const std::shared_ptr& fmt); +extern "C" SEXP _arrow_dataset___FileFormat__DefaultWriteOptions(SEXP fmt_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type fmt(fmt_sexp); + return cpp11::as_sexp(dataset___FileFormat__DefaultWriteOptions(fmt)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_dataset___FileFormat__DefaultWriteOptions(SEXP fmt_sexp){ + Rf_error("Cannot call dataset___FileFormat__DefaultWriteOptions(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + +// dataset.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr dataset___ParquetFileFormat__Make(bool use_buffered_stream, int64_t buffer_size, cpp11::strings dict_columns); +extern "C" SEXP _arrow_dataset___ParquetFileFormat__Make(SEXP use_buffered_stream_sexp, SEXP buffer_size_sexp, SEXP dict_columns_sexp){ BEGIN_CPP11 arrow::r::Input::type use_buffered_stream(use_buffered_stream_sexp); arrow::r::Input::type buffer_size(buffer_size_sexp); arrow::r::Input::type dict_columns(dict_columns_sexp); - return cpp11::as_sexp(dataset___ParquetFileFormat__MakeRead(use_buffered_stream, buffer_size, dict_columns)); + return cpp11::as_sexp(dataset___ParquetFileFormat__Make(use_buffered_stream, buffer_size, dict_columns)); END_CPP11 } #else -extern "C" SEXP _arrow_dataset___ParquetFileFormat__MakeRead(SEXP use_buffered_stream_sexp, SEXP buffer_size_sexp, SEXP dict_columns_sexp){ - Rf_error("Cannot call dataset___ParquetFileFormat__MakeRead(). Please use arrow::install_arrow() to install required runtime libraries. "); +extern "C" SEXP _arrow_dataset___ParquetFileFormat__Make(SEXP use_buffered_stream_sexp, SEXP buffer_size_sexp, SEXP dict_columns_sexp){ + Rf_error("Cannot call dataset___ParquetFileFormat__Make(). Please use arrow::install_arrow() to install required runtime libraries. 
"); } #endif // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr dataset___ParquetFileFormat__MakeWrite(const std::shared_ptr& writer_props, const std::shared_ptr& arrow_props); -extern "C" SEXP _arrow_dataset___ParquetFileFormat__MakeWrite(SEXP writer_props_sexp, SEXP arrow_props_sexp){ +std::string dataset___FileWriteOptions__type_name(const std::shared_ptr& options); +extern "C" SEXP _arrow_dataset___FileWriteOptions__type_name(SEXP options_sexp){ BEGIN_CPP11 + arrow::r::Input&>::type options(options_sexp); + return cpp11::as_sexp(dataset___FileWriteOptions__type_name(options)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_dataset___FileWriteOptions__type_name(SEXP options_sexp){ + Rf_error("Cannot call dataset___FileWriteOptions__type_name(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + +// dataset.cpp +#if defined(ARROW_R_WITH_ARROW) +void dataset___ParquetFileWriteOptions__update(const std::shared_ptr& options, const std::shared_ptr& writer_props, const std::shared_ptr& arrow_writer_props); +extern "C" SEXP _arrow_dataset___ParquetFileWriteOptions__update(SEXP options_sexp, SEXP writer_props_sexp, SEXP arrow_writer_props_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type options(options_sexp); arrow::r::Input&>::type writer_props(writer_props_sexp); - arrow::r::Input&>::type arrow_props(arrow_props_sexp); - return cpp11::as_sexp(dataset___ParquetFileFormat__MakeWrite(writer_props, arrow_props)); + arrow::r::Input&>::type arrow_writer_props(arrow_writer_props_sexp); + dataset___ParquetFileWriteOptions__update(options, writer_props, arrow_writer_props); + return R_NilValue; END_CPP11 } #else -extern "C" SEXP _arrow_dataset___ParquetFileFormat__MakeWrite(SEXP writer_props_sexp, SEXP arrow_props_sexp){ - Rf_error("Cannot call dataset___ParquetFileFormat__MakeWrite(). Please use arrow::install_arrow() to install required runtime libraries. "); +extern "C" SEXP _arrow_dataset___ParquetFileWriteOptions__update(SEXP options_sexp, SEXP writer_props_sexp, SEXP arrow_writer_props_sexp){ + Rf_error("Cannot call dataset___ParquetFileWriteOptions__update(). Please use arrow::install_arrow() to install required runtime libraries. 
"); } #endif @@ -1853,21 +1885,21 @@ extern "C" SEXP _arrow_dataset___ScanTask__get_batches(SEXP scan_task_sexp){ // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -void dataset___Dataset__Write(const std::shared_ptr& scanner, const std::shared_ptr& schema, const std::shared_ptr& format, const std::shared_ptr& filesystem, std::string path, const std::shared_ptr& partitioning); -extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP scanner_sexp, SEXP schema_sexp, SEXP format_sexp, SEXP filesystem_sexp, SEXP path_sexp, SEXP partitioning_sexp){ +void dataset___Dataset__Write(const std::shared_ptr& file_write_options, const std::shared_ptr& filesystem, std::string base_dir, const std::shared_ptr& partitioning, std::string basename_template, const std::shared_ptr& scanner); +extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP file_write_options_sexp, SEXP filesystem_sexp, SEXP base_dir_sexp, SEXP partitioning_sexp, SEXP basename_template_sexp, SEXP scanner_sexp){ BEGIN_CPP11 - arrow::r::Input&>::type scanner(scanner_sexp); - arrow::r::Input&>::type schema(schema_sexp); - arrow::r::Input&>::type format(format_sexp); + arrow::r::Input&>::type file_write_options(file_write_options_sexp); arrow::r::Input&>::type filesystem(filesystem_sexp); - arrow::r::Input::type path(path_sexp); + arrow::r::Input::type base_dir(base_dir_sexp); arrow::r::Input&>::type partitioning(partitioning_sexp); - dataset___Dataset__Write(scanner, schema, format, filesystem, path, partitioning); + arrow::r::Input::type basename_template(basename_template_sexp); + arrow::r::Input&>::type scanner(scanner_sexp); + dataset___Dataset__Write(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner); return R_NilValue; END_CPP11 } #else -extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP scanner_sexp, SEXP schema_sexp, SEXP format_sexp, SEXP filesystem_sexp, SEXP path_sexp, SEXP partitioning_sexp){ +extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP file_write_options_sexp, SEXP filesystem_sexp, SEXP base_dir_sexp, SEXP partitioning_sexp, SEXP basename_template_sexp, SEXP scanner_sexp){ Rf_error("Cannot call dataset___Dataset__Write(). Please use arrow::install_arrow() to install required runtime libraries. 
"); } #endif @@ -6239,8 +6271,10 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_dataset___FileSystemDatasetFactory__Make1", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make1, 3}, { "_arrow_dataset___FileSystemDatasetFactory__Make3", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make3, 4}, { "_arrow_dataset___FileFormat__type_name", (DL_FUNC) &_arrow_dataset___FileFormat__type_name, 1}, - { "_arrow_dataset___ParquetFileFormat__MakeRead", (DL_FUNC) &_arrow_dataset___ParquetFileFormat__MakeRead, 3}, - { "_arrow_dataset___ParquetFileFormat__MakeWrite", (DL_FUNC) &_arrow_dataset___ParquetFileFormat__MakeWrite, 2}, + { "_arrow_dataset___FileFormat__DefaultWriteOptions", (DL_FUNC) &_arrow_dataset___FileFormat__DefaultWriteOptions, 1}, + { "_arrow_dataset___ParquetFileFormat__Make", (DL_FUNC) &_arrow_dataset___ParquetFileFormat__Make, 3}, + { "_arrow_dataset___FileWriteOptions__type_name", (DL_FUNC) &_arrow_dataset___FileWriteOptions__type_name, 1}, + { "_arrow_dataset___ParquetFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___ParquetFileWriteOptions__update, 3}, { "_arrow_dataset___IpcFileFormat__Make", (DL_FUNC) &_arrow_dataset___IpcFileFormat__Make, 0}, { "_arrow_dataset___CsvFileFormat__Make", (DL_FUNC) &_arrow_dataset___CsvFileFormat__Make, 1}, { "_arrow_dataset___DirectoryPartitioning", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning, 1}, diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp index 349fde47db2..f4c3ed1d994 100644 --- a/r/src/dataset.cpp +++ b/r/src/dataset.cpp @@ -171,7 +171,13 @@ std::string dataset___FileFormat__type_name( } // [[arrow::export]] -std::shared_ptr dataset___ParquetFileFormat__MakeRead( +std::shared_ptr dataset___FileFormat__DefaultWriteOptions( + const std::shared_ptr& fmt) { + return fmt->DefaultWriteOptions(); +} + +// [[arrow::export]] +std::shared_ptr dataset___ParquetFileFormat__Make( bool use_buffered_stream, int64_t buffer_size, cpp11::strings dict_columns) { auto fmt = std::make_shared(); @@ -187,15 +193,18 @@ std::shared_ptr dataset___ParquetFileFormat__MakeRead( } // [[arrow::export]] -std::shared_ptr dataset___ParquetFileFormat__MakeWrite( - const std::shared_ptr& writer_props, - const std::shared_ptr& arrow_props) { - auto fmt = std::make_shared(); - - // fmt->writer_properties = writer_props; - // fmt->arrow_writer_properties = arrow_props; +std::string dataset___FileWriteOptions__type_name( + const std::shared_ptr& options) { + return options->type_name(); +} - return fmt; +// [[arrow::export]] +void dataset___ParquetFileWriteOptions__update( + const std::shared_ptr& options, + const std::shared_ptr& writer_props, + const std::shared_ptr& arrow_writer_props) { + options->writer_properties = writer_props; + options->arrow_writer_properties = arrow_writer_props; } // [[arrow::export]] @@ -337,20 +346,18 @@ std::vector> dataset___ScanTask__get_batches } // [[arrow::export]] -void dataset___Dataset__Write(const std::shared_ptr& scanner, - const std::shared_ptr& schema, - const std::shared_ptr& format, - const std::shared_ptr& filesystem, - std::string path, - const std::shared_ptr& partitioning) { +void dataset___Dataset__Write( + const std::shared_ptr& file_write_options, + const std::shared_ptr& filesystem, std::string base_dir, + const std::shared_ptr& partitioning, std::string basename_template, + const std::shared_ptr& scanner) { ds::FileSystemDatasetWriteOptions opts; - opts.file_write_options = format->DefaultWriteOptions(); + opts.file_write_options = file_write_options; opts.filesystem = filesystem; - 
opts.base_dir = path; + opts.base_dir = base_dir; opts.partitioning = partitioning; - opts.basename_template = "dat_{i}.feather"; - StopIfNotOk(ds::FileSystemDataset::Write(schema, opts, scanner)); - return; + opts.basename_template = basename_template; + StopIfNotOk(ds::FileSystemDataset::Write(opts, scanner)); } #endif From bf7d392bdb94f2568de262c01561ed816e20e079 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 1 Oct 2020 17:09:54 -0400 Subject: [PATCH 10/34] add a test for writing with a selection --- r/R/dataset-write.R | 3 +-- r/R/parquet.R | 1 - r/tests/testthat/test-dataset.R | 15 +++++++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R index abc116ef126..ed8742e56eb 100644 --- a/r/R/dataset-write.R +++ b/r/R/dataset-write.R @@ -73,7 +73,6 @@ write_dataset <- function(dataset, } scanner <- Scanner$create(dataset) - schema <- scanner$schema if (!inherits(format, "FileFormat")) { format <- FileFormat$create(format) @@ -86,7 +85,7 @@ write_dataset <- function(dataset, if (!inherits(partitioning, "Partitioning")) { # TODO: tidyselect? - partition_schema <- schema[partitioning] + partition_schema <- scanner$schema[partitioning] if (isTRUE(hive_style)) { partitioning <- HivePartitioning$create(partition_schema) } else { diff --git a/r/R/parquet.R b/r/R/parquet.R index 15d87c15bf3..c95154e6693 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -336,7 +336,6 @@ ParquetWriterProperties$create <- function(table, parquet___WriterProperties___Builder__create() ) if (!is.null(version)) { - print(version) builder$set_version(version) } if (!is.null(compression)) { diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 5c18a8c793d..c8c4069b16b 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -898,6 +898,21 @@ test_that("Dataset writing: from data.frame", { ) }) +test_that("Dataset writing: from data.frame with projection", { + skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 + dst_dir <- tempfile() + stacked <- rbind(df1, df2) + stacked %>% + select(int, chr, dbl) %>% + group_by(int) %>% + write_dataset(dst_dir, format = "feather") + expect_true(dir.exists(dst_dir)) + expect_identical(dir(dst_dir), sort(paste("int", c(1:10, 101:110), sep = "="))) + + new_ds <- open_dataset(dst_dir, format = "feather") + expect_identical(names(new_ds), c("chr", "dbl", "int")) +}) + test_that("Dataset writing: from RecordBatch", { skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 dst_dir <- tempfile() From 733af53c8200d8cd3d6423c89b9416aa954525ad Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 1 Oct 2020 17:11:55 -0400 Subject: [PATCH 11/34] lint --- cpp/src/arrow/util/map.h | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/arrow/util/map.h b/cpp/src/arrow/util/map.h index 3872e972bd9..66325f5e13e 100644 --- a/cpp/src/arrow/util/map.h +++ b/cpp/src/arrow/util/map.h @@ -61,4 +61,3 @@ auto GetOrInsertGenerated(Map* map, Key&& key, Gen&& gen) } // namespace internal } // namespace arrow - From 0567537035e6d4d10b7ad6dcc2115c8570629c60 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 1 Oct 2020 17:12:59 -0400 Subject: [PATCH 12/34] make doc --- r/man/write_dataset.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/man/write_dataset.Rd b/r/man/write_dataset.Rd index 8133b38479d..417cc057f0f 100644 --- a/r/man/write_dataset.Rd +++ b/r/man/write_dataset.Rd @@ -7,8 +7,9 @@ write_dataset( 
dataset, path, - format = dataset$format$type, + format = dataset$format, partitioning = dplyr::group_vars(dataset), + basename_template = paste("dat_{i}", format$type, sep = "."), hive_style = TRUE, filesystem = NULL, ... From f0da04ad1b38f3321f5a4c307f798453086e2d2c Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 1 Oct 2020 17:15:35 -0400 Subject: [PATCH 13/34] document basename_template parameter --- r/R/dataset-write.R | 3 +++ r/man/write_dataset.Rd | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R index ed8742e56eb..94fe5cf2e40 100644 --- a/r/R/dataset-write.R +++ b/r/R/dataset-write.R @@ -34,6 +34,9 @@ #' @param partitioning `Partitioning` or a character vector of columns to #' use as partition keys (to be written as path segments). Default is to #' use the current `group_by()` columns. +#' @param basename_template `"{i}"` will be replaced with an autoincremented +#' integer to generate basenames of datafiles. For example `"dat_{i}.feather"` +#' will yield `"dat_0.feather", ...`. #' @param hive_style logical: write partition segments as Hive-style #' (`key1=value1/key2=value2/file.ext`) or as just bare values. Default is `TRUE`. #' @param filesystem A [FileSystem] where the dataset should be written if it is a diff --git a/r/man/write_dataset.Rd b/r/man/write_dataset.Rd index 417cc057f0f..3d74f74b621 100644 --- a/r/man/write_dataset.Rd +++ b/r/man/write_dataset.Rd @@ -33,6 +33,10 @@ same format as \code{dataset}.} use as partition keys (to be written as path segments). Default is to use the current \code{group_by()} columns.} +\item{basename_template}{\code{"{i}"} will be replaced with an autoincremented +integer to generate basenames of datafiles. For example \code{"dat_{i}.feather"} +will yield \verb{"dat_0.feather", ...}.} + \item{hive_style}{logical: write partition segments as Hive-style (\code{key1=value1/key2=value2/file.ext}) or as just bare values. Default is \code{TRUE}.} From 18ea1c34ba5230f4428b80e8b565a10d497a1f82 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 2 Oct 2020 09:28:58 -0400 Subject: [PATCH 14/34] enable on-write filtering of written datasets --- r/R/dataset-write.R | 10 +++------- r/man/write_dataset.Rd | 5 +++-- r/tests/testthat/test-dataset.R | 31 ++++++++++++------------------- 3 files changed, 18 insertions(+), 28 deletions(-) diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R index 94fe5cf2e40..d667db5f8d5 100644 --- a/r/R/dataset-write.R +++ b/r/R/dataset-write.R @@ -24,8 +24,9 @@ #' @param dataset [Dataset], [RecordBatch], [Table], `arrow_dplyr_query`, or #' `data.frame`. If an `arrow_dplyr_query` or `grouped_df`, #' `schema` and `partitioning` will be taken from the result of any `select()` -#' and `group_by()` operations done on the dataset. Note that `filter()` queries -#' are not currently supported, and `select`-ed columns may not be renamed. +#' and `group_by()` operations done on the dataset. `filter()` queries will be +#' applied to restrict written rows. +#' Note that `select()`-ed columns may not be renamed. #' @param path string path or URI to a directory to write to (directory will be #' created if it does not exist) #' @param format file format to write the dataset to. Currently supported @@ -54,11 +55,6 @@ write_dataset <- function(dataset, filesystem = NULL, ...) { if (inherits(dataset, "arrow_dplyr_query")) { - # Check for a filter - if (!isTRUE(dataset$filtered_rows)) { - # TODO: - stop("Writing a filtered dataset is not yet supported", call. 
= FALSE) - } # Check for a select if (!identical(dataset$selected_columns, set_names(names(dataset$.data)))) { # We can select a subset of columns but we can't rename them diff --git a/r/man/write_dataset.Rd b/r/man/write_dataset.Rd index 3d74f74b621..218876427ca 100644 --- a/r/man/write_dataset.Rd +++ b/r/man/write_dataset.Rd @@ -19,8 +19,9 @@ write_dataset( \item{dataset}{\link{Dataset}, \link{RecordBatch}, \link{Table}, \code{arrow_dplyr_query}, or \code{data.frame}. If an \code{arrow_dplyr_query} or \code{grouped_df}, \code{schema} and \code{partitioning} will be taken from the result of any \code{select()} -and \code{group_by()} operations done on the dataset. Note that \code{filter()} queries -are not currently supported, and \code{select}-ed columns may not be renamed.} +and \code{group_by()} operations done on the dataset. \code{filter()} queries will be +applied to restrict written rows. +Note that \code{select()}-ed columns may not be renamed.} \item{path}{string path or URI to a directory to write to (directory will be created if it does not exist)} diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index c8c4069b16b..73ea71b3c49 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -853,6 +853,18 @@ test_that("Dataset writing: dplyr methods", { collect(new_ds) %>% arrange(int), rbind(df1[c("chr", "dbl", "int")], df2[c("chr", "dbl", "int")]) ) + + # filter to restrict written rows + dst_dir3 <- tempfile() + ds %>% + filter(int == 4) %>% + write_dataset(dst_dir3, format = "feather") + new_ds <- open_dataset(dst_dir3, format = "feather") + + expect_equivalent( + new_ds %>% select(names(df1)) %>% collect(), + df1 %>% filter(int == 4) + ) }) test_that("Dataset writing: non-hive", { @@ -898,21 +910,6 @@ test_that("Dataset writing: from data.frame", { ) }) -test_that("Dataset writing: from data.frame with projection", { - skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 - dst_dir <- tempfile() - stacked <- rbind(df1, df2) - stacked %>% - select(int, chr, dbl) %>% - group_by(int) %>% - write_dataset(dst_dir, format = "feather") - expect_true(dir.exists(dst_dir)) - expect_identical(dir(dst_dir), sort(paste("int", c(1:10, 101:110), sep = "="))) - - new_ds <- open_dataset(dst_dir, format = "feather") - expect_identical(names(new_ds), c("chr", "dbl", "int")) -}) - test_that("Dataset writing: from RecordBatch", { skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 dst_dir <- tempfile() @@ -984,10 +981,6 @@ test_that("Dataset writing: unsupported features/input validation", { ds <- open_dataset(hive_dir) - expect_error( - filter(ds, int == 4) %>% write_dataset(ds), - "Writing a filtered dataset is not yet supported" - ) expect_error( select(ds, integer = int) %>% write_dataset(ds), "Renaming columns when writing a dataset is not yet supported" From bca876416124692f468f5756a49f865d9f6dc768 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 2 Oct 2020 13:22:12 -0400 Subject: [PATCH 15/34] add LockFreeStack --- cpp/src/arrow/util/CMakeLists.txt | 1 + cpp/src/arrow/util/lock_free.h | 114 ++++++++++++++++ cpp/src/arrow/util/lock_free_test.cc | 177 +++++++++++++++++++++++++ cpp/src/arrow/util/thread_pool_test.cc | 2 +- 4 files changed, 293 insertions(+), 1 deletion(-) create mode 100644 cpp/src/arrow/util/lock_free.h create mode 100644 cpp/src/arrow/util/lock_free_test.cc diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index c968cdd1a43..46b79bd0ec5 
100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -66,6 +66,7 @@ add_arrow_test(utility-test add_arrow_test(threading-utility-test SOURCES future_test + lock_free_test task_group_test thread_pool_test) diff --git a/cpp/src/arrow/util/lock_free.h b/cpp/src/arrow/util/lock_free.h new file mode 100644 index 00000000000..89fd1584776 --- /dev/null +++ b/cpp/src/arrow/util/lock_free.h @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "arrow/util/macros.h" + +namespace arrow { +namespace util { + +/// A lock free container with FILO storage order. +/// +/// Two thread safe operations are supported: +/// - Push a value onto the stack +/// - move construct from another stack +/// +/// range-for compatilble iteration is provided for +/// convenience, but it is *not* thread safe. +template +class LockFreeStack { + public: + LockFreeStack() = default; + + ~LockFreeStack() { + Node* next = nodes_.load(); + + while (next != NULLPTR) { + Node* node = next; + next = next->next; + delete node; + } + } + + LockFreeStack(const LockFreeStack&) = delete; + LockFreeStack& operator=(const LockFreeStack&) = delete; + LockFreeStack& operator=(LockFreeStack&& other) = delete; + + LockFreeStack(LockFreeStack&& other) { + Node* other_nodes = other.nodes_.load(); + + do { + } while (!other.CompareExchange(&other_nodes, NULLPTR)); + + nodes_.store(other_nodes); + } + + void Push(T value) { + Node* new_head = new Node{std::move(value), nodes_.load()}; + + do { + } while (!CompareExchange(&new_head->next, new_head)); + } + + struct iterator; + iterator begin(); + iterator end(); + + private: + struct Node { + T value; + Node* next; + }; + + bool CompareExchange(Node** expected, Node* desired) { + return nodes_.compare_exchange_strong(*expected, desired); + } + + std::atomic nodes_{NULLPTR}; +}; + +template +struct LockFreeStack::iterator { + bool operator!=(iterator other) const { return node != other.node; } + + T& operator*() const { return node->value; } + + iterator& operator++() { + node = node->next; + return *this; + } + + Node* node; +}; + +template +typename LockFreeStack::iterator LockFreeStack::begin() { + return {nodes_.load()}; +} + +template +typename LockFreeStack::iterator LockFreeStack::end() { + return {NULLPTR}; +} + +} // namespace util +} // namespace arrow + diff --git a/cpp/src/arrow/util/lock_free_test.cc b/cpp/src/arrow/util/lock_free_test.cc new file mode 100644 index 00000000000..3b027f8adcf --- /dev/null +++ b/cpp/src/arrow/util/lock_free_test.cc @@ -0,0 +1,177 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include +#include + +#include "arrow/status.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/io_util.h" +#include "arrow/util/lock_free.h" +#include "arrow/util/task_group.h" +#include "arrow/util/thread_pool.h" + +namespace arrow { + +using internal::TaskGroup; + +namespace util { + +static constexpr int kLots = 512; +// static constexpr int kLots = 16 * 1024; + +class TestLockFreeStack : public ::testing::Test { + public: + void TearDown() override { + fflush(stdout); + fflush(stderr); + } + + void AppendLots(LockFreeStack* ints) { + for (int j = 0; j < kLots; ++j) { + ints->Push(j); + } + } + + void AppendLots(std::vector* ints) { + for (int j = 0; j < kLots; ++j) { + ints->push_back(j); + } + } + + template + std::vector ToVector(LockFreeStack stack) { + std::vector out; + for (auto&& element : stack) { + out.push_back(std::move(element)); + } + std::reverse(out.begin(), out.end()); + return out; + } + + template + std::vector Flatten(std::vector> nested) { + size_t flat_size = 0; + for (auto&& v : nested) { + flat_size += v.size(); + } + + std::vector flat(flat_size); + auto it = flat.begin(); + for (auto&& v : nested) { + it = std::move(v.begin(), v.end(), it); + } + + EXPECT_EQ(it, flat.end()); + return flat; + } + + std::shared_ptr task_group = + TaskGroup::MakeThreaded(::arrow::internal::GetCpuThreadPool()); +}; + +TEST_F(TestLockFreeStack, AppendOnly) { + LockFreeStack ints; + + for (int i = 0; i < task_group->parallelism(); ++i) { + task_group->Append([&] { + AppendLots(&ints); + return Status::OK(); + }); + } + + ASSERT_OK(task_group->Finish()); + + std::vector actual = ToVector(std::move(ints)), expected; + + for (int i = 0; i < task_group->parallelism(); ++i) { + AppendLots(&expected); + } + + for (auto v : {&actual, &expected}) { + std::sort(v->begin(), v->end()); + } + + EXPECT_THAT(actual, testing::ElementsAreArray(expected)); +} + +TEST_F(TestLockFreeStack, ProduceThenConsumeMoved) { + LockFreeStack ints; + + std::vector> per_thread_actual(task_group->parallelism()); + + for (int i = 0; i < task_group->parallelism(); ++i) { + task_group->Append([&, i] { + AppendLots(&ints); + per_thread_actual[i] = ToVector(std::move(ints)); + return Status::OK(); + }); + } + + ASSERT_OK(task_group->Finish()); + + std::vector actual = Flatten(std::move(per_thread_actual)), expected; + + for (int i = 0; i < task_group->parallelism(); ++i) { + AppendLots(&expected); + } + + for (auto v : {&actual, &expected}) { + std::sort(v->begin(), v->end()); + } + + EXPECT_THAT(actual, testing::ElementsAreArray(expected)); +} + +TEST_F(TestLockFreeStack, AlternatingProduceAndConsumeMoved) { + LockFreeStack ints; + + std::vector> per_thread_actual(task_group->parallelism()); + + for (int i = 0; i < task_group->parallelism(); ++i) { + task_group->Append([&, i] { + for (int j 
= 0; j < kLots; ++j) { + ints.Push(j); + auto local = std::move(ints); + for (int got : local) { + per_thread_actual[i].push_back(got); + } + } + return Status::OK(); + }); + } + + ASSERT_OK(task_group->Finish()); + + std::vector actual = Flatten(std::move(per_thread_actual)), expected; + + for (int i = 0; i < task_group->parallelism(); ++i) { + AppendLots(&expected); + } + + for (auto v : {&actual, &expected}) { + std::sort(v->begin(), v->end()); + } + + EXPECT_THAT(actual, testing::ElementsAreArray(expected)); +} + +} // namespace util +} // namespace arrow diff --git a/cpp/src/arrow/util/thread_pool_test.cc b/cpp/src/arrow/util/thread_pool_test.cc index ac0eae78165..a54880b21b1 100644 --- a/cpp/src/arrow/util/thread_pool_test.cc +++ b/cpp/src/arrow/util/thread_pool_test.cc @@ -126,7 +126,7 @@ class AddTester { class TestThreadPool : public ::testing::Test { public: - void TearDown() { + void TearDown() override { fflush(stdout); fflush(stderr); } From 1c6db502fa9d53fe169775d2aa5c8a4b4bb8c166 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 2 Oct 2020 14:05:06 -0400 Subject: [PATCH 16/34] extract and unit test string interpolation --- cpp/src/arrow/dataset/file_base.cc | 62 +++++++++++++----------------- cpp/src/arrow/util/string.cc | 10 +++++ cpp/src/arrow/util/string.h | 5 +++ cpp/src/arrow/util/string_test.cc | 14 +++++++ 4 files changed, 56 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index 9c121bca659..b5a7c98ba16 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -33,6 +33,7 @@ #include "arrow/util/logging.h" #include "arrow/util/map.h" #include "arrow/util/mutex.h" +#include "arrow/util/string.h" #include "arrow/util/task_group.h" namespace arrow { @@ -146,48 +147,32 @@ FragmentIterator FileSystemDataset::GetFragmentsImpl( } Status FileWriter::Write(RecordBatchReader* batches) { - for (std::shared_ptr batch;;) { - RETURN_NOT_OK(batches->ReadNext(&batch)); + while (true) { + ARROW_ASSIGN_OR_RAISE(auto batch, batches->Next()); if (batch == nullptr) break; RETURN_NOT_OK(Write(batch)); } return Status::OK(); } -struct NextBasenameGenerator { - static Result Make(const std::string& basename_template) { - if (basename_template.find(fs::internal::kSep) != std::string::npos) { - return Status::Invalid("basename_template contained '/'"); - } - size_t token_start = basename_template.find(token()); - if (token_start == std::string::npos) { - return Status::Invalid("basename_template did not contain '{i}'"); - } - return NextBasenameGenerator{basename_template, 0, token_start, - token_start + token().size()}; - } +constexpr util::string_view kIntegerToken = "{i}"; - static const std::string& token() { - static const std::string token = "{i}"; - return token; +Status ValidateBasenameTemplate(util::string_view basename_template) { + if (basename_template.find(fs::internal::kSep) != util::string_view::npos) { + return Status::Invalid("basename_template contained '/'"); } - - const std::string& template_; - size_t i_, token_start_, token_end_; - - std::string operator()() { - return template_.substr(0, token_start_) + std::to_string(i_++) + - template_.substr(token_end_); + size_t token_start = basename_template.find(kIntegerToken); + if (token_start == util::string_view::npos) { + return Status::Invalid("basename_template did not contain '", kIntegerToken, "'"); } -}; + return Status::OK(); +} using MutexedWriter = util::Mutexed>; struct WriterSet { - 
WriterSet(NextBasenameGenerator next_basename, - const FileSystemDatasetWriteOptions& write_options) - : next_basename_(std::move(next_basename)), - base_dir_(fs::internal::EnsureTrailingSlash(write_options.base_dir)), + explicit WriterSet(const FileSystemDatasetWriteOptions& write_options) + : base_dir_(fs::internal::EnsureTrailingSlash(write_options.base_dir)), write_options_(write_options) {} Result> Get(const Expression& partition_expression, @@ -208,12 +193,19 @@ struct WriterSet { })->second; if (writer_lock) { - // NB: next_basename_() must be invoked with the set_lock held - auto path = fs::internal::ConcatAbstractPath(dir, next_basename_()); + auto i = i_++; set_lock.Unlock(); RETURN_NOT_OK(write_options_.filesystem->CreateDir(dir)); + auto basename = internal::Replace(write_options_.basename_template, kIntegerToken, + std::to_string(i)); + if (!basename) { + return Status::Invalid("string interpolation of basename template failed"); + } + + auto path = fs::internal::ConcatAbstractPath(dir, *basename); + ARROW_ASSIGN_OR_RAISE(auto destination, write_options_.filesystem->OpenOutputStream(path)); @@ -236,16 +228,18 @@ struct WriterSet { return Status::OK(); } + int i_ = 0; // There should only be a single writer open for each partition directory at a time util::Mutex mutex_; std::unordered_map> dir_to_writer_; - NextBasenameGenerator next_basename_; std::string base_dir_; const FileSystemDatasetWriteOptions& write_options_; }; Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_options, std::shared_ptr scanner) { + RETURN_NOT_OK(ValidateBasenameTemplate(write_options.basename_template)); + auto task_group = scanner->context()->TaskGroup(); // Things we'll un-lazy for the sake of simplicity, with the tradeoff they represent: @@ -278,9 +272,7 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio } } - ARROW_ASSIGN_OR_RAISE(auto next_basename, - NextBasenameGenerator::Make(write_options.basename_template)); - WriterSet writers(std::move(next_basename), write_options); + WriterSet writers(write_options); auto write_task_group = task_group->MakeSubGroup(); auto fragment_for_task_it = fragment_for_task.begin(); diff --git a/cpp/src/arrow/util/string.cc b/cpp/src/arrow/util/string.cc index b3ed2473724..625086c39b2 100644 --- a/cpp/src/arrow/util/string.cc +++ b/cpp/src/arrow/util/string.cc @@ -144,5 +144,15 @@ std::string AsciiToLower(util::string_view value) { return result; } +util::optional Replace(util::string_view s, util::string_view token, + util::string_view replacement) { + size_t token_start = s.find(token); + if (token_start == std::string::npos) { + return util::nullopt; + } + return s.substr(0, token_start).to_string() + replacement.to_string() + + s.substr(token_start + token.size()).to_string(); +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h index ea445c71ef4..4201bbe2ea8 100644 --- a/cpp/src/arrow/util/string.h +++ b/cpp/src/arrow/util/string.h @@ -20,6 +20,7 @@ #include #include +#include "arrow/util/optional.h" #include "arrow/util/string_view.h" #include "arrow/util/visibility.h" @@ -56,5 +57,9 @@ bool AsciiEqualsCaseInsensitive(util::string_view left, util::string_view right) ARROW_EXPORT std::string AsciiToLower(util::string_view value); +ARROW_EXPORT +util::optional Replace(util::string_view s, util::string_view token, + util::string_view replacement); + } // namespace internal } // namespace arrow diff --git 
a/cpp/src/arrow/util/string_test.cc b/cpp/src/arrow/util/string_test.cc index c56525732b0..d9b3749fdbd 100644 --- a/cpp/src/arrow/util/string_test.cc +++ b/cpp/src/arrow/util/string_test.cc @@ -88,5 +88,19 @@ TEST(ParseHexValue, Invalid) { ASSERT_RAISES(Invalid, ParseHexValue(input.c_str(), &output)); } +TEST(Replace, Basics) { + auto s = Replace("dat_{i}.txt", "{i}", "23"); + EXPECT_TRUE(s); + EXPECT_EQ(*s, "dat_23.txt"); + + // only replace the first occurrence of token + s = Replace("dat_{i}_{i}.txt", "{i}", "23"); + EXPECT_TRUE(s); + EXPECT_EQ(*s, "dat_23_{i}.txt"); + + s = Replace("dat_.txt", "{nope}", "23"); + EXPECT_FALSE(s); +} + } // namespace internal } // namespace arrow From 6eb546e18df9c9c67b4ce8acd46c513fa6f69010 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 2 Oct 2020 15:34:39 -0400 Subject: [PATCH 17/34] refactor ::Write() to use explicit WriteQueues --- cpp/src/arrow/dataset/file_base.cc | 211 ++++++++++++++++----------- cpp/src/arrow/util/lock_free.h | 37 +++-- cpp/src/arrow/util/lock_free_test.cc | 15 +- cpp/src/arrow/util/mutex.cc | 3 +- cpp/src/arrow/util/mutex.h | 25 ---- 5 files changed, 153 insertions(+), 138 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index b5a7c98ba16..2c42221b192 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -30,7 +30,9 @@ #include "arrow/io/interfaces.h" #include "arrow/io/memory.h" #include "arrow/util/iterator.h" +#include "arrow/util/lock_free.h" #include "arrow/util/logging.h" +#include "arrow/util/make_unique.h" #include "arrow/util/map.h" #include "arrow/util/mutex.h" #include "arrow/util/string.h" @@ -168,73 +170,94 @@ Status ValidateBasenameTemplate(util::string_view basename_template) { return Status::OK(); } -using MutexedWriter = util::Mutexed>; - -struct WriterSet { - explicit WriterSet(const FileSystemDatasetWriteOptions& write_options) - : base_dir_(fs::internal::EnsureTrailingSlash(write_options.base_dir)), - write_options_(write_options) {} +class WriteQueue { + public: + explicit WriteQueue(util::string_view partition_expression) + : partition_expression_(partition_expression) {} + + // Push a batch into the writer's queue of pending writes. + void Push(std::shared_ptr batch) { pending_.Push(std::move(batch)); } + + // Take the writer's lock and flush its queue of pending writes. + // Returns false if the lock could not be acquired. The queue is guaranteed to be + // flushed if every thread which calls Push() subsequently calls TryFlush() until + // it returns true. 
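  // A minimal sketch of that contract from the producer side, using only the
  // Push() and TryFlush() members of this class (illustrative only, not part of
  // this patch; PushAndEnsureFlushed is a hypothetical helper name):
  //
  //   Status PushAndEnsureFlushed(WriteQueue* queue,
  //                               std::shared_ptr<RecordBatch> batch) {
  //     queue->Push(std::move(batch));
  //     bool flushed = false;
  //     while (!flushed) {
  //       // false only means another thread currently holds the writer; keep
  //       // calling until one of our own TryFlush() calls returns true.
  //       ARROW_ASSIGN_OR_RAISE(flushed, queue->TryFlush());
  //     }
  //     return Status::OK();
  //   }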
+ Result TryFlush() { + if (auto lock = writer_mutex_.TryLock()) { + RecordBatchVector pending_vector; + do { + // Move pending to a local variable exclusively viewed by this thread + auto pending = std::move(pending_); + pending_vector = pending.ToVector(); + + for (auto it = pending_vector.rbegin(); it != pending_vector.rend(); ++it) { + RETURN_NOT_OK(writer_->Write(*it)); + } - Result> Get(const Expression& partition_expression, - const std::shared_ptr& schema) { - ARROW_ASSIGN_OR_RAISE(auto part_segments, - write_options_.partitioning->Format(partition_expression)); - std::string dir = base_dir_ + part_segments; + // Since we're holding this writer's lock anyway we may as well check for batches + // added while we were writing + } while (!pending_vector.empty()); + return true; + } + return false; + } - util::Mutex::Guard writer_lock; + util::Mutex::Guard Lock() { return writer_mutex_.Lock(); } - auto set_lock = mutex_.Lock(); + const std::shared_ptr& writer() const { return writer_; } - auto writer = - internal::GetOrInsertGenerated(&dir_to_writer_, dir, [&](const std::string&) { - auto writer = std::make_shared(); - writer_lock = writer->Lock(); - return writer; - })->second; + Status OpenWriter(const FileSystemDatasetWriteOptions& write_options, int index, + std::shared_ptr schema) { + auto dir = fs::internal::EnsureTrailingSlash(write_options.base_dir) + + partition_expression_.to_string(); - if (writer_lock) { - auto i = i_++; - set_lock.Unlock(); + auto basename = internal::Replace(write_options.basename_template, kIntegerToken, + std::to_string(index)); + if (!basename) { + return Status::Invalid("string interpolation of basename template failed"); + } - RETURN_NOT_OK(write_options_.filesystem->CreateDir(dir)); + auto path = fs::internal::ConcatAbstractPath(dir, *basename); - auto basename = internal::Replace(write_options_.basename_template, kIntegerToken, - std::to_string(i)); - if (!basename) { - return Status::Invalid("string interpolation of basename template failed"); - } + RETURN_NOT_OK(write_options.filesystem->CreateDir(dir)); + ARROW_ASSIGN_OR_RAISE(auto destination, + write_options.filesystem->OpenOutputStream(path)); - auto path = fs::internal::ConcatAbstractPath(dir, *basename); + ARROW_ASSIGN_OR_RAISE(writer_, write_options.format()->MakeWriter( + std::move(destination), std::move(schema), + write_options.file_write_options)); + return Status::OK(); + } - ARROW_ASSIGN_OR_RAISE(auto destination, - write_options_.filesystem->OpenOutputStream(path)); + private: + // FileWriters are not required to be thread safe, so they must be guarded with a mutex. 
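  // The pending_ member declared just below is a util::LockFreeStack, whose only
  // thread-safe operations are Push() and move construction. A minimal sketch of
  // the drain idiom TryFlush() above relies on, assuming only those operations
  // plus the single-threaded ToVector() helper (illustrative only; Drain is a
  // hypothetical name):
  //
  //   std::vector<int> Drain(util::LockFreeStack<int>* stack) {
  //     // Move construction atomically steals every node pushed so far;
  //     // concurrent Push() calls simply land in the now-empty source stack.
  //     util::LockFreeStack<int> local(std::move(*stack));
  //     // Iteration is not thread safe, but `local` is owned exclusively by
  //     // this thread, so converting it to a vector here is safe.
  //     return local.ToVector();
  //   }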
+ util::Mutex writer_mutex_; + std::shared_ptr writer_; + // A stack into which batches to write will be pushed + util::LockFreeStack> pending_; - ARROW_ASSIGN_OR_RAISE(**writer, write_options_.format()->MakeWriter( - std::move(destination), schema, - write_options_.file_write_options)); - } + // The (formatted) partition expression to which this queue corresponds + util::string_view partition_expression_; +}; - return writer; +Result> MakeWriter( + const FileSystemDatasetWriteOptions& write_options, std::string part, + int auto_incremented, std::shared_ptr schema) { + auto dir = write_options.base_dir + part; + auto basename = internal::Replace(write_options.basename_template, kIntegerToken, + std::to_string(auto_incremented)); + if (!basename) { + return Status::Invalid("string interpolation of basename template failed"); } + auto path = fs::internal::ConcatAbstractPath(dir, *basename); - Status FinishAll(internal::TaskGroup* task_group) { - for (const auto& dir_writer : dir_to_writer_) { - task_group->Append([&] { - std::shared_ptr writer = **dir_writer.second; - return writer->Finish(); - }); - } - - return Status::OK(); - } + RETURN_NOT_OK(write_options.filesystem->CreateDir(dir)); + ARROW_ASSIGN_OR_RAISE(auto destination, + write_options.filesystem->OpenOutputStream(path)); - int i_ = 0; - // There should only be a single writer open for each partition directory at a time - util::Mutex mutex_; - std::unordered_map> dir_to_writer_; - std::string base_dir_; - const FileSystemDatasetWriteOptions& write_options_; -}; + return write_options.format()->MakeWriter(std::move(destination), std::move(schema), + write_options.file_write_options); +} Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_options, std::shared_ptr scanner) { @@ -272,14 +295,18 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio } } - WriterSet writers(write_options); + // Store a mapping from partitions (represened by their formatted partition expressions) + // to a WriteQueue which flushes batches into that partition's output file. + // `unordered_map` is not thread safe, so a lock on queues_mutex_ must be held whenever + // queues_ is accessed or modified. 
+ util::Mutex queues_mutex_; + std::unordered_map> queues_; - auto write_task_group = task_group->MakeSubGroup(); auto fragment_for_task_it = fragment_for_task.begin(); for (const auto& scan_task : scan_tasks) { const Fragment* fragment = *fragment_for_task_it++; - write_task_group->Append([&, scan_task, fragment] { + task_group->Append([&, scan_task, fragment] { ARROW_ASSIGN_OR_RAISE(auto batches, scan_task->Execute()); for (auto maybe_batch : batches) { @@ -287,49 +314,59 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio ARROW_ASSIGN_OR_RAISE(auto groups, write_options.partitioning->Partition(batch)); batch.reset(); // drop to hopefully conserve memory - struct PendingWrite { - std::shared_ptr writer; - std::shared_ptr batch; - - Result TryWrite() { - if (auto lock = writer->TryLock()) { - RETURN_NOT_OK(writer->get()->Write(batch)); - return true; - } - return false; - } - }; - std::vector pending_writes(groups.batches.size()); - for (size_t i = 0; i < groups.batches.size(); ++i) { AndExpression partition_expression(std::move(groups.expressions[i]), fragment->partition_expression()); - - ARROW_ASSIGN_OR_RAISE(auto writer, writers.Get(partition_expression, - groups.batches[i]->schema())); - - pending_writes[i] = {std::move(writer), std::move(groups.batches[i])}; - } - - for (size_t i = 0; !pending_writes.empty(); ++i) { - if (i >= pending_writes.size()) { - i = 0; + auto batch = std::move(groups.batches[i]); + + ARROW_ASSIGN_OR_RAISE(auto part, + write_options.partitioning->Format(partition_expression)); + + // Now we need to look up the relevant WriteQueue in `queues_` + auto queues_lock = queues_mutex_.Lock(); + util::Mutex::Guard new_queue_lock; + + WriteQueue* queue = internal::GetOrInsertGenerated( + &queues_, std::move(part), + [&](const std::string& part) { + auto queue = internal::make_unique(part); + new_queue_lock = queue->Lock(); + return queue; + }) + ->second.get(); + + if (new_queue_lock) { + size_t queue_index = queues_.size() - 1; + // We have openned a new WriteQueue for `part` and must set up its FileWriter. + // That could be slow, so we'd like to release queues_lock now, allowing other + // threads to look up already-open queues. This is safe since we are mutating + // an element of queues_ in place, which does not conflict with access to any + // other element. However, we do need to keep new_queue_lock while we open the + // FileWriter to prevent other threads from using the writer before it has + // been constructed. 
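        // The same two-lock hand-off in isolation (illustrative sketch, not part
        // of this patch; the names are placeholders):
        //
        //   auto map_lock = map_mutex.Lock();   // guards the map of queues
        //   auto queue = std::make_shared<Queue>();
        //   map.emplace(key, queue);
        //   auto queue_lock = queue->Lock();    // guards only the new queue
        //   map_lock.Unlock();                  // lookups of other keys proceed
        //   ExpensiveInit(queue.get());         // e.g. create the dir, open the writer
        //   queue_lock.Unlock();                // readers may now use the writer,
        //                                       // provided they also take the queue's
        //                                       // lock (as TryFlush does via TryLock)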
+ queues_lock.Unlock(); + RETURN_NOT_OK(queue->OpenWriter(write_options, queue_index, batch->schema())); + new_queue_lock.Unlock(); } - ARROW_ASSIGN_OR_RAISE(auto success, pending_writes[i].TryWrite()); - if (success) { - std::swap(pending_writes.back(), pending_writes[i]); - pending_writes.pop_back(); - } + queue->Push(std::move(batch)); + + bool flushed; + do { + ARROW_ASSIGN_OR_RAISE(flushed, queue->TryFlush()); + } while (!flushed); } } return Status::OK(); }); } - RETURN_NOT_OK(write_task_group->Finish()); + RETURN_NOT_OK(task_group->Finish()); - RETURN_NOT_OK(writers.FinishAll(task_group.get())); + task_group = scanner->context()->TaskGroup(); + for (const auto& part_queue : queues_) { + task_group->Append([&] { return part_queue.second->writer()->Finish(); }); + } return task_group->Finish(); } diff --git a/cpp/src/arrow/util/lock_free.h b/cpp/src/arrow/util/lock_free.h index 89fd1584776..244f751b46b 100644 --- a/cpp/src/arrow/util/lock_free.h +++ b/cpp/src/arrow/util/lock_free.h @@ -19,6 +19,7 @@ #include #include +#include #include "arrow/util/macros.h" @@ -30,23 +31,12 @@ namespace util { /// Two thread safe operations are supported: /// - Push a value onto the stack /// - move construct from another stack -/// -/// range-for compatilble iteration is provided for -/// convenience, but it is *not* thread safe. template class LockFreeStack { public: LockFreeStack() = default; - ~LockFreeStack() { - Node* next = nodes_.load(); - - while (next != NULLPTR) { - Node* node = next; - next = next->next; - delete node; - } - } + ~LockFreeStack() { Delete(); } LockFreeStack(const LockFreeStack&) = delete; LockFreeStack& operator=(const LockFreeStack&) = delete; @@ -68,16 +58,39 @@ class LockFreeStack { } while (!CompareExchange(&new_head->next, new_head)); } + /// range-for compatible iteration interface + /// NB: *not* thread safe struct iterator; iterator begin(); iterator end(); + std::vector ToVector() { + std::vector out; + for (auto&& element : *this) { + out.push_back(std::move(element)); + } + Delete(); + return out; + } + private: struct Node { T value; Node* next; }; + void Delete() { + Node* next = nodes_.load(); + + while (next != NULLPTR) { + Node* node = next; + next = next->next; + delete node; + } + + nodes_.store(NULLPTR); + } + bool CompareExchange(Node** expected, Node* desired) { return nodes_.compare_exchange_strong(*expected, desired); } diff --git a/cpp/src/arrow/util/lock_free_test.cc b/cpp/src/arrow/util/lock_free_test.cc index 3b027f8adcf..6913c2e9721 100644 --- a/cpp/src/arrow/util/lock_free_test.cc +++ b/cpp/src/arrow/util/lock_free_test.cc @@ -56,16 +56,6 @@ class TestLockFreeStack : public ::testing::Test { } } - template - std::vector ToVector(LockFreeStack stack) { - std::vector out; - for (auto&& element : stack) { - out.push_back(std::move(element)); - } - std::reverse(out.begin(), out.end()); - return out; - } - template std::vector Flatten(std::vector> nested) { size_t flat_size = 0; @@ -99,7 +89,7 @@ TEST_F(TestLockFreeStack, AppendOnly) { ASSERT_OK(task_group->Finish()); - std::vector actual = ToVector(std::move(ints)), expected; + std::vector actual = ints.ToVector(), expected; for (int i = 0; i < task_group->parallelism(); ++i) { AppendLots(&expected); @@ -120,7 +110,8 @@ TEST_F(TestLockFreeStack, ProduceThenConsumeMoved) { for (int i = 0; i < task_group->parallelism(); ++i) { task_group->Append([&, i] { AppendLots(&ints); - per_thread_actual[i] = ToVector(std::move(ints)); + auto local = std::move(ints); + per_thread_actual[i] = local.ToVector(); 
return Status::OK(); }); } diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index 9fc7d1c11c1..63ff026f4ee 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -34,8 +34,7 @@ Mutex::Guard::Guard(Mutex* locked) void Mutex::Guard::Unlock() { if (locked_) { - auto locked = std::move(locked_); - DCHECK_EQ(locked_, nullptr); + locked_.reset(); } } diff --git a/cpp/src/arrow/util/mutex.h b/cpp/src/arrow/util/mutex.h index 7e72bacc69a..8c9f0d40002 100644 --- a/cpp/src/arrow/util/mutex.h +++ b/cpp/src/arrow/util/mutex.h @@ -60,30 +60,5 @@ class ARROW_EXPORT Mutex { std::unique_ptr impl_; }; -/// A trivial Mutex, T pair -template -class Mutexed : Mutex { - public: - Mutexed() = default; - Mutexed(Mutexed&&) = default; - Mutexed& operator=(Mutexed&&) = default; - explicit Mutexed(T obj) : obj_(std::move(obj)) {} - - using Mutex::Lock; - using Mutex::TryLock; - - T& operator*() { return obj_; } - const T& operator*() const { return obj_; } - - T* operator->() { return &obj_; } - const T* operator->() const { return &obj_; } - - T& get() { return obj_; } - const T& get() const { return obj_; } - - private: - T obj_; -}; - } // namespace util } // namespace arrow From ed5ec52032951331d83c70d15626dbdc65256182 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 2 Oct 2020 17:05:29 -0400 Subject: [PATCH 18/34] cache queue mapping local to each thread --- cpp/src/arrow/dataset/file_base.cc | 144 ++++++++++++++++++----------- cpp/src/arrow/util/map.h | 12 +-- 2 files changed, 94 insertions(+), 62 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index 2c42221b192..92164c15ec0 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -18,6 +18,8 @@ #include "arrow/dataset/file_base.h" #include +#include +#include #include #include "arrow/dataset/dataset_internal.h" @@ -172,16 +174,18 @@ Status ValidateBasenameTemplate(util::string_view basename_template) { class WriteQueue { public: - explicit WriteQueue(util::string_view partition_expression) - : partition_expression_(partition_expression) {} + WriteQueue(std::string partition_expression, + util::Mutex::Guard* wait_for_opened_writer_lock) + : partition_expression_(std::move(partition_expression)) { + *wait_for_opened_writer_lock = writer_mutex_.Lock(); + } // Push a batch into the writer's queue of pending writes. void Push(std::shared_ptr batch) { pending_.Push(std::move(batch)); } - // Take the writer's lock and flush its queue of pending writes. - // Returns false if the lock could not be acquired. The queue is guaranteed to be - // flushed if every thread which calls Push() subsequently calls TryFlush() until - // it returns true. + // Try to lock a writer and flush its queue of pending writes. Returns false if the + // lock could not be acquired. The queue is guaranteed to be flushed if every thread + // which calls Push() subsequently calls TryFlush() until it returns true. Result TryFlush() { if (auto lock = writer_mutex_.TryLock()) { RecordBatchVector pending_vector; @@ -202,14 +206,29 @@ class WriteQueue { return false; } - util::Mutex::Guard Lock() { return writer_mutex_.Lock(); } + // Flush a set of WriteQueues. If we find any queue to be locked we try flushing all + // the others before retrying to minimize busy waiting. 
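  // Caller-side usage, one FlushSet() call per scanned input batch (illustrative
  // sketch, not part of this patch; LookupQueueForPartition is a hypothetical name):
  //
  //   std::unordered_set<WriteQueue*> need_flushed;
  //   for (size_t i = 0; i < groups.batches.size(); ++i) {
  //     WriteQueue* queue = LookupQueueForPartition(groups.expressions[i]);
  //     queue->Push(std::move(groups.batches[i]));
  //     need_flushed.insert(queue);
  //   }
  //   // Flushing the whole set at once avoids spinning on any single writer
  //   // that another thread happens to be holding at that moment.
  //   RETURN_NOT_OK(WriteQueue::FlushSet(std::move(need_flushed)));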
+ static Status FlushSet(std::unordered_set set) { + while (!set.empty()) { + for (auto it = set.begin(); it != set.end();) { + WriteQueue* queue = *it; + ARROW_ASSIGN_OR_RAISE(bool flushed, queue->TryFlush()); + if (flushed) { + it = set.erase(it); + } else { + ++it; + } + } + } + return Status::OK(); + } const std::shared_ptr& writer() const { return writer_; } - Status OpenWriter(const FileSystemDatasetWriteOptions& write_options, int index, + Status OpenWriter(const FileSystemDatasetWriteOptions& write_options, size_t index, std::shared_ptr schema) { - auto dir = fs::internal::EnsureTrailingSlash(write_options.base_dir) + - partition_expression_.to_string(); + auto dir = + fs::internal::EnsureTrailingSlash(write_options.base_dir) + partition_expression_; auto basename = internal::Replace(write_options.basename_template, kIntegerToken, std::to_string(index)); @@ -237,28 +256,9 @@ class WriteQueue { util::LockFreeStack> pending_; // The (formatted) partition expression to which this queue corresponds - util::string_view partition_expression_; + std::string partition_expression_; }; -Result> MakeWriter( - const FileSystemDatasetWriteOptions& write_options, std::string part, - int auto_incremented, std::shared_ptr schema) { - auto dir = write_options.base_dir + part; - auto basename = internal::Replace(write_options.basename_template, kIntegerToken, - std::to_string(auto_incremented)); - if (!basename) { - return Status::Invalid("string interpolation of basename template failed"); - } - auto path = fs::internal::ConcatAbstractPath(dir, *basename); - - RETURN_NOT_OK(write_options.filesystem->CreateDir(dir)); - ARROW_ASSIGN_OR_RAISE(auto destination, - write_options.filesystem->OpenOutputStream(path)); - - return write_options.format()->MakeWriter(std::move(destination), std::move(schema), - write_options.file_write_options); -} - Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_options, std::shared_ptr scanner) { RETURN_NOT_OK(ValidateBasenameTemplate(write_options.basename_template)); @@ -298,15 +298,29 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio // Store a mapping from partitions (represened by their formatted partition expressions) // to a WriteQueue which flushes batches into that partition's output file. // `unordered_map` is not thread safe, so a lock on queues_mutex_ must be held whenever - // queues_ is accessed or modified. - util::Mutex queues_mutex_; - std::unordered_map> queues_; + // queues is accessed or modified. + util::Mutex queues_mutex; + std::unordered_map> queues; + + // Thread-local caches of partition-to-WriteQueue mapping. 
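      // The two-level lookup this enables, in isolation (illustrative sketch, not
      // part of this patch; the names mirror the locals declared below):
      //
      //   WriteQueue* LookupQueue(const std::string& part) {
      //     auto it = local_queues.find(part);   // hot path: no locking at all
      //     if (it != local_queues.end()) return it->second;
      //     auto lock = queues_mutex.Lock();     // slow path: shared map of queues
      //     WriteQueue* queue = /* find or create the entry in `queues` */;
      //     lock.Unlock();
      //     local_queues.emplace(part, queue);   // remember it for next time
      //     return queue;
      //   }
      //
      // Each scan task checks one such cache out of `caches` when it starts and
      // checks it back in when it finishes, so later batches touching the same
      // partitions never take queues_mutex.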
+ using QueueCache = std::unordered_map; + util::Mutex caches_mutex; + std::vector caches; auto fragment_for_task_it = fragment_for_task.begin(); for (const auto& scan_task : scan_tasks) { const Fragment* fragment = *fragment_for_task_it++; task_group->Append([&, scan_task, fragment] { + // check out a cache of `queues`, if available + auto caches_lock = caches_mutex.Lock(); + QueueCache local_queues; + if (!caches.empty()) { + local_queues = std::move(caches.back()); + caches.pop_back(); + } + caches_lock.Unlock(); + ARROW_ASSIGN_OR_RAISE(auto batches, scan_task->Execute()); for (auto maybe_batch : batches) { @@ -314,6 +328,7 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio ARROW_ASSIGN_OR_RAISE(auto groups, write_options.partitioning->Partition(batch)); batch.reset(); // drop to hopefully conserve memory + std::unordered_set need_flushed; for (size_t i = 0; i < groups.batches.size(); ++i) { AndExpression partition_expression(std::move(groups.expressions[i]), fragment->partition_expression()); @@ -322,49 +337,66 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio ARROW_ASSIGN_OR_RAISE(auto part, write_options.partitioning->Format(partition_expression)); - // Now we need to look up the relevant WriteQueue in `queues_` - auto queues_lock = queues_mutex_.Lock(); - util::Mutex::Guard new_queue_lock; - - WriteQueue* queue = internal::GetOrInsertGenerated( - &queues_, std::move(part), - [&](const std::string& part) { - auto queue = internal::make_unique(part); - new_queue_lock = queue->Lock(); - return queue; - }) - ->second.get(); - - if (new_queue_lock) { - size_t queue_index = queues_.size() - 1; + util::Mutex::Guard queues_lock, wait_for_opened_writer_lock; + + WriteQueue* queue = + // try to lookup the relevant WriteQueue up in our local cache + internal::GetOrInsertGenerated( + &local_queues, part, + [&](const std::string&) { + // local lookup failed, fall back to lookup in `queues` + queues_lock = queues_mutex.Lock(); + return internal::GetOrInsertGenerated( + &queues, std::move(part), + [&](const std::string& part) { + // lookup in `queues` also failed, + // generate a new WriteQueue + return internal::make_unique( + part, &wait_for_opened_writer_lock); + }) + ->second.get(); + }) + ->second; + + if (wait_for_opened_writer_lock) { + size_t queue_index = queues.size() - 1; // We have openned a new WriteQueue for `part` and must set up its FileWriter. // That could be slow, so we'd like to release queues_lock now, allowing other // threads to look up already-open queues. This is safe since we are mutating - // an element of queues_ in place, which does not conflict with access to any + // an element of `queues` in place, which does not conflict with access to any // other element. However, we do need to keep new_queue_lock while we open the // FileWriter to prevent other threads from using the writer before it has // been constructed. 
queues_lock.Unlock(); RETURN_NOT_OK(queue->OpenWriter(write_options, queue_index, batch->schema())); - new_queue_lock.Unlock(); + wait_for_opened_writer_lock.Unlock(); } - queue->Push(std::move(batch)); + if (queues_lock) { + queues_lock.Unlock(); + } - bool flushed; - do { - ARROW_ASSIGN_OR_RAISE(flushed, queue->TryFlush()); - } while (!flushed); + queue->Push(std::move(batch)); + need_flushed.insert(queue); } + + // flush all touched WriteQueues + RETURN_NOT_OK(WriteQueue::FlushSet(std::move(need_flushed))); } + // check cache back in + caches_lock = caches_mutex.Lock(); + auto larger_or_same = std::lower_bound( + caches.begin(), caches.end(), local_queues, + [](const QueueCache& l, const QueueCache& r) { return l.size() < r.size(); }); + caches.insert(larger_or_same, std::move(local_queues)); return Status::OK(); }); } RETURN_NOT_OK(task_group->Finish()); task_group = scanner->context()->TaskGroup(); - for (const auto& part_queue : queues_) { + for (const auto& part_queue : queues) { task_group->Append([&] { return part_queue.second->writer()->Finish(); }); } return task_group->Finish(); diff --git a/cpp/src/arrow/util/map.h b/cpp/src/arrow/util/map.h index 66325f5e13e..2ca1cf4c82d 100644 --- a/cpp/src/arrow/util/map.h +++ b/cpp/src/arrow/util/map.h @@ -29,11 +29,11 @@ namespace internal { /// will be returned. If `key` does not exist in the container, `gen(key)` will be /// invoked and its return value inserted. template -auto GetOrInsertGenerated(Map* map, Key&& key, Gen&& gen) +auto GetOrInsertGenerated(Map* map, Key key, Gen&& gen) -> decltype(map->begin()->second = gen(map->begin()->first), map->begin()) { - decltype(gen(map->begin()->first)) placeholder; + decltype(gen(map->begin()->first)) placeholder{}; - auto it_success = map->emplace(std::forward(key), std::move(placeholder)); + auto it_success = map->emplace(std::move(key), std::move(placeholder)); if (it_success.second) { // insertion of placeholder succeeded, overwrite it with gen() const auto& inserted_key = it_success.first->first; @@ -44,12 +44,12 @@ auto GetOrInsertGenerated(Map* map, Key&& key, Gen&& gen) } template -auto GetOrInsertGenerated(Map* map, Key&& key, Gen&& gen) +auto GetOrInsertGenerated(Map* map, Key key, Gen&& gen) -> Resultbegin()->second = gen(map->begin()->first).ValueOrDie(), map->begin())> { - decltype(gen(map->begin()->first).ValueOrDie()) placeholder; + decltype(gen(map->begin()->first).ValueOrDie()) placeholder{}; - auto it_success = map->emplace(std::forward(key), std::move(placeholder)); + auto it_success = map->emplace(std::move(key), std::move(placeholder)); if (it_success.second) { // insertion of placeholder succeeded, overwrite it with gen() const auto& inserted_key = it_success.first->first; From 1a541d65512601cad6a188afc24a41396bdc320e Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 2 Oct 2020 18:42:05 -0400 Subject: [PATCH 19/34] lint, simplify WriteQueue storage, try workaround for atomic::atomic() --- cpp/src/arrow/dataset/file_base.cc | 35 ++++++++++++++++++++---------- cpp/src/arrow/util/lock_free.h | 5 ++--- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index 92164c15ec0..7e9a25a06c1 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -18,6 +18,8 @@ #include "arrow/dataset/file_base.h" #include +#include +#include #include #include #include @@ -248,6 +250,8 @@ class WriteQueue { return Status::OK(); } + using Set = std::unordered_map; + 
private: // FileWriters are not required to be thread safe, so they must be guarded with a mutex. util::Mutex writer_mutex_; @@ -295,17 +299,25 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio } } + // WriteQueues are stored in this deque until writing is completed and are otherwise + // referenced by non-owning pointers. + std::deque queues_storage; + // Store a mapping from partitions (represened by their formatted partition expressions) // to a WriteQueue which flushes batches into that partition's output file. // `unordered_map` is not thread safe, so a lock on queues_mutex_ must be held whenever // queues is accessed or modified. util::Mutex queues_mutex; - std::unordered_map> queues; + WriteQueue::Set queues; // Thread-local caches of partition-to-WriteQueue mapping. - using QueueCache = std::unordered_map; util::Mutex caches_mutex; - std::vector caches; + struct SortByCacheSize { + bool operator()(const WriteQueue::Set& l, const WriteQueue::Set& r) { + return l.size() < r.size(); + } + }; + std::set caches; auto fragment_for_task_it = fragment_for_task.begin(); for (const auto& scan_task : scan_tasks) { @@ -313,11 +325,12 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio task_group->Append([&, scan_task, fragment] { // check out a cache of `queues`, if available + WriteQueue::Set local_queues; auto caches_lock = caches_mutex.Lock(); - QueueCache local_queues; if (!caches.empty()) { - local_queues = std::move(caches.back()); - caches.pop_back(); + auto back = caches.end(); + local_queues = std::move(*--back); + caches.erase(back); } caches_lock.Unlock(); @@ -351,10 +364,11 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio [&](const std::string& part) { // lookup in `queues` also failed, // generate a new WriteQueue - return internal::make_unique( + queues_storage.emplace_back( part, &wait_for_opened_writer_lock); + return &queues_storage.back(); }) - ->second.get(); + ->second; }) ->second; @@ -386,10 +400,7 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio // check cache back in caches_lock = caches_mutex.Lock(); - auto larger_or_same = std::lower_bound( - caches.begin(), caches.end(), local_queues, - [](const QueueCache& l, const QueueCache& r) { return l.size() < r.size(); }); - caches.insert(larger_or_same, std::move(local_queues)); + caches.insert(std::move(local_queues)); return Status::OK(); }); } diff --git a/cpp/src/arrow/util/lock_free.h b/cpp/src/arrow/util/lock_free.h index 244f751b46b..cd76bd30803 100644 --- a/cpp/src/arrow/util/lock_free.h +++ b/cpp/src/arrow/util/lock_free.h @@ -34,7 +34,7 @@ namespace util { template class LockFreeStack { public: - LockFreeStack() = default; + LockFreeStack() { nodes_.store(NULLPTR); } ~LockFreeStack() { Delete(); } @@ -95,7 +95,7 @@ class LockFreeStack { return nodes_.compare_exchange_strong(*expected, desired); } - std::atomic nodes_{NULLPTR}; + std::atomic nodes_; }; template @@ -124,4 +124,3 @@ typename LockFreeStack::iterator LockFreeStack::end() { } // namespace util } // namespace arrow - From 95e548e05c0bd06ebf3a97793c5f8fb8cb9a7497 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Sat, 3 Oct 2020 09:47:02 -0400 Subject: [PATCH 20/34] comparator must be const --- cpp/src/arrow/dataset/file_base.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index 7e9a25a06c1..5258cd18615 100644 --- 
a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -313,7 +313,7 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio // Thread-local caches of partition-to-WriteQueue mapping. util::Mutex caches_mutex; struct SortByCacheSize { - bool operator()(const WriteQueue::Set& l, const WriteQueue::Set& r) { + bool operator()(const WriteQueue::Set& l, const WriteQueue::Set& r) const { return l.size() < r.size(); } }; From 7fd7185e7a2ad72b56097dffa795fc80f0db27e4 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Sat, 3 Oct 2020 20:32:17 -0400 Subject: [PATCH 21/34] simplify thread local caching --- cpp/src/arrow/dataset/file_base.cc | 87 +++++++++++++----------------- 1 file changed, 38 insertions(+), 49 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index 5258cd18615..59ed38f2700 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -176,9 +175,9 @@ Status ValidateBasenameTemplate(util::string_view basename_template) { class WriteQueue { public: - WriteQueue(std::string partition_expression, + WriteQueue(std::string partition_expression, size_t index, util::Mutex::Guard* wait_for_opened_writer_lock) - : partition_expression_(std::move(partition_expression)) { + : partition_expression_(std::move(partition_expression)), index_(index) { *wait_for_opened_writer_lock = writer_mutex_.Lock(); } @@ -227,13 +226,13 @@ class WriteQueue { const std::shared_ptr& writer() const { return writer_; } - Status OpenWriter(const FileSystemDatasetWriteOptions& write_options, size_t index, + Status OpenWriter(const FileSystemDatasetWriteOptions& write_options, std::shared_ptr schema) { auto dir = fs::internal::EnsureTrailingSlash(write_options.base_dir) + partition_expression_; auto basename = internal::Replace(write_options.basename_template, kIntegerToken, - std::to_string(index)); + std::to_string(index_)); if (!basename) { return Status::Invalid("string interpolation of basename template failed"); } @@ -261,6 +260,8 @@ class WriteQueue { // The (formatted) partition expression to which this queue corresponds std::string partition_expression_; + + size_t index_; }; Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_options, @@ -312,12 +313,7 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio // Thread-local caches of partition-to-WriteQueue mapping. 
util::Mutex caches_mutex; - struct SortByCacheSize { - bool operator()(const WriteQueue::Set& l, const WriteQueue::Set& r) const { - return l.size() < r.size(); - } - }; - std::set caches; + std::vector caches(task_group->parallelism()); auto fragment_for_task_it = fragment_for_task.begin(); for (const auto& scan_task : scan_tasks) { @@ -325,13 +321,9 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio task_group->Append([&, scan_task, fragment] { // check out a cache of `queues`, if available - WriteQueue::Set local_queues; auto caches_lock = caches_mutex.Lock(); - if (!caches.empty()) { - auto back = caches.end(); - local_queues = std::move(*--back); - caches.erase(back); - } + WriteQueue::Set local_queues = std::move(caches.back()); + caches.pop_back(); caches_lock.Unlock(); ARROW_ASSIGN_OR_RAISE(auto batches, scan_task->Execute()); @@ -350,46 +342,43 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio ARROW_ASSIGN_OR_RAISE(auto part, write_options.partitioning->Format(partition_expression)); - util::Mutex::Guard queues_lock, wait_for_opened_writer_lock; + util::Mutex::Guard wait_for_opened_writer_lock; + + using internal::GetOrInsertGenerated; WriteQueue* queue = // try to lookup the relevant WriteQueue up in our local cache - internal::GetOrInsertGenerated( - &local_queues, part, - [&](const std::string&) { - // local lookup failed, fall back to lookup in `queues` - queues_lock = queues_mutex.Lock(); - return internal::GetOrInsertGenerated( - &queues, std::move(part), - [&](const std::string& part) { - // lookup in `queues` also failed, - // generate a new WriteQueue - queues_storage.emplace_back( - part, &wait_for_opened_writer_lock); - return &queues_storage.back(); - }) - ->second; - }) - ->second; + GetOrInsertGenerated(&local_queues, part, [&](const std::string&) { + // local lookup failed, fall back to lookup in `queues` + auto queues_lock = queues_mutex.Lock(); + + return GetOrInsertGenerated(&queues, std::move(part), + [&](const std::string& part) { + // lookup in `queues` also failed, + // generate a new WriteQueue + size_t queue_index = queues.size() - 1; + + queues_storage.emplace_back( + part, queue_index, + &wait_for_opened_writer_lock); + + return &queues_storage.back(); + }) + ->second; + })->second; if (wait_for_opened_writer_lock) { - size_t queue_index = queues.size() - 1; // We have openned a new WriteQueue for `part` and must set up its FileWriter. - // That could be slow, so we'd like to release queues_lock now, allowing other - // threads to look up already-open queues. This is safe since we are mutating - // an element of `queues` in place, which does not conflict with access to any - // other element. However, we do need to keep new_queue_lock while we open the - // FileWriter to prevent other threads from using the writer before it has - // been constructed. - queues_lock.Unlock(); - RETURN_NOT_OK(queue->OpenWriter(write_options, queue_index, batch->schema())); + // That could be slow, so we've already released queues_lock, allowing + // other threads to look up already-open queues. This is safe since we are + // mutating an element of `queues` in place, which does not conflict with + // access to any other element. However, we do need to continue holding + // wait_for_opened_writer_lock while we open the FileWriter to prevent other + // threads from using the writer before it has been constructed. 
+ RETURN_NOT_OK(queue->OpenWriter(write_options, batch->schema())); wait_for_opened_writer_lock.Unlock(); } - if (queues_lock) { - queues_lock.Unlock(); - } - queue->Push(std::move(batch)); need_flushed.insert(queue); } @@ -400,7 +389,7 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio // check cache back in caches_lock = caches_mutex.Lock(); - caches.insert(std::move(local_queues)); + caches.push_back(std::move(local_queues)); return Status::OK(); }); } From dde5eed100427d594a8f8539cc087e0d1031b0b0 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 5 Oct 2020 08:34:48 -0400 Subject: [PATCH 22/34] simplify: revert local queue lookup caching --- cpp/src/arrow/dataset/file_base.cc | 52 +++++++++++------------------- 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index 59ed38f2700..8a29135c476 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -305,27 +305,17 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio std::deque queues_storage; // Store a mapping from partitions (represened by their formatted partition expressions) - // to a WriteQueue which flushes batches into that partition's output file. - // `unordered_map` is not thread safe, so a lock on queues_mutex_ must be held whenever - // queues is accessed or modified. + // to a WriteQueue which flushes batches into that partition's output file. In principle + // any thread could produce a batch for any partition, so each task alternates between + // pushing batches and flushing them to disk. util::Mutex queues_mutex; WriteQueue::Set queues; - // Thread-local caches of partition-to-WriteQueue mapping. 
- util::Mutex caches_mutex; - std::vector caches(task_group->parallelism()); - auto fragment_for_task_it = fragment_for_task.begin(); for (const auto& scan_task : scan_tasks) { const Fragment* fragment = *fragment_for_task_it++; task_group->Append([&, scan_task, fragment] { - // check out a cache of `queues`, if available - auto caches_lock = caches_mutex.Lock(); - WriteQueue::Set local_queues = std::move(caches.back()); - caches.pop_back(); - caches_lock.Unlock(); - ARROW_ASSIGN_OR_RAISE(auto batches, scan_task->Execute()); for (auto maybe_batch : batches) { @@ -346,26 +336,25 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio using internal::GetOrInsertGenerated; - WriteQueue* queue = - // try to lookup the relevant WriteQueue up in our local cache - GetOrInsertGenerated(&local_queues, part, [&](const std::string&) { - // local lookup failed, fall back to lookup in `queues` - auto queues_lock = queues_mutex.Lock(); + WriteQueue* queue; + { + // lookup the queue to which batch should be appended + auto queues_lock = queues_mutex.Lock(); - return GetOrInsertGenerated(&queues, std::move(part), - [&](const std::string& part) { - // lookup in `queues` also failed, - // generate a new WriteQueue - size_t queue_index = queues.size() - 1; + queue = GetOrInsertGenerated(&queues, std::move(part), + [&](const std::string& part) { + // lookup in `queues` also failed, + // generate a new WriteQueue + size_t queue_index = queues.size() - 1; - queues_storage.emplace_back( - part, queue_index, - &wait_for_opened_writer_lock); + queues_storage.emplace_back( + part, queue_index, + &wait_for_opened_writer_lock); - return &queues_storage.back(); - }) - ->second; - })->second; + return &queues_storage.back(); + }) + ->second; + } if (wait_for_opened_writer_lock) { // We have openned a new WriteQueue for `part` and must set up its FileWriter. @@ -387,9 +376,6 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio RETURN_NOT_OK(WriteQueue::FlushSet(std::move(need_flushed))); } - // check cache back in - caches_lock = caches_mutex.Lock(); - caches.push_back(std::move(local_queues)); return Status::OK(); }); } From dfc2291343eec2da7606511e8814a55451726941 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 5 Oct 2020 09:02:04 -0400 Subject: [PATCH 23/34] revert lock_free --- cpp/src/arrow/dataset/file_base.cc | 68 +++++------ cpp/src/arrow/util/CMakeLists.txt | 1 - cpp/src/arrow/util/lock_free.h | 126 -------------------- cpp/src/arrow/util/lock_free_test.cc | 168 --------------------------- 4 files changed, 29 insertions(+), 334 deletions(-) delete mode 100644 cpp/src/arrow/util/lock_free.h delete mode 100644 cpp/src/arrow/util/lock_free_test.cc diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index 8a29135c476..aa9efb0dfa6 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -33,7 +33,6 @@ #include "arrow/io/interfaces.h" #include "arrow/io/memory.h" #include "arrow/util/iterator.h" -#include "arrow/util/lock_free.h" #include "arrow/util/logging.h" #include "arrow/util/make_unique.h" #include "arrow/util/map.h" @@ -173,6 +172,8 @@ Status ValidateBasenameTemplate(util::string_view basename_template) { return Status::OK(); } +/// WriteQueue allows batches to be pushed from multiple threads while another thread +/// flushes some to disk. 
class WriteQueue { public: WriteQueue(std::string partition_expression, size_t index, @@ -182,43 +183,30 @@ class WriteQueue { } // Push a batch into the writer's queue of pending writes. - void Push(std::shared_ptr batch) { pending_.Push(std::move(batch)); } - - // Try to lock a writer and flush its queue of pending writes. Returns false if the - // lock could not be acquired. The queue is guaranteed to be flushed if every thread - // which calls Push() subsequently calls TryFlush() until it returns true. - Result TryFlush() { - if (auto lock = writer_mutex_.TryLock()) { - RecordBatchVector pending_vector; - do { - // Move pending to a local variable exclusively viewed by this thread - auto pending = std::move(pending_); - pending_vector = pending.ToVector(); - - for (auto it = pending_vector.rbegin(); it != pending_vector.rend(); ++it) { - RETURN_NOT_OK(writer_->Write(*it)); - } - - // Since we're holding this writer's lock anyway we may as well check for batches - // added while we were writing - } while (!pending_vector.empty()); - return true; - } - return false; + void Push(std::shared_ptr batch) { + auto push_lock = push_mutex_.Lock(); + pending_.push_back(std::move(batch)); } - // Flush a set of WriteQueues. If we find any queue to be locked we try flushing all - // the others before retrying to minimize busy waiting. - static Status FlushSet(std::unordered_set set) { - while (!set.empty()) { - for (auto it = set.begin(); it != set.end();) { - WriteQueue* queue = *it; - ARROW_ASSIGN_OR_RAISE(bool flushed, queue->TryFlush()); - if (flushed) { - it = set.erase(it); - } else { - ++it; + // Flush all pending batches, or return immediately if another thread is already + // flushing this queue. + Status Flush() { + if (auto writer_lock = writer_mutex_.TryLock()) { + while (true) { + std::shared_ptr batch; + { + auto push_lock = push_mutex_.Lock(); + if (pending_.empty()) { + // Ensure the writer_lock is released before the push_lock. Otherwise another + // thread might successfully Push() a batch but then fail to Flush() it since + // the writer_lock is still held, leaving an unflushed batch in pending_. + writer_lock.Unlock(); + break; + } + batch = std::move(pending_.front()); + pending_.pop_front(); } + RETURN_NOT_OK(writer_->Write(batch)); } } return Status::OK(); @@ -252,11 +240,11 @@ class WriteQueue { using Set = std::unordered_map; private: - // FileWriters are not required to be thread safe, so they must be guarded with a mutex. 
util::Mutex writer_mutex_; std::shared_ptr writer_; - // A stack into which batches to write will be pushed - util::LockFreeStack> pending_; + + util::Mutex push_mutex_; + std::deque> pending_; // The (formatted) partition expression to which this queue corresponds std::string partition_expression_; @@ -373,7 +361,9 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio } // flush all touched WriteQueues - RETURN_NOT_OK(WriteQueue::FlushSet(std::move(need_flushed))); + for (auto queue : need_flushed) { + RETURN_NOT_OK(queue->Flush()); + } } return Status::OK(); diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 46b79bd0ec5..c968cdd1a43 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -66,7 +66,6 @@ add_arrow_test(utility-test add_arrow_test(threading-utility-test SOURCES future_test - lock_free_test task_group_test thread_pool_test) diff --git a/cpp/src/arrow/util/lock_free.h b/cpp/src/arrow/util/lock_free.h deleted file mode 100644 index cd76bd30803..00000000000 --- a/cpp/src/arrow/util/lock_free.h +++ /dev/null @@ -1,126 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include - -#include "arrow/util/macros.h" - -namespace arrow { -namespace util { - -/// A lock free container with FILO storage order. 
-/// -/// Two thread safe operations are supported: -/// - Push a value onto the stack -/// - move construct from another stack -template -class LockFreeStack { - public: - LockFreeStack() { nodes_.store(NULLPTR); } - - ~LockFreeStack() { Delete(); } - - LockFreeStack(const LockFreeStack&) = delete; - LockFreeStack& operator=(const LockFreeStack&) = delete; - LockFreeStack& operator=(LockFreeStack&& other) = delete; - - LockFreeStack(LockFreeStack&& other) { - Node* other_nodes = other.nodes_.load(); - - do { - } while (!other.CompareExchange(&other_nodes, NULLPTR)); - - nodes_.store(other_nodes); - } - - void Push(T value) { - Node* new_head = new Node{std::move(value), nodes_.load()}; - - do { - } while (!CompareExchange(&new_head->next, new_head)); - } - - /// range-for compatible iteration interface - /// NB: *not* thread safe - struct iterator; - iterator begin(); - iterator end(); - - std::vector ToVector() { - std::vector out; - for (auto&& element : *this) { - out.push_back(std::move(element)); - } - Delete(); - return out; - } - - private: - struct Node { - T value; - Node* next; - }; - - void Delete() { - Node* next = nodes_.load(); - - while (next != NULLPTR) { - Node* node = next; - next = next->next; - delete node; - } - - nodes_.store(NULLPTR); - } - - bool CompareExchange(Node** expected, Node* desired) { - return nodes_.compare_exchange_strong(*expected, desired); - } - - std::atomic nodes_; -}; - -template -struct LockFreeStack::iterator { - bool operator!=(iterator other) const { return node != other.node; } - - T& operator*() const { return node->value; } - - iterator& operator++() { - node = node->next; - return *this; - } - - Node* node; -}; - -template -typename LockFreeStack::iterator LockFreeStack::begin() { - return {nodes_.load()}; -} - -template -typename LockFreeStack::iterator LockFreeStack::end() { - return {NULLPTR}; -} - -} // namespace util -} // namespace arrow diff --git a/cpp/src/arrow/util/lock_free_test.cc b/cpp/src/arrow/util/lock_free_test.cc deleted file mode 100644 index 6913c2e9721..00000000000 --- a/cpp/src/arrow/util/lock_free_test.cc +++ /dev/null @@ -1,168 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include -#include - -#include -#include - -#include "arrow/status.h" -#include "arrow/testing/gtest_util.h" -#include "arrow/util/io_util.h" -#include "arrow/util/lock_free.h" -#include "arrow/util/task_group.h" -#include "arrow/util/thread_pool.h" - -namespace arrow { - -using internal::TaskGroup; - -namespace util { - -static constexpr int kLots = 512; -// static constexpr int kLots = 16 * 1024; - -class TestLockFreeStack : public ::testing::Test { - public: - void TearDown() override { - fflush(stdout); - fflush(stderr); - } - - void AppendLots(LockFreeStack* ints) { - for (int j = 0; j < kLots; ++j) { - ints->Push(j); - } - } - - void AppendLots(std::vector* ints) { - for (int j = 0; j < kLots; ++j) { - ints->push_back(j); - } - } - - template - std::vector Flatten(std::vector> nested) { - size_t flat_size = 0; - for (auto&& v : nested) { - flat_size += v.size(); - } - - std::vector flat(flat_size); - auto it = flat.begin(); - for (auto&& v : nested) { - it = std::move(v.begin(), v.end(), it); - } - - EXPECT_EQ(it, flat.end()); - return flat; - } - - std::shared_ptr task_group = - TaskGroup::MakeThreaded(::arrow::internal::GetCpuThreadPool()); -}; - -TEST_F(TestLockFreeStack, AppendOnly) { - LockFreeStack ints; - - for (int i = 0; i < task_group->parallelism(); ++i) { - task_group->Append([&] { - AppendLots(&ints); - return Status::OK(); - }); - } - - ASSERT_OK(task_group->Finish()); - - std::vector actual = ints.ToVector(), expected; - - for (int i = 0; i < task_group->parallelism(); ++i) { - AppendLots(&expected); - } - - for (auto v : {&actual, &expected}) { - std::sort(v->begin(), v->end()); - } - - EXPECT_THAT(actual, testing::ElementsAreArray(expected)); -} - -TEST_F(TestLockFreeStack, ProduceThenConsumeMoved) { - LockFreeStack ints; - - std::vector> per_thread_actual(task_group->parallelism()); - - for (int i = 0; i < task_group->parallelism(); ++i) { - task_group->Append([&, i] { - AppendLots(&ints); - auto local = std::move(ints); - per_thread_actual[i] = local.ToVector(); - return Status::OK(); - }); - } - - ASSERT_OK(task_group->Finish()); - - std::vector actual = Flatten(std::move(per_thread_actual)), expected; - - for (int i = 0; i < task_group->parallelism(); ++i) { - AppendLots(&expected); - } - - for (auto v : {&actual, &expected}) { - std::sort(v->begin(), v->end()); - } - - EXPECT_THAT(actual, testing::ElementsAreArray(expected)); -} - -TEST_F(TestLockFreeStack, AlternatingProduceAndConsumeMoved) { - LockFreeStack ints; - - std::vector> per_thread_actual(task_group->parallelism()); - - for (int i = 0; i < task_group->parallelism(); ++i) { - task_group->Append([&, i] { - for (int j = 0; j < kLots; ++j) { - ints.Push(j); - auto local = std::move(ints); - for (int got : local) { - per_thread_actual[i].push_back(got); - } - } - return Status::OK(); - }); - } - - ASSERT_OK(task_group->Finish()); - - std::vector actual = Flatten(std::move(per_thread_actual)), expected; - - for (int i = 0; i < task_group->parallelism(); ++i) { - AppendLots(&expected); - } - - for (auto v : {&actual, &expected}) { - std::sort(v->begin(), v->end()); - } - - EXPECT_THAT(actual, testing::ElementsAreArray(expected)); -} - -} // namespace util -} // namespace arrow From a3454d96209c11dc9ce3994abdfe5c2a5fdaa555 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 5 Oct 2020 09:07:07 -0400 Subject: [PATCH 24/34] more exact typing in GetOrInsertGenerated --- cpp/src/arrow/dataset/file_base.cc | 22 ++++++++++------------ cpp/src/arrow/util/map.h | 8 ++++---- 2 files changed, 14 
insertions(+), 16 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index aa9efb0dfa6..c930d73d391 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -322,25 +322,23 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio util::Mutex::Guard wait_for_opened_writer_lock; - using internal::GetOrInsertGenerated; - WriteQueue* queue; { // lookup the queue to which batch should be appended auto queues_lock = queues_mutex.Lock(); - queue = GetOrInsertGenerated(&queues, std::move(part), - [&](const std::string& part) { - // lookup in `queues` also failed, - // generate a new WriteQueue - size_t queue_index = queues.size() - 1; + queue = internal::GetOrInsertGenerated( + &queues, std::move(part), + [&](const std::string& emplaced_part) { + // lookup in `queues` also failed, + // generate a new WriteQueue + size_t queue_index = queues.size() - 1; - queues_storage.emplace_back( - part, queue_index, - &wait_for_opened_writer_lock); + queues_storage.emplace_back(emplaced_part, queue_index, + &wait_for_opened_writer_lock); - return &queues_storage.back(); - }) + return &queues_storage.back(); + }) ->second; } diff --git a/cpp/src/arrow/util/map.h b/cpp/src/arrow/util/map.h index 2ca1cf4c82d..5523909061d 100644 --- a/cpp/src/arrow/util/map.h +++ b/cpp/src/arrow/util/map.h @@ -28,8 +28,8 @@ namespace internal { /// std::unordered_map. If `key` exists in the container, an iterator to that pair /// will be returned. If `key` does not exist in the container, `gen(key)` will be /// invoked and its return value inserted. -template -auto GetOrInsertGenerated(Map* map, Key key, Gen&& gen) +template +auto GetOrInsertGenerated(Map* map, typename Map::key_type key, Gen&& gen) -> decltype(map->begin()->second = gen(map->begin()->first), map->begin()) { decltype(gen(map->begin()->first)) placeholder{}; @@ -43,8 +43,8 @@ auto GetOrInsertGenerated(Map* map, Key key, Gen&& gen) return it_success.first; } -template -auto GetOrInsertGenerated(Map* map, Key key, Gen&& gen) +template +auto GetOrInsertGenerated(Map* map, typename Map::key_type key, Gen&& gen) -> Resultbegin()->second = gen(map->begin()->first).ValueOrDie(), map->begin())> { decltype(gen(map->begin()->first).ValueOrDie()) placeholder{}; From 448e04e9d291e793d7347f1ccb6be0db501a42ae Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 5 Oct 2020 17:01:05 -0400 Subject: [PATCH 25/34] move lazy initialization locking into Flush() --- cpp/src/arrow/dataset/file_base.cc | 61 +++++++++++------------------- 1 file changed, 23 insertions(+), 38 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index c930d73d391..79083f73199 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -177,10 +177,10 @@ Status ValidateBasenameTemplate(util::string_view basename_template) { class WriteQueue { public: WriteQueue(std::string partition_expression, size_t index, - util::Mutex::Guard* wait_for_opened_writer_lock) - : partition_expression_(std::move(partition_expression)), index_(index) { - *wait_for_opened_writer_lock = writer_mutex_.Lock(); - } + std::shared_ptr schema) + : partition_expression_(std::move(partition_expression)), + index_(index), + schema_(std::move(schema)) {} // Push a batch into the writer's queue of pending writes. 
void Push(std::shared_ptr batch) { @@ -190,8 +190,14 @@ class WriteQueue { // Flush all pending batches, or return immediately if another thread is already // flushing this queue. - Status Flush() { + Status Flush(const FileSystemDatasetWriteOptions& write_options) { if (auto writer_lock = writer_mutex_.TryLock()) { + if (writer_ == nullptr) { + // FileWriters are opened lazily to avoid blocking while holding the lock on the + // scan-wide queue set + RETURN_NOT_OK(OpenWriter(write_options)); + } + while (true) { std::shared_ptr batch; { @@ -214,8 +220,8 @@ class WriteQueue { const std::shared_ptr& writer() const { return writer_; } - Status OpenWriter(const FileSystemDatasetWriteOptions& write_options, - std::shared_ptr schema) { + private: + Status OpenWriter(const FileSystemDatasetWriteOptions& write_options) { auto dir = fs::internal::EnsureTrailingSlash(write_options.base_dir) + partition_expression_; @@ -231,15 +237,12 @@ class WriteQueue { ARROW_ASSIGN_OR_RAISE(auto destination, write_options.filesystem->OpenOutputStream(path)); - ARROW_ASSIGN_OR_RAISE(writer_, write_options.format()->MakeWriter( - std::move(destination), std::move(schema), - write_options.file_write_options)); + ARROW_ASSIGN_OR_RAISE( + writer_, write_options.format()->MakeWriter(std::move(destination), schema_, + write_options.file_write_options)); return Status::OK(); } - using Set = std::unordered_map; - - private: util::Mutex writer_mutex_; std::shared_ptr writer_; @@ -250,6 +253,8 @@ class WriteQueue { std::string partition_expression_; size_t index_; + + std::shared_ptr schema_; }; Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_options, @@ -288,16 +293,12 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio } } - // WriteQueues are stored in this deque until writing is completed and are otherwise - // referenced by non-owning pointers. - std::deque queues_storage; - // Store a mapping from partitions (represened by their formatted partition expressions) // to a WriteQueue which flushes batches into that partition's output file. In principle // any thread could produce a batch for any partition, so each task alternates between // pushing batches and flushing them to disk. util::Mutex queues_mutex; - WriteQueue::Set queues; + std::unordered_map> queues; auto fragment_for_task_it = fragment_for_task.begin(); for (const auto& scan_task : scan_tasks) { @@ -320,8 +321,6 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio ARROW_ASSIGN_OR_RAISE(auto part, write_options.partitioning->Format(partition_expression)); - util::Mutex::Guard wait_for_opened_writer_lock; - WriteQueue* queue; { // lookup the queue to which batch should be appended @@ -334,24 +333,10 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio // generate a new WriteQueue size_t queue_index = queues.size() - 1; - queues_storage.emplace_back(emplaced_part, queue_index, - &wait_for_opened_writer_lock); - - return &queues_storage.back(); + return internal::make_unique( + emplaced_part, queue_index, batch->schema()); }) - ->second; - } - - if (wait_for_opened_writer_lock) { - // We have openned a new WriteQueue for `part` and must set up its FileWriter. - // That could be slow, so we've already released queues_lock, allowing - // other threads to look up already-open queues. This is safe since we are - // mutating an element of `queues` in place, which does not conflict with - // access to any other element. 
However, we do need to continue holding - // wait_for_opened_writer_lock while we open the FileWriter to prevent other - // threads from using the writer before it has been constructed. - RETURN_NOT_OK(queue->OpenWriter(write_options, batch->schema())); - wait_for_opened_writer_lock.Unlock(); + ->second.get(); } queue->Push(std::move(batch)); @@ -360,7 +345,7 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio // flush all touched WriteQueues for (auto queue : need_flushed) { - RETURN_NOT_OK(queue->Flush()); + RETURN_NOT_OK(queue->Flush(write_options)); } } From 87d863a78bde186a0542b3b0fb9095a2fc778066 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 5 Oct 2020 17:05:05 -0400 Subject: [PATCH 26/34] fix comment --- cpp/src/arrow/dataset/file_base.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index 79083f73199..ef044ea3a03 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -193,8 +193,7 @@ class WriteQueue { Status Flush(const FileSystemDatasetWriteOptions& write_options) { if (auto writer_lock = writer_mutex_.TryLock()) { if (writer_ == nullptr) { - // FileWriters are opened lazily to avoid blocking while holding the lock on the - // scan-wide queue set + // FileWriters are opened lazily to avoid blocking access to a scan-wide queue set RETURN_NOT_OK(OpenWriter(write_options)); } From 7db8bf33aa8393bfc7f31517aea0681e1963fdc1 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 6 Oct 2020 09:54:57 -0400 Subject: [PATCH 27/34] address review comments --- cpp/src/arrow/dataset/file_base.h | 1 - cpp/src/arrow/dataset/file_ipc.cc | 6 +++--- cpp/src/arrow/dataset/file_parquet.cc | 24 ++++++++++++------------ cpp/src/arrow/util/mutex.cc | 18 +++++++----------- cpp/src/arrow/util/mutex.h | 2 +- cpp/src/arrow/util/string.h | 2 ++ 6 files changed, 25 insertions(+), 28 deletions(-) diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h index 11f5a608a91..ee0df11b32b 100644 --- a/cpp/src/arrow/dataset/file_base.h +++ b/cpp/src/arrow/dataset/file_base.h @@ -129,7 +129,6 @@ class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this IsSupported(const FileSource& source) const = 0; diff --git a/cpp/src/arrow/dataset/file_ipc.cc b/cpp/src/arrow/dataset/file_ipc.cc index 9c7d643f9ac..e15de84b9bf 100644 --- a/cpp/src/arrow/dataset/file_ipc.cc +++ b/cpp/src/arrow/dataset/file_ipc.cc @@ -163,9 +163,9 @@ Result IpcFileFormat::ScanFile(std::shared_ptr op fragment->source()); } -/// -/// IpcFileWriter, IpcFileWriteOptions -/// +// +// IpcFileWriter, IpcFileWriteOptions +// std::shared_ptr IpcFileFormat::DefaultWriteOptions() { std::shared_ptr options( diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc index ced75c652c4..fe2f64ad526 100644 --- a/cpp/src/arrow/dataset/file_parquet.cc +++ b/cpp/src/arrow/dataset/file_parquet.cc @@ -427,9 +427,9 @@ Result> ParquetFileFormat::MakeFragment( std::move(physical_schema), {})); } -/// -/// ParquetFileWriter, ParquetFileWriteOptions -/// +// +// ParquetFileWriter, ParquetFileWriteOptions +// std::shared_ptr ParquetFileFormat::DefaultWriteOptions() { std::shared_ptr options( @@ -469,9 +469,9 @@ Status ParquetFileWriter::Write(const std::shared_ptr& batch) { Status ParquetFileWriter::Finish() { return parquet_writer_->Close(); } -/// -/// RowGroupInfo -/// +// +// RowGroupInfo +// std::vector 
RowGroupInfo::FromIdentifiers(const std::vector ids) { std::vector results; @@ -526,9 +526,9 @@ bool RowGroupInfo::Satisfy(const Expression& predicate) const { return !HasStatistics() || predicate.IsSatisfiableWith(statistics_expression_); } -/// -/// ParquetFileFragment -/// +// +// ParquetFileFragment +// ParquetFileFragment::ParquetFileFragment(FileSource source, std::shared_ptr format, @@ -631,9 +631,9 @@ Result> ParquetFileFragment::FilterRowGroups( return row_groups; } -/// -/// ParquetDatasetFactory -/// +// +// ParquetDatasetFactory +// ParquetDatasetFactory::ParquetDatasetFactory( std::shared_ptr filesystem, std::shared_ptr format, diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index 63ff026f4ee..7456d7889d8 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -24,23 +24,19 @@ namespace arrow { namespace util { -struct Mutex::Impl : std::mutex {}; +struct Mutex::Impl { + std::mutex mutex_; +}; Mutex::Guard::Guard(Mutex* locked) : locked_(locked, [](Mutex* locked) { - DCHECK(!locked->impl_->try_lock()); - locked->impl_->unlock(); + DCHECK(!locked->impl_->mutex_.try_lock()); + locked->impl_->mutex_.unlock(); }) {} -void Mutex::Guard::Unlock() { - if (locked_) { - locked_.reset(); - } -} - Mutex::Guard Mutex::TryLock() { DCHECK_NE(impl_, nullptr); - if (impl_->try_lock()) { + if (impl_->mutex_.try_lock()) { return Guard{this}; } return Guard{}; @@ -48,7 +44,7 @@ Mutex::Guard Mutex::TryLock() { Mutex::Guard Mutex::Lock() { DCHECK_NE(impl_, nullptr); - impl_->lock(); + impl_->mutex_.lock(); return Guard{this}; } diff --git a/cpp/src/arrow/util/mutex.h b/cpp/src/arrow/util/mutex.h index 8c9f0d40002..f4fc64181fb 100644 --- a/cpp/src/arrow/util/mutex.h +++ b/cpp/src/arrow/util/mutex.h @@ -43,7 +43,7 @@ class ARROW_EXPORT Mutex { explicit operator bool() const { return bool(locked_); } - void Unlock(); + void Unlock() { locked_.reset(); } private: explicit Guard(Mutex* locked); diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h index 4201bbe2ea8..56a02dfb20b 100644 --- a/cpp/src/arrow/util/string.h +++ b/cpp/src/arrow/util/string.h @@ -57,6 +57,8 @@ bool AsciiEqualsCaseInsensitive(util::string_view left, util::string_view right) ARROW_EXPORT std::string AsciiToLower(util::string_view value); +/// \brief Search for the first instance of a token and replace it or return nullopt if +/// the token is not found. 
ARROW_EXPORT util::optional Replace(util::string_view s, util::string_view token, util::string_view replacement); From 33257a6fcbefdd9de54a6a0faa8b863ed7566bf1 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 6 Oct 2020 10:12:16 -0400 Subject: [PATCH 28/34] add default basename_template for python --- python/pyarrow/_dataset.pyx | 16 +++++++--- python/pyarrow/dataset.py | 18 ++++++----- python/pyarrow/tests/test_dataset.py | 46 ++++++++++++---------------- 3 files changed, 42 insertions(+), 38 deletions(-) diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index 847b4b0d71e..bfee82ba362 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -699,6 +699,10 @@ cdef class FileFormat(_Weakrefable): def make_write_options(self): return FileWriteOptions.wrap(self.format.DefaultWriteOptions()) + @property + def default_extname(self): + return frombytes(self.format.type_name()) + def __eq__(self, other): try: return self.equals(other) @@ -1350,6 +1354,10 @@ cdef class IpcFileFormat(FileFormat): def equals(self, IpcFileFormat other): return True + @property + def default_extname(self): + return "feather" + def __reduce__(self): return IpcFileFormat, tuple() @@ -2297,10 +2305,10 @@ def _get_partition_keys(Expression partition_expression): def _filesystemdataset_write( - data, object base_dir, str basename_template, Schema schema not None, - FileFormat format not None, FileSystem filesystem not None, - Partitioning partitioning not None, FileWriteOptions file_options not None, - bint use_threads, + data not None, object base_dir not None, str basename_template not None, + Schema schema not None, FileFormat format not None, + FileSystem filesystem not None, Partitioning partitioning not None, + FileWriteOptions file_options not None, bint use_threads, ): """ CFileSystemDataset.Write wrapper diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py index 574fad9d325..5cadd69dc6f 100644 --- a/python/pyarrow/dataset.py +++ b/python/pyarrow/dataset.py @@ -697,7 +697,7 @@ def _ensure_write_partitioning(scheme): return scheme -def write_dataset(data, base_dir, basename_template, format=None, +def write_dataset(data, base_dir, basename_template=None, format=None, partitioning=None, schema=None, filesystem=None, file_options=None, use_threads=True): """ @@ -710,16 +710,17 @@ def write_dataset(data, base_dir, basename_template, format=None, in-memory Arrow data. base_dir : str The root directory where to write the dataset. - basename_template : str + basename_template : str, optional A template string used to generate basenames of written data files. The token '{i}' will be replaced with an automatically incremented - integer. + integer. If not specified, it defaults to + "dat_{i}." + format.default_extname format : FileFormat or str The format in which to write the dataset. Currently supported: - "ipc"/"feather". If a FileSystemDataset is being written and `format` - is not specified, it defaults to the same format as the specified - FileSystemDataset. When writing a Table or RecordBatch, this keyword - is required. + "parquet", "ipc"/"feather". If a FileSystemDataset is being written + and `format` is not specified, it defaults to the same format as the + specified FileSystemDataset. When writing a Table or RecordBatch, this + keyword is required. partitioning : Partitioning, optional The partitioning scheme specified with the ``partitioning()`` function. 
@@ -745,6 +746,9 @@ def write_dataset(data, base_dir, basename_template, format=None, ) format = _ensure_format(format) + if basename_template is None: + basename_template = "dat_{i}." + format.default_extname + if file_options is None: file_options = format.make_write_options() diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index a7e46ae4ce1..3df92fb8944 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -2151,10 +2151,8 @@ def _check_dataset_roundtrip(dataset, base_dir, expected_files, base_dir_path=None, partitioning=None): base_dir_path = base_dir_path or base_dir - ds.write_dataset( - dataset, base_dir, basename_template='dat_{i}.feather', - format="feather", partitioning=partitioning, use_threads=False - ) + ds.write_dataset(dataset, base_dir, format="feather", + partitioning=partitioning, use_threads=False) # check that all files are present file_paths = list(base_dir_path.rglob("*")) @@ -2247,13 +2245,13 @@ def test_write_dataset_use_threads(tempdir): target1 = tempdir / 'partitioned1' ds.write_dataset( - dataset, target1, basename_template='dat_{i}.feather', - format="feather", partitioning=partitioning, use_threads=True + dataset, target1, format="feather", partitioning=partitioning, + use_threads=True ) target2 = tempdir / 'partitioned2' ds.write_dataset( - dataset, target2, basename_template='dat_{i}.feather', - format="feather", partitioning=partitioning, use_threads=False + dataset, target2, format="feather", partitioning=partitioning, + use_threads=False ) # check that reading in gives same result @@ -2270,10 +2268,10 @@ def test_write_table(tempdir): base_dir = tempdir / 'single' ds.write_dataset(table, base_dir, - basename_template='dat_{i}.feather', format="feather") + basename_template='dat_{i}.arrow', format="feather") # check that all files are present file_paths = list(base_dir.rglob("*")) - expected_paths = [base_dir / "dat_0.feather"] + expected_paths = [base_dir / "dat_0.arrow"] assert set(file_paths) == set(expected_paths) # check Table roundtrip result = ds.dataset(base_dir, format="ipc").to_table() @@ -2283,13 +2281,13 @@ def test_write_table(tempdir): base_dir = tempdir / 'partitioned' partitioning = ds.partitioning( pa.schema([("part", pa.string())]), flavor="hive") - ds.write_dataset(table, base_dir, - basename_template='dat_{i}.feather', - format="feather", partitioning=partitioning) + ds.write_dataset(table, base_dir, format="feather", + basename_template='dat_{i}.arrow', + partitioning=partitioning) file_paths = list(base_dir.rglob("*")) expected_paths = [ - base_dir / "part=a", base_dir / "part=a" / "dat_0.feather", - base_dir / "part=b", base_dir / "part=b" / "dat_1.feather" + base_dir / "part=a", base_dir / "part=a" / "dat_0.arrow", + base_dir / "part=b", base_dir / "part=b" / "dat_1.arrow" ] assert set(file_paths) == set(expected_paths) result = ds.dataset(base_dir, format="ipc", partitioning=partitioning) @@ -2305,30 +2303,26 @@ def test_write_table_multiple_fragments(tempdir): # Table with multiple batches written as single Fragment by default base_dir = tempdir / 'single' - ds.write_dataset(table, base_dir, basename_template='dat_{i}.feather', - format="feather") + ds.write_dataset(table, base_dir, format="feather") assert set(base_dir.rglob("*")) == set([base_dir / "dat_0.feather"]) assert ds.dataset(base_dir, format="ipc").to_table().equals(table) # Same for single-element list of Table base_dir = tempdir / 'single-list' - ds.write_dataset([table], base_dir, 
basename_template='dat_{i}.feather', - format="feather") + ds.write_dataset([table], base_dir, format="feather") assert set(base_dir.rglob("*")) == set([base_dir / "dat_0.feather"]) assert ds.dataset(base_dir, format="ipc").to_table().equals(table) # Provide list of batches to write multiple fragments base_dir = tempdir / 'multiple' - ds.write_dataset(table.to_batches(), base_dir, - basename_template='dat_{i}.feather', format="feather") + ds.write_dataset(table.to_batches(), base_dir, format="feather") assert set(base_dir.rglob("*")) == set( [base_dir / "dat_0.feather"]) assert ds.dataset(base_dir, format="ipc").to_table().equals(table) # Provide list of tables to write multiple fragments base_dir = tempdir / 'multiple-table' - ds.write_dataset([table, table], base_dir, - basename_template='dat_{i}.feather', format="feather") + ds.write_dataset([table, table], base_dir, format="feather") assert set(base_dir.rglob("*")) == set( [base_dir / "dat_0.feather"]) assert ds.dataset(base_dir, format="ipc").to_table().equals( @@ -2348,8 +2342,7 @@ def test_write_dataset_parquet(tempdir): # using default "parquet" format string base_dir = tempdir / 'parquet_dataset' - ds.write_dataset(table, base_dir, - basename_template='dat_{i}.parquet', format="parquet") + ds.write_dataset(table, base_dir, format="parquet") # check that all files are present file_paths = list(base_dir.rglob("*")) expected_paths = [base_dir / "dat_0.parquet"] @@ -2363,7 +2356,6 @@ def test_write_dataset_parquet(tempdir): format = ds.ParquetFileFormat() opts = format.make_write_options(version=version) base_dir = tempdir / 'parquet_dataset_version{0}'.format(version) - ds.write_dataset(table, base_dir, basename_template='dat_{i}.parquet', - format=format, file_options=opts) + ds.write_dataset(table, base_dir, format=format, file_options=opts) meta = pq.read_metadata(base_dir / "dat_0.parquet") assert meta.format_version == version From d46c1af09401f6208607c278581b2166383c986d Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 5 Oct 2020 14:23:24 -0700 Subject: [PATCH 29/34] R code/doc polishing --- r/NAMESPACE | 3 +- r/R/dataset-format.R | 39 ++++++++++++++++--- r/R/dataset-scan.R | 3 ++ r/R/dataset-write.R | 69 ++++++--------------------------- r/man/FileWriteOptions.Rd | 3 +- r/man/write_dataset.Rd | 7 ++-- r/tests/testthat/test-dataset.R | 16 +++++++- 7 files changed, 70 insertions(+), 70 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index c54acf7a824..f33d39244d4 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -22,6 +22,7 @@ S3method(Ops,array_expression) S3method(all,equal.ArrowObject) S3method(as.character,Array) S3method(as.character,ChunkedArray) +S3method(as.character,FileFormat) S3method(as.character,Scalar) S3method(as.data.frame,RecordBatch) S3method(as.data.frame,Table) @@ -74,6 +75,7 @@ S3method(min,ChunkedArray) S3method(names,Dataset) S3method(names,FeatherReader) S3method(names,RecordBatch) +S3method(names,Scanner) S3method(names,ScannerBuilder) S3method(names,Schema) S3method(names,Table) @@ -159,7 +161,6 @@ export(MessageType) export(MetadataVersion) export(ParquetFileFormat) export(ParquetFileReader) -export(ParquetFileWriteOptions) export(ParquetFileWriter) export(ParquetReaderProperties) export(ParquetVersionType) diff --git a/r/R/dataset-format.R b/r/R/dataset-format.R index c98e7b5d44e..ec3ac36f9e5 100644 --- a/r/R/dataset-format.R +++ b/r/R/dataset-format.R @@ -61,11 +61,6 @@ FileFormat <- R6Class("FileFormat", inherit = ArrowObject, } else { self } - }, - make_write_options = function(...) 
{ - options <- shared_ptr(FileWriteOptions, dataset___FileFormat__DefaultWriteOptions(self)) - options$update(...) - options } ), active = list( @@ -89,6 +84,13 @@ FileFormat$create <- function(format, ...) { } } +#' @export +as.character.FileFormat <- function(x, ...) { + out <- x$type + # Slight hack: special case IPC -> feather, otherwise is just the type_name + ifelse(out == "ipc", "feather", out) +} + #' @usage NULL #' @format NULL #' @rdname FileFormat @@ -125,3 +127,30 @@ csv_file_format_parse_options <- function(...) { CsvParseOptions$create(...) } } + +#' Format-specific write options +#' +#' @description +#' A `FileWriteOptions` holds write options specific to a `FileFormat`. +FileWriteOptions <- R6Class("FileWriteOptions", inherit = ArrowObject, + public = list( + update = function(...) { + if (self$type == "parquet") { + dataset___ParquetFileWriteOptions__update(self, + ParquetWriterProperties$create(...), + ParquetArrowWriterProperties$create(...)) + } + invisible(self) + } + ), + active = list( + type = function() dataset___FileWriteOptions__type_name(self) + ) +) +FileWriteOptions$create <- function(format, ...) { + if (!inherits(format, "FileFormat")) { + format <- FileFormat$create(format) + } + options <- shared_ptr(FileWriteOptions, dataset___FileFormat__DefaultWriteOptions(format)) + options$update(...) +} diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R index ab542fee8f9..e9017825782 100644 --- a/r/R/dataset-scan.R +++ b/r/R/dataset-scan.R @@ -98,6 +98,9 @@ Scanner$create <- function(dataset, scanner_builder$Finish() } +#' @export +names.Scanner <- function(x) names(x$schema) + ScanTask <- R6Class("ScanTask", inherit = ArrowObject, public = list( Execute = function() map(dataset___ScanTask__get_batches(self), shared_ptr, class = RecordBatch) diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R index d667db5f8d5..d56126f05b2 100644 --- a/r/R/dataset-write.R +++ b/r/R/dataset-write.R @@ -35,8 +35,9 @@ #' @param partitioning `Partitioning` or a character vector of columns to #' use as partition keys (to be written as path segments). Default is to #' use the current `group_by()` columns. -#' @param basename_template `"{i}"` will be replaced with an autoincremented -#' integer to generate basenames of datafiles. For example `"dat_{i}.feather"` +#' @param basename_template string template for the names of files to be written. +#' Must contain `"{i}"`, which will be replaced with an autoincremented +#' integer to generate basenames of datafiles. For example, `"dat_{i}.feather"` #' will yield `"dat_0.feather", ...`. #' @param hive_style logical: write partition segments as Hive-style #' (`key1=value1/key2=value2/file.ext`) or as just bare values. Default is `TRUE`. @@ -50,21 +51,18 @@ write_dataset <- function(dataset, path, format = dataset$format, partitioning = dplyr::group_vars(dataset), - basename_template = paste("dat_{i}", format$type, sep="."), + basename_template = paste0("dat_{i}.", as.character(format)), hive_style = TRUE, filesystem = NULL, ...) { if (inherits(dataset, "arrow_dplyr_query")) { - # Check for a select - if (!identical(dataset$selected_columns, set_names(names(dataset$.data)))) { - # We can select a subset of columns but we can't rename them - if (!setequal(dataset$selected_columns, names(dataset$selected_columns))) { - stop("Renaming columns when writing a dataset is not yet supported", call. 
= FALSE) - } - dataset <- ensure_group_vars(dataset) + # We can select a subset of columns but we can't rename them + if (!all(dataset$selected_columns == names(dataset$selected_columns))) { + stop("Renaming columns when writing a dataset is not yet supported", call. = FALSE) } - } - if (inherits(dataset, "grouped_df")) { + # partitioning vars need to be in the `select` schema + dataset <- ensure_group_vars(dataset) + } else if (inherits(dataset, "grouped_df")) { force(partitioning) # Drop the grouping metadata before writing; we've already consumed it # now to construct `partitioning` and don't want it in the metadata$r @@ -72,18 +70,7 @@ write_dataset <- function(dataset, } scanner <- Scanner$create(dataset) - - if (!inherits(format, "FileFormat")) { - format <- FileFormat$create(format) - } - if (inherits(format, "ParquetFileFormat")) { - options <- format$make_write_options(table=dataset, ...) - } else { - options <- format$make_write_options(...) - } - if (!inherits(partitioning, "Partitioning")) { - # TODO: tidyselect? partition_schema <- scanner$schema[partitioning] if (isTRUE(hive_style)) { partitioning <- HivePartitioning$create(partition_schema) @@ -91,42 +78,10 @@ write_dataset <- function(dataset, partitioning <- DirectoryPartitioning$create(partition_schema) } } + path_and_fs <- get_path_and_filesystem(path, filesystem) + options <- FileWriteOptions$create(format, table = scanner, ...) dataset___Dataset__Write(options, path_and_fs$fs, path_and_fs$path, partitioning, basename_template, scanner) } - -#' Format-specific write options -#' -#' @description -#' A `FileWriteOptions` holds write options specific to a `FileFormat`. -FileWriteOptions <- R6Class("FileWriteOptions", inherit = ArrowObject, - public = list( - ..dispatch = function() { - type <- self$type - if (type == "parquet") { - shared_ptr(ParquetFileWriteOptions, self$pointer()) - } else { - self - } - }, - update = function(...) { - type <- self$type - if (type == "parquet") { - dataset___ParquetFileWriteOptions__update(self, - ParquetWriterProperties$create(...), - ParquetArrowWriterProperties$create(...)) - } - } - ), - active = list( - type = function() dataset___FileWriteOptions__type_name(self) - ) -) - -#' @usage NULL -#' @format NULL -#' @rdname FileWriteOptions -#' @export -ParquetFileWriteOptions <- R6Class("ParquetFileWriteOptions", inherit = FileWriteOptions) diff --git a/r/man/FileWriteOptions.Rd b/r/man/FileWriteOptions.Rd index 5c4d04456b4..661393c8e0d 100644 --- a/r/man/FileWriteOptions.Rd +++ b/r/man/FileWriteOptions.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dataset-write.R +% Please edit documentation in R/dataset-format.R \name{FileWriteOptions} \alias{FileWriteOptions} -\alias{ParquetFileWriteOptions} \title{Format-specific write options} \description{ A \code{FileWriteOptions} holds write options specific to a \code{FileFormat}. diff --git a/r/man/write_dataset.Rd b/r/man/write_dataset.Rd index 218876427ca..ee51d244a44 100644 --- a/r/man/write_dataset.Rd +++ b/r/man/write_dataset.Rd @@ -9,7 +9,7 @@ write_dataset( path, format = dataset$format, partitioning = dplyr::group_vars(dataset), - basename_template = paste("dat_{i}", format$type, sep = "."), + basename_template = paste0("dat_{i}.", as.character(format)), hive_style = TRUE, filesystem = NULL, ... @@ -34,8 +34,9 @@ same format as \code{dataset}.} use as partition keys (to be written as path segments). 
Default is to use the current \code{group_by()} columns.} -\item{basename_template}{\code{"{i}"} will be replaced with an autoincremented -integer to generate basenames of datafiles. For example \code{"dat_{i}.feather"} +\item{basename_template}{string template for the names of files to be written. +Must contain \code{"{i}"}, which will be replaced with an autoincremented +integer to generate basenames of datafiles. For example, \code{"dat_{i}.feather"} will yield \verb{"dat_0.feather", ...}.} \item{hive_style}{logical: write partition segments as Hive-style diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 73ea71b3c49..f33f8ad1def 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -759,6 +759,14 @@ test_that("Writing a dataset: CSV->IPC", { filter(integer > 6) %>% summarize(mean = mean(integer)) ) + + # Check whether "int" is present in the files or just in the dirs + first <- read_feather( + dir(dst_dir, pattern = ".feather$", recursive = TRUE, full.names = TRUE)[1], + as_data_frame = FALSE + ) + # It shouldn't be there + expect_false("int" %in% names(first)) }) test_that("Writing a dataset: Parquet->IPC", { @@ -980,14 +988,18 @@ test_that("Dataset writing: unsupported features/input validation", { expect_error(write_dataset(4), 'dataset must be a "Dataset"') ds <- open_dataset(hive_dir) - expect_error( select(ds, integer = int) %>% write_dataset(ds), "Renaming columns when writing a dataset is not yet supported" ) - expect_error( write_dataset(ds, partitioning = c("int", "NOTACOLUMN"), format = "ipc"), 'Invalid field name: "NOTACOLUMN"' ) + expect_error( + write_dataset(ds, tempfile(), basename_template = "something_without_i") + ) + expect_error( + write_dataset(ds, tempfile(), basename_template = NULL) + ) }) From 7f1255e9383366202b1dcaadf1dfee519d35a49b Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 5 Oct 2020 14:27:26 -0700 Subject: [PATCH 30/34] Update vignette now that you can filter when writing --- r/vignettes/dataset.Rmd | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/r/vignettes/dataset.Rmd b/r/vignettes/dataset.Rmd index 488f7f4f43b..df460c2682a 100644 --- a/r/vignettes/dataset.Rmd +++ b/r/vignettes/dataset.Rmd @@ -350,6 +350,16 @@ partitions are because they can be read from the file paths. (To instead write bare values for partition segments, i.e. `1` rather than `payment_type=1`, call `write_dataset()` with `hive_style = FALSE`.) +Perhaps, though, `payment_type == 1` is the only data we care about for our +current work, and we just want to drop the rest and have a smaller working set. +For this, we can `filter()` them out when writing: + +```r +ds %>% + filter(payment_type == 1) %>% + write_dataset("nyc-taxi/feather", format = "feather") +``` + The other thing we can do when writing datasets is select a subset of and/or reorder columns. Suppose we never care about `vendor_id`, and being a string column, it can take up a lot of space when we read it in, so let's drop it: @@ -361,5 +371,5 @@ ds %>% write_dataset("nyc-taxi/feather", format = "feather") ``` -Note that you cannot currently rename columns when writing, nor can you `filter()` -and write a subset of the data. +Note that while you can select a subset of columns, +you cannot currently rename columns when writing. 
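Taken together, patches 29 and 30 let a single dplyr pipeline choose rows, partition keys, and file names when writing a dataset from R. Below is a minimal sketch of the intended usage; the paths and column names are assumptions borrowed from the taxi example in the vignette, not part of this diff:

```r
library(arrow)
library(dplyr)

# Open an existing (hypothetical) CSV dataset
ds <- open_dataset("nyc-taxi/csv", format = "csv")

ds %>%
  filter(total_amount > 0) %>%              # rows can now be filtered while writing
  write_dataset(
    "nyc-taxi/feather",
    format = "feather",
    partitioning = "payment_type",          # column values written as path segments
    basename_template = "dat_{i}.feather",  # must contain "{i}"
    hive_style = FALSE                      # bare values instead of key=value segments
  )
```

Because `hive_style = FALSE` selects `DirectoryPartitioning`, the partition values appear as bare path segments (e.g. `1/dat_0.feather`) rather than `payment_type=1/dat_0.feather`.
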
From 16d9d53bbda0ff29df4ea6c5c1d68808b2caede9 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 6 Oct 2020 10:49:18 -0400 Subject: [PATCH 31/34] lint fix --- python/pyarrow/tests/test_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index 3df92fb8944..fbb06564803 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -2152,7 +2152,7 @@ def _check_dataset_roundtrip(dataset, base_dir, expected_files, base_dir_path = base_dir_path or base_dir ds.write_dataset(dataset, base_dir, format="feather", - partitioning=partitioning, use_threads=False) + partitioning=partitioning, use_threads=False) # check that all files are present file_paths = list(base_dir_path.rglob("*")) From 086f59d8a3799181609c48813b7299028efcde10 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 6 Oct 2020 11:25:55 -0400 Subject: [PATCH 32/34] writing without partitioning will create a single file --- r/tests/testthat/test-s3-minio.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-s3-minio.R b/r/tests/testthat/test-s3-minio.R index 63902aac40a..5d9213f4f21 100644 --- a/r/tests/testthat/test-s3-minio.R +++ b/r/tests/testthat/test-s3-minio.R @@ -119,7 +119,7 @@ if (arrow_with_s3() && process_is_running("minio server")) { test_that("write_dataset with fs", { ds <- open_dataset(minio_path("hive_dir"), filesystem = fs) write_dataset(ds, minio_path("new_dataset_dir"), filesystem = fs) - expect_length(fs$GetFileInfo(FileSelector$create(minio_path("new_dataset_dir"))), 2) + expect_length(fs$GetFileInfo(FileSelector$create(minio_path("new_dataset_dir"))), 1) }) test_that("S3FileSystem input validation", { From 998d760ef0c1ac21145b426075f6ffba0d1ae675 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 7 Oct 2020 13:12:40 -0400 Subject: [PATCH 33/34] address review comments --- python/pyarrow/_dataset.pyx | 132 ++++--------------- python/pyarrow/dataset.py | 24 +++- python/pyarrow/includes/libarrow_dataset.pxd | 1 + python/pyarrow/tests/test_dataset.py | 28 ++-- r/R/dataset-write.R | 6 +- 5 files changed, 59 insertions(+), 132 deletions(-) diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index bfee82ba362..0aec214831b 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -636,6 +636,10 @@ cdef class FileWriteOptions(_Weakrefable): self.init(sp) return self + @property + def format(self): + return FileFormat.wrap(self.options.format()) + cdef inline shared_ptr[CFileWriteOptions] unwrap(self): return self.wrapped @@ -1113,7 +1117,8 @@ cdef class ParquetFileWriteOptions(FileWriteOptions): update = False update_arrow = False for name, value in kwargs.items(): - assert name in self._properties + if name not in self._properties: + raise TypeError("unexpected parquet write option: " + name) self._properties[name] = value if name in arrow_fields: update_arrow = True @@ -1122,119 +1127,30 @@ cdef class ParquetFileWriteOptions(FileWriteOptions): if update: opts.writer_properties = _create_writer_properties( - use_dictionary=self.use_dictionary, - compression=self.compression, - version=self.version, - write_statistics=self.write_statistics, - data_page_size=self.data_page_size, - compression_level=self.compression_level, - use_byte_stream_split=self.use_byte_stream_split, - data_page_version=self.data_page_version, + use_dictionary=self._properties["use_dictionary"], + 
compression=self._properties["compression"], + version=self._properties["version"], + write_statistics=self._properties["write_statistics"], + data_page_size=self._properties["data_page_size"], + compression_level=self._properties["compression_level"], + use_byte_stream_split=( + self._properties["use_byte_stream_split"] + ), + data_page_version=self._properties["data_page_version"], ) if update_arrow: opts.arrow_writer_properties = _create_arrow_writer_properties( use_deprecated_int96_timestamps=( - self.use_deprecated_int96_timestamps + self._properties["use_deprecated_int96_timestamps"] + ), + coerce_timestamps=self._properties["coerce_timestamps"], + allow_truncated_timestamps=( + self._properties["allow_truncated_timestamps"] ), - coerce_timestamps=self.coerce_timestamps, - allow_truncated_timestamps=self.allow_truncated_timestamps, - writer_engine_version=self.writer_engine_version, + writer_engine_version='V2' ) - @property - def use_dictionary(self): - return self._properties['use_dictionary'] - - @use_dictionary.setter - def use_dictionary(self, use_dictionary): - self.update(use_dictionary=use_dictionary) - - @property - def compression(self): - return self._properties['compression'] - - @compression.setter - def compression(self, compression): - self.update(compression=compression) - - @property - def version(self): - return self._properties['version'] - - @version.setter - def version(self, version): - self.update(version=version) - - @property - def write_statistics(self): - return self._properties['write_statistics'] - - @write_statistics.setter - def write_statistics(self, write_statistics): - self.update(write_statistics=write_statistics) - - @property - def data_page_size(self): - return self._properties['data_page_size'] - - @data_page_size.setter - def data_page_size(self, data_page_size): - self.update(data_page_size=data_page_size) - - @property - def compression_level(self): - return self._properties['compression_level'] - - @compression_level.setter - def compression_level(self, compression_level): - self.update(compression_level=compression_level) - - @property - def use_byte_stream_split(self): - return self._properties['use_byte_stream_split'] - - @use_byte_stream_split.setter - def use_byte_stream_split(self, use_byte_stream_split): - self.update(use_byte_stream_split=use_byte_stream_split) - - @property - def data_page_version(self): - return self._properties['data_page_version'] - - @data_page_version.setter - def data_page_version(self, data_page_version): - self.update(data_page_version=data_page_version) - - @property - def use_deprecated_int96_timestamps(self): - return self._properties['use_deprecated_int96_timestamps'] - - @use_deprecated_int96_timestamps.setter - def use_deprecated_int96_timestamps(self, use_deprecated_int96_timestamps): - self.update( - use_deprecated_int96_timestamps=use_deprecated_int96_timestamps) - - @property - def coerce_timestamps(self): - return self._properties['coerce_timestamps'] - - @coerce_timestamps.setter - def coerce_timestamps(self, coerce_timestamps): - self.update(coerce_timestamps=coerce_timestamps) - - @property - def allow_truncated_timestamps(self): - return self._properties['allow_truncated_timestamps'] - - @allow_truncated_timestamps.setter - def allow_truncated_timestamps(self, allow_truncated_timestamps): - self.update(allow_truncated_timestamps=allow_truncated_timestamps) - - @property - def writer_engine_version(self): - return 'V2' - cdef void init(self, const shared_ptr[CFileWriteOptions]& sp): 
FileWriteOptions.init(self, sp) self.parquet_options = sp.get() @@ -2306,8 +2222,8 @@ def _get_partition_keys(Expression partition_expression): def _filesystemdataset_write( data not None, object base_dir not None, str basename_template not None, - Schema schema not None, FileFormat format not None, - FileSystem filesystem not None, Partitioning partitioning not None, + Schema schema not None, FileSystem filesystem not None, + Partitioning partitioning not None, FileWriteOptions file_options not None, bint use_threads, ): """ diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py index 5cadd69dc6f..f278e05c007 100644 --- a/python/pyarrow/dataset.py +++ b/python/pyarrow/dataset.py @@ -714,7 +714,7 @@ def write_dataset(data, base_dir, basename_template=None, format=None, A template string used to generate basenames of written data files. The token '{i}' will be replaced with an automatically incremented integer. If not specified, it defaults to - "dat_{i}." + format.default_extname + "part-{i}." + format.default_extname format : FileFormat or str The format in which to write the dataset. Currently supported: "parquet", "ipc"/"feather". If a FileSystemDataset is being written @@ -726,14 +726,15 @@ def write_dataset(data, base_dir, basename_template=None, format=None, function. schema : Schema, optional filesystem : FileSystem, optional + file_options : FileWriteOptions, optional + FileFormat specific write options, created using the + ``FileFormat.make_write_options()`` function. use_threads : bool, default True Write files in parallel. If enabled, then maximum parallelism will be used determined by the number of available CPU cores. """ if isinstance(data, Dataset): schema = schema or data.schema - if isinstance(data, FileSystemDataset): - format = format or data.format elif isinstance(data, (pa.Table, pa.RecordBatch)): schema = schema or data.schema data = [data] @@ -745,13 +746,22 @@ def write_dataset(data, base_dir, basename_template=None, format=None, "objects are supported." ) - format = _ensure_format(format) - if basename_template is None: - basename_template = "dat_{i}." + format.default_extname + if format is None and isinstance(data, FileSystemDataset): + format = data.format + else: + format = _ensure_format(format) if file_options is None: file_options = format.make_write_options() + if format != file_options.format: + raise TypeError("Supplied FileWriteOptions have format {}, " + "which doesn't match supplied FileFormat {}".format( + format, file_options)) + + if basename_template is None: + basename_template = "part-{i}." 
+ format.default_extname + partitioning = _ensure_write_partitioning(partitioning) if filesystem is None: @@ -761,6 +771,6 @@ def write_dataset(data, base_dir, basename_template=None, format=None, filesystem, _ = _ensure_fs(filesystem) _filesystemdataset_write( - data, base_dir, basename_template, schema, format, + data, base_dir, basename_template, schema, filesystem, partitioning, file_options, use_threads, ) diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd index 3b5b70a37ba..8bf8b78b07b 100644 --- a/python/pyarrow/includes/libarrow_dataset.pxd +++ b/python/pyarrow/includes/libarrow_dataset.pxd @@ -213,6 +213,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: cdef cppclass CFileWriteOptions \ "arrow::dataset::FileWriteOptions": + const shared_ptr[CFileFormat]& format() const c_string type_name() const cdef cppclass CFileFormat "arrow::dataset::FileFormat": diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index fbb06564803..8c24c66ea99 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -2174,18 +2174,18 @@ def test_write_dataset(tempdir): # full string path target = tempdir / 'single-file-target' - expected_files = [target / "dat_0.feather"] + expected_files = [target / "part-0.feather"] _check_dataset_roundtrip(dataset, str(target), expected_files, target) # pathlib path object target = tempdir / 'single-file-target2' - expected_files = [target / "dat_0.feather"] + expected_files = [target / "part-0.feather"] _check_dataset_roundtrip(dataset, target, expected_files, target) # TODO # # relative path # target = tempdir / 'single-file-target3' - # expected_files = [target / "dat_0.ipc"] + # expected_files = [target / "part-0.ipc"] # _check_dataset_roundtrip( # dataset, './single-file-target3', expected_files, target) @@ -2196,7 +2196,7 @@ def test_write_dataset(tempdir): dataset = ds.dataset(directory) target = tempdir / 'single-directory-target' - expected_files = [target / "dat_0.feather"] + expected_files = [target / "part-0.feather"] _check_dataset_roundtrip(dataset, str(target), expected_files, target) @@ -2211,8 +2211,8 @@ def test_write_dataset_partitioned(tempdir): # hive partitioning target = tempdir / 'partitioned-hive-target' expected_paths = [ - target / "part=a", target / "part=a" / "dat_0.feather", - target / "part=b", target / "part=b" / "dat_1.feather" + target / "part=a", target / "part=a" / "part-0.feather", + target / "part=b", target / "part=b" / "part-1.feather" ] partitioning_schema = ds.partitioning( pa.schema([("part", pa.string())]), flavor="hive") @@ -2223,8 +2223,8 @@ def test_write_dataset_partitioned(tempdir): # directory partitioning target = tempdir / 'partitioned-dir-target' expected_paths = [ - target / "a", target / "a" / "dat_0.feather", - target / "b", target / "b" / "dat_1.feather" + target / "a", target / "a" / "part-0.feather", + target / "b", target / "b" / "part-1.feather" ] partitioning_schema = ds.partitioning( pa.schema([("part", pa.string())])) @@ -2304,27 +2304,27 @@ def test_write_table_multiple_fragments(tempdir): # Table with multiple batches written as single Fragment by default base_dir = tempdir / 'single' ds.write_dataset(table, base_dir, format="feather") - assert set(base_dir.rglob("*")) == set([base_dir / "dat_0.feather"]) + assert set(base_dir.rglob("*")) == set([base_dir / "part-0.feather"]) assert ds.dataset(base_dir, format="ipc").to_table().equals(table) # Same for 
single-element list of Table base_dir = tempdir / 'single-list' ds.write_dataset([table], base_dir, format="feather") - assert set(base_dir.rglob("*")) == set([base_dir / "dat_0.feather"]) + assert set(base_dir.rglob("*")) == set([base_dir / "part-0.feather"]) assert ds.dataset(base_dir, format="ipc").to_table().equals(table) # Provide list of batches to write multiple fragments base_dir = tempdir / 'multiple' ds.write_dataset(table.to_batches(), base_dir, format="feather") assert set(base_dir.rglob("*")) == set( - [base_dir / "dat_0.feather"]) + [base_dir / "part-0.feather"]) assert ds.dataset(base_dir, format="ipc").to_table().equals(table) # Provide list of tables to write multiple fragments base_dir = tempdir / 'multiple-table' ds.write_dataset([table, table], base_dir, format="feather") assert set(base_dir.rglob("*")) == set( - [base_dir / "dat_0.feather"]) + [base_dir / "part-0.feather"]) assert ds.dataset(base_dir, format="ipc").to_table().equals( pa.concat_tables([table]*2) ) @@ -2345,7 +2345,7 @@ def test_write_dataset_parquet(tempdir): ds.write_dataset(table, base_dir, format="parquet") # check that all files are present file_paths = list(base_dir.rglob("*")) - expected_paths = [base_dir / "dat_0.parquet"] + expected_paths = [base_dir / "part-0.parquet"] assert set(file_paths) == set(expected_paths) # check Table roundtrip result = ds.dataset(base_dir, format="parquet").to_table() @@ -2357,5 +2357,5 @@ def test_write_dataset_parquet(tempdir): opts = format.make_write_options(version=version) base_dir = tempdir / 'parquet_dataset_version{0}'.format(version) ds.write_dataset(table, base_dir, format=format, file_options=opts) - meta = pq.read_metadata(base_dir / "dat_0.parquet") + meta = pq.read_metadata(base_dir / "part-0.parquet") assert meta.format_version == version diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R index d56126f05b2..abeb0ce4393 100644 --- a/r/R/dataset-write.R +++ b/r/R/dataset-write.R @@ -37,8 +37,8 @@ #' use the current `group_by()` columns. #' @param basename_template string template for the names of files to be written. #' Must contain `"{i}"`, which will be replaced with an autoincremented -#' integer to generate basenames of datafiles. For example, `"dat_{i}.feather"` -#' will yield `"dat_0.feather", ...`. +#' integer to generate basenames of datafiles. For example, `"part-{i}.feather"` +#' will yield `"part-0.feather", ...`. #' @param hive_style logical: write partition segments as Hive-style #' (`key1=value1/key2=value2/file.ext`) or as just bare values. Default is `TRUE`. #' @param filesystem A [FileSystem] where the dataset should be written if it is a @@ -51,7 +51,7 @@ write_dataset <- function(dataset, path, format = dataset$format, partitioning = dplyr::group_vars(dataset), - basename_template = paste0("dat_{i}.", as.character(format)), + basename_template = paste0("part-{i}.", as.character(format)), hive_style = TRUE, filesystem = NULL, ...) 
{ From 5602aa843368a70ed130c12f0f80cab8fc7fe7e5 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 7 Oct 2020 14:44:19 -0400 Subject: [PATCH 34/34] correct R doc after dat_{i} -> part-{i} --- r/man/write_dataset.Rd | 6 +++--- r/vignettes/dataset.Rmd | 11 +++-------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/r/man/write_dataset.Rd b/r/man/write_dataset.Rd index ee51d244a44..e12c4287266 100644 --- a/r/man/write_dataset.Rd +++ b/r/man/write_dataset.Rd @@ -9,7 +9,7 @@ write_dataset( path, format = dataset$format, partitioning = dplyr::group_vars(dataset), - basename_template = paste0("dat_{i}.", as.character(format)), + basename_template = paste0("part-{i}.", as.character(format)), hive_style = TRUE, filesystem = NULL, ... @@ -36,8 +36,8 @@ use the current \code{group_by()} columns.} \item{basename_template}{string template for the names of files to be written. Must contain \code{"{i}"}, which will be replaced with an autoincremented -integer to generate basenames of datafiles. For example, \code{"dat_{i}.feather"} -will yield \verb{"dat_0.feather", ...}.} +integer to generate basenames of datafiles. For example, \code{"part-{i}.feather"} +will yield \verb{"part-0.feather", ...}.} \item{hive_style}{logical: write partition segments as Hive-style (\code{key1=value1/key2=value2/file.ext}) or as just bare values. Default is \code{TRUE}.} diff --git a/r/vignettes/dataset.Rmd b/r/vignettes/dataset.Rmd index df460c2682a..d9c90261f82 100644 --- a/r/vignettes/dataset.Rmd +++ b/r/vignettes/dataset.Rmd @@ -328,17 +328,12 @@ system("tree nyc-taxi/feather") # feather # ├── payment_type=1 -# │ ├── dat_0.ipc -# │ ├── dat_1.ipc -# │ ├── dat_2.ipc -# │ ├── dat_3.ipc -# │ ├── dat_4.ipc -# │ └── dat_5.ipc +# │ └── part-5.feather # ├── payment_type=2 -# │ ├── dat_0.ipc +# │ └── part-0.feather # ... # └── payment_type=5 -# └── dat_2.ipc +# └── part-2.feather # # 5 directories, 25 files ```
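
After this final rename, the Python and R defaults agree on `part-{i}` basenames, and extra arguments to the R `write_dataset()` are forwarded through `FileWriteOptions$create()` for the chosen format. A short sketch of the finished R interface follows, again with hypothetical paths, relying on the behaviour noted in patch 32 that an unpartitioned write produces a single file:

```r
library(arrow)

ds <- open_dataset("nyc-taxi/feather", format = "feather")

# No partitioning and no basename_template: a single file is written using
# the new default template, e.g. nyc-taxi/parquet/part-0.parquet
write_dataset(ds, "nyc-taxi/parquet", format = "parquet")

# A basename_template without "{i}" is rejected, per the validation exercised
# in test-dataset.R above:
# write_dataset(ds, "nyc-taxi/parquet", format = "parquet",
#               basename_template = "data.parquet")   # error
```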