From 8214444a74d99428845d0fadf46eb2a54016c203 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Sun, 8 Dec 2024 10:13:58 +0100 Subject: [PATCH 01/11] Core, Spark: Refactor RewriteFileGroup planner to core --- .../actions/RewriteFileGroupPlanner.java | 177 +++++++++++++++++ .../actions/SizeBasedFileRewriter.java | 2 +- .../actions/TestRewriteFileGroupPlanner.java | 161 ++++++++++++++++ .../actions/RewriteDataFilesSparkAction.java | 182 +++--------------- .../actions/TestRewriteDataFilesAction.java | 7 +- 5 files changed, 369 insertions(+), 160 deletions(-) create mode 100644 core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java create mode 100644 core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java diff --git a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java new file mode 100644 index 000000000000..6d1e27503da7 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.actions; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.RewriteJobOrder; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.Table; +import org.apache.iceberg.data.GenericRecord; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.StructLikeMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Checks the files in the table, and using the {@link FileRewriter} plans the groups for + * compaction. 
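+ *
+ * <p>A minimal usage sketch (the {@code rewriter} instance is assumed to be already configured;
+ * executing the planned groups is engine-specific):
+ *
+ * <pre>{@code
+ * RewriteFileGroupPlanner planner =
+ *     new RewriteFileGroupPlanner(rewriter, RewriteJobOrder.FILES_DESC);
+ * RewritePlanResult result =
+ *     planner.plan(table, Expressions.alwaysTrue(), table.currentSnapshot().snapshotId(), true);
+ * result.groups().forEach(group -> rewriteFiles(result, group));
+ * }</pre>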
+ */ +public class RewriteFileGroupPlanner { + private static final Logger LOG = LoggerFactory.getLogger(RewriteFileGroupPlanner.class); + + private final FileRewriter rewriter; + private final RewriteJobOrder rewriteJobOrder; + + public RewriteFileGroupPlanner( + FileRewriter rewriter, RewriteJobOrder rewriteJobOrder) { + this.rewriter = rewriter; + this.rewriteJobOrder = rewriteJobOrder; + } + + public RewritePlanResult plan( + Table table, Expression filter, long startingSnapshotId, boolean caseSensitive) { + StructLikeMap>> plan = + planFileGroups(table, filter, startingSnapshotId, caseSensitive); + RewriteExecutionContext ctx = new RewriteExecutionContext(); + Stream groups = + plan.entrySet().stream() + .filter(e -> !e.getValue().isEmpty()) + .flatMap( + e -> { + StructLike partition = e.getKey(); + List> scanGroups = e.getValue(); + return scanGroups.stream().map(tasks -> newRewriteGroup(ctx, partition, tasks)); + }) + .sorted(RewriteFileGroup.comparator(rewriteJobOrder)); + Map groupsInPartition = plan.transformValues(List::size); + int totalGroupCount = groupsInPartition.values().stream().reduce(Integer::sum).orElse(0); + return new RewritePlanResult(groups, totalGroupCount, groupsInPartition); + } + + private StructLikeMap>> planFileGroups( + Table table, Expression filter, long startingSnapshotId, boolean caseSensitive) { + CloseableIterable fileScanTasks = + table + .newScan() + .useSnapshot(startingSnapshotId) + .caseSensitive(caseSensitive) + .filter(filter) + .ignoreResiduals() + .planFiles(); + + try { + Types.StructType partitionType = table.spec().partitionType(); + StructLikeMap> filesByPartition = + groupByPartition(table, partitionType, fileScanTasks); + return filesByPartition.transformValues( + tasks -> ImmutableList.copyOf(rewriter.planFileGroups(tasks))); + } finally { + try { + fileScanTasks.close(); + } catch (IOException io) { + LOG.error("Cannot properly close file iterable while planning for rewrite", io); + } + } + } + + private StructLikeMap> groupByPartition( + Table table, Types.StructType partitionType, Iterable tasks) { + StructLikeMap> filesByPartition = StructLikeMap.create(partitionType); + StructLike emptyStruct = GenericRecord.create(partitionType); + + for (FileScanTask task : tasks) { + // If a task uses an incompatible partition spec the data inside could contain values + // which belong to multiple partitions in the current spec. Treating all such files as + // un-partitioned and grouping them together helps to minimize new files made. + StructLike taskPartition = + task.file().specId() == table.spec().specId() ? 
task.file().partition() : emptyStruct; + + filesByPartition.computeIfAbsent(taskPartition, unused -> Lists.newArrayList()).add(task); + } + + return filesByPartition; + } + + private RewriteFileGroup newRewriteGroup( + RewriteExecutionContext ctx, StructLike partition, List tasks) { + RewriteDataFiles.FileGroupInfo info = + ImmutableRewriteDataFiles.FileGroupInfo.builder() + .globalIndex(ctx.currentGlobalIndex()) + .partitionIndex(ctx.currentPartitionIndex(partition)) + .partition(partition) + .build(); + return new RewriteFileGroup(info, Lists.newArrayList(tasks)); + } + + public static class RewritePlanResult { + private final Stream groups; + private final int totalGroupCount; + private final Map groupsInPartition; + + private RewritePlanResult( + Stream groups, + int totalGroupCount, + Map groupsInPartition) { + this.groups = groups; + this.totalGroupCount = totalGroupCount; + this.groupsInPartition = groupsInPartition; + } + + public Stream groups() { + return groups; + } + + public int groupsInPartition(StructLike partition) { + return groupsInPartition.get(partition); + } + + public int totalGroupCount() { + return totalGroupCount; + } + } + + private static class RewriteExecutionContext { + private final Map partitionIndexMap; + private final AtomicInteger groupIndex; + + private RewriteExecutionContext() { + this.partitionIndexMap = Maps.newConcurrentMap(); + this.groupIndex = new AtomicInteger(1); + } + + private int currentGlobalIndex() { + return groupIndex.getAndIncrement(); + } + + private int currentPartitionIndex(StructLike partition) { + return partitionIndexMap.merge(partition, 1, Integer::sum); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java b/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java index cea7003c1a38..5d45392c5487 100644 --- a/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java +++ b/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java @@ -191,7 +191,7 @@ protected long inputSize(List group) { * of output files. The final split size is adjusted to be at least as big as the target file size * but less than the max write file size. */ - protected long splitSize(long inputSize) { + public long splitSize(long inputSize) { long estimatedSplitSize = (inputSize / numOutputFiles(inputSize)) + SPLIT_OVERHEAD; if (estimatedSplitSize < targetFileSize) { return targetFileSize; diff --git a/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java b/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java new file mode 100644 index 000000000000..d3382fb8b349 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.actions; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.File; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.DataFiles; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.RewriteJobOrder; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.TestBase; +import org.apache.iceberg.TestTables; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +class TestRewriteFileGroupPlanner { + private static final DataFile FILE_1 = newDataFile("data_bucket=0", 10); + private static final DataFile FILE_2 = newDataFile("data_bucket=0", 10); + private static final DataFile FILE_3 = newDataFile("data_bucket=0", 10); + private static final DataFile FILE_4 = newDataFile("data_bucket=1", 11); + private static final DataFile FILE_5 = newDataFile("data_bucket=1", 11); + private static final DataFile FILE_6 = newDataFile("data_bucket=2", 50); + + private static final Map> EXPECTED = + ImmutableMap.of( + RewriteJobOrder.FILES_DESC, + ImmutableList.of(FILE_1.partition(), FILE_4.partition(), FILE_6.partition()), + RewriteJobOrder.FILES_ASC, + ImmutableList.of(FILE_6.partition(), FILE_4.partition(), FILE_1.partition()), + RewriteJobOrder.BYTES_DESC, + ImmutableList.of(FILE_6.partition(), FILE_1.partition(), FILE_4.partition()), + RewriteJobOrder.BYTES_ASC, + ImmutableList.of(FILE_4.partition(), FILE_1.partition(), FILE_6.partition())); + + @TempDir private File tableDir = null; + private TestTables.TestTable table = null; + + @BeforeEach + public void setupTable() throws Exception { + this.table = TestTables.create(tableDir, "test", TestBase.SCHEMA, TestBase.SPEC, 3); + } + + @AfterEach + public void cleanupTables() { + TestTables.clearTables(); + } + + @ParameterizedTest + @EnumSource( + value = RewriteJobOrder.class, + names = {"FILES_DESC", "FILES_ASC", "BYTES_DESC", "BYTES_ASC"}) + void testGroups(RewriteJobOrder order) { + table + .newAppend() + .appendFile(FILE_1) + .appendFile(FILE_2) + .appendFile(FILE_3) + .appendFile(FILE_4) + .appendFile(FILE_5) + .appendFile(FILE_6) + .commit(); + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(new DummyRewriter(false), order); + RewriteFileGroupPlanner.RewritePlanResult result = + planner.plan(table, Expressions.alwaysTrue(), table.currentSnapshot().snapshotId(), false); + List groups = result.groups().collect(Collectors.toList()); + assertThat(groups.stream().map(group -> group.info().partition()).collect(Collectors.toList())) + .isEqualTo(EXPECTED.get(order)); + assertThat(result.totalGroupCount()).isEqualTo(3); + EXPECTED.get(order).forEach(s -> assertThat(result.groupsInPartition(s)).isEqualTo(1)); + } + + @Test + void testContext() { + table + .newAppend() + .appendFile(FILE_1) + .appendFile(FILE_2) + .appendFile(FILE_3) + 
.appendFile(FILE_4) + .appendFile(FILE_5) + .appendFile(FILE_6) + .commit(); + RewriteFileGroupPlanner planner = + new RewriteFileGroupPlanner(new DummyRewriter(true), RewriteJobOrder.FILES_DESC); + RewriteFileGroupPlanner.RewritePlanResult result = + planner.plan(table, Expressions.alwaysTrue(), table.currentSnapshot().snapshotId(), false); + assertThat(result.totalGroupCount()).isEqualTo(6); + assertThat(result.groupsInPartition(FILE_1.partition())).isEqualTo(3); + assertThat(result.groupsInPartition(FILE_4.partition())).isEqualTo(2); + assertThat(result.groupsInPartition(FILE_6.partition())).isEqualTo(1); + } + + private static class DummyRewriter implements FileRewriter { + private final boolean split; + + private DummyRewriter(boolean split) { + this.split = split; + } + + @Override + public Set validOptions() { + return Set.of(); + } + + @Override + public void init(Map options) {} + + @Override + public Iterable> planFileGroups(Iterable tasks) { + List taskList = Lists.newArrayList(tasks); + return split + ? taskList.stream().map(ImmutableList::of).collect(Collectors.toList()) + : ImmutableList.of(taskList); + } + + @Override + public Set rewrite(List group) { + return Set.of(); + } + } + + private static DataFile newDataFile(String partitionPath, long fileSize) { + return DataFiles.builder(TestBase.SPEC) + .withPath("/path/to/data-" + UUID.randomUUID() + ".parquet") + .withFileSizeInBytes(fileSize) + .withPartitionPath(partitionPath) + .withRecordCount(1) + .build(); + } +} diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java index e04a0c88b4bb..fe0cbdaa4c46 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java @@ -18,20 +18,16 @@ */ package org.apache.iceberg.spark.actions; -import java.io.IOException; import java.math.RoundingMode; import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.RewriteJobOrder; @@ -44,28 +40,24 @@ import org.apache.iceberg.actions.RewriteDataFiles; import org.apache.iceberg.actions.RewriteDataFilesCommitManager; import org.apache.iceberg.actions.RewriteFileGroup; -import org.apache.iceberg.data.GenericRecord; +import org.apache.iceberg.actions.RewriteFileGroupPlanner; +import org.apache.iceberg.actions.RewriteFileGroupPlanner.RewritePlanResult; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import 
org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Queues; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.relocated.com.google.common.math.IntMath; import org.apache.iceberg.relocated.com.google.common.util.concurrent.MoreExecutors; import org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.iceberg.spark.SparkUtil; -import org.apache.iceberg.types.Types.StructType; import org.apache.iceberg.util.PropertyUtil; -import org.apache.iceberg.util.StructLikeMap; import org.apache.iceberg.util.Tasks; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.internal.SQLConf; @@ -171,21 +163,17 @@ public RewriteDataFiles.Result execute() { validateAndInitOptions(); - StructLikeMap>> fileGroupsByPartition = - planFileGroups(startingSnapshotId); - RewriteExecutionContext ctx = new RewriteExecutionContext(fileGroupsByPartition); + RewritePlanResult result = plan(startingSnapshotId); - if (ctx.totalGroupCount() == 0) { + if (result.totalGroupCount() == 0) { LOG.info("Nothing found to rewrite in {}", table.name()); return EMPTY_RESULT; } - Stream groupStream = toGroupStream(ctx, fileGroupsByPartition); - Builder resultBuilder = partialProgressEnabled - ? doExecuteWithPartialProgress(ctx, groupStream, commitManager(startingSnapshotId)) - : doExecute(ctx, groupStream, commitManager(startingSnapshotId)); + ? doExecuteWithPartialProgress(result, commitManager(startingSnapshotId)) + : doExecute(result, commitManager(startingSnapshotId)); if (removeDanglingDeletes) { RemoveDanglingDeletesSparkAction action = @@ -193,68 +181,18 @@ public RewriteDataFiles.Result execute() { int removedCount = Iterables.size(action.execute().removedDeleteFiles()); resultBuilder.removedDeleteFilesCount(removedCount); } - return resultBuilder.build(); - } - - StructLikeMap>> planFileGroups(long startingSnapshotId) { - CloseableIterable fileScanTasks = - table - .newScan() - .useSnapshot(startingSnapshotId) - .caseSensitive(caseSensitive) - .filter(filter) - .ignoreResiduals() - .planFiles(); - try { - StructType partitionType = table.spec().partitionType(); - StructLikeMap> filesByPartition = - groupByPartition(partitionType, fileScanTasks); - return fileGroupsByPartition(filesByPartition); - } finally { - try { - fileScanTasks.close(); - } catch (IOException io) { - LOG.error("Cannot properly close file iterable while planning for rewrite", io); - } - } - } - - private StructLikeMap> groupByPartition( - StructType partitionType, Iterable tasks) { - StructLikeMap> filesByPartition = StructLikeMap.create(partitionType); - StructLike emptyStruct = GenericRecord.create(partitionType); - - for (FileScanTask task : tasks) { - // If a task uses an incompatible partition spec the data inside could contain values - // which belong to multiple partitions in the current spec. Treating all such files as - // un-partitioned and grouping them together helps to minimize new files made. - StructLike taskPartition = - task.file().specId() == table.spec().specId() ? 
task.file().partition() : emptyStruct; - - List files = filesByPartition.get(taskPartition); - if (files == null) { - files = Lists.newArrayList(); - } - - files.add(task); - filesByPartition.put(taskPartition, files); - } - return filesByPartition; - } - - private StructLikeMap>> fileGroupsByPartition( - StructLikeMap> filesByPartition) { - return filesByPartition.transformValues(this::planFileGroups); + return resultBuilder.build(); } - private List> planFileGroups(List tasks) { - return ImmutableList.copyOf(rewriter.planFileGroups(tasks)); + RewritePlanResult plan(long startingSnapshotId) { + return new RewriteFileGroupPlanner(rewriter, rewriteJobOrder) + .plan(table, filter, startingSnapshotId, caseSensitive); } @VisibleForTesting - RewriteFileGroup rewriteFiles(RewriteExecutionContext ctx, RewriteFileGroup fileGroup) { - String desc = jobDesc(fileGroup, ctx); + RewriteFileGroup rewriteFiles(RewritePlanResult planResult, RewriteFileGroup fileGroup) { + String desc = jobDesc(fileGroup, planResult); Set addedFiles = withJobGroupInfo( newJobGroupInfo("REWRITE-DATA-FILES", desc), @@ -280,29 +218,25 @@ RewriteDataFilesCommitManager commitManager(long startingSnapshotId) { } private Builder doExecute( - RewriteExecutionContext ctx, - Stream groupStream, - RewriteDataFilesCommitManager commitManager) { + RewritePlanResult planResult, RewriteDataFilesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); ConcurrentLinkedQueue rewrittenGroups = Queues.newConcurrentLinkedQueue(); Tasks.Builder rewriteTaskBuilder = - Tasks.foreach(groupStream) + Tasks.foreach(planResult.groups()) .executeWith(rewriteService) .stopOnFailure() .noRetry() .onFailure( - (fileGroup, exception) -> { - LOG.warn( - "Failure during rewrite process for group {}", fileGroup.info(), exception); - }); + (fileGroup, exception) -> + LOG.warn( + "Failure during rewrite process for group {}", + fileGroup.info(), + exception)); try { - rewriteTaskBuilder.run( - fileGroup -> { - rewrittenGroups.add(rewriteFiles(ctx, fileGroup)); - }); + rewriteTaskBuilder.run(fileGroup -> rewrittenGroups.add(rewriteFiles(planResult, fileGroup))); } catch (Exception e) { // At least one rewrite group failed, clean up all completed rewrites LOG.error( @@ -345,20 +279,19 @@ private Builder doExecute( } private Builder doExecuteWithPartialProgress( - RewriteExecutionContext ctx, - Stream groupStream, - RewriteDataFilesCommitManager commitManager) { + RewritePlanResult planResult, RewriteDataFilesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); // start commit service - int groupsPerCommit = IntMath.divide(ctx.totalGroupCount(), maxCommits, RoundingMode.CEILING); + int groupsPerCommit = + IntMath.divide(planResult.totalGroupCount(), maxCommits, RoundingMode.CEILING); RewriteDataFilesCommitManager.CommitService commitService = commitManager.service(groupsPerCommit); commitService.start(); Collection rewriteFailures = new ConcurrentLinkedQueue<>(); // start rewrite tasks - Tasks.foreach(groupStream) + Tasks.foreach(planResult.groups()) .suppressFailureWhenFinished() .executeWith(rewriteService) .noRetry() @@ -371,7 +304,7 @@ private Builder doExecuteWithPartialProgress( .dataFilesCount(fileGroup.numFiles()) .build()); }) - .run(fileGroup -> commitService.offer(rewriteFiles(ctx, fileGroup))); + .run(fileGroup -> commitService.offer(rewriteFiles(planResult, fileGroup))); rewriteService.shutdown(); // stop commit service @@ -404,32 +337,6 @@ private Builder doExecuteWithPartialProgress( 
.rewriteFailures(rewriteFailures); } - Stream toGroupStream( - RewriteExecutionContext ctx, Map>> groupsByPartition) { - return groupsByPartition.entrySet().stream() - .filter(e -> !e.getValue().isEmpty()) - .flatMap( - e -> { - StructLike partition = e.getKey(); - List> scanGroups = e.getValue(); - return scanGroups.stream().map(tasks -> newRewriteGroup(ctx, partition, tasks)); - }) - .sorted(RewriteFileGroup.comparator(rewriteJobOrder)); - } - - private RewriteFileGroup newRewriteGroup( - RewriteExecutionContext ctx, StructLike partition, List tasks) { - int globalIndex = ctx.currentGlobalIndex(); - int partitionIndex = ctx.currentPartitionIndex(partition); - FileGroupInfo info = - ImmutableRewriteDataFiles.FileGroupInfo.builder() - .globalIndex(globalIndex) - .partitionIndex(partitionIndex) - .partition(partition) - .build(); - return new RewriteFileGroup(info, tasks); - } - private Iterable toRewriteResults(List commitResults) { return commitResults.stream().map(RewriteFileGroup::asResult).collect(Collectors.toList()); } @@ -492,7 +399,7 @@ void validateAndInitOptions() { PARTIAL_PROGRESS_ENABLED); } - private String jobDesc(RewriteFileGroup group, RewriteExecutionContext ctx) { + private String jobDesc(RewriteFileGroup group, RewritePlanResult planResult) { StructLike partition = group.info().partition(); if (partition.size() > 0) { return String.format( @@ -500,10 +407,10 @@ private String jobDesc(RewriteFileGroup group, RewriteExecutionContext ctx) { group.rewrittenFiles().size(), rewriter.description(), group.info().globalIndex(), - ctx.totalGroupCount(), + planResult.totalGroupCount(), partition, group.info().partitionIndex(), - ctx.groupsInPartition(partition), + planResult.groupsInPartition(partition), table.name()); } else { return String.format( @@ -511,39 +418,8 @@ private String jobDesc(RewriteFileGroup group, RewriteExecutionContext ctx) { group.rewrittenFiles().size(), rewriter.description(), group.info().globalIndex(), - ctx.totalGroupCount(), + planResult.totalGroupCount(), table.name()); } } - - @VisibleForTesting - static class RewriteExecutionContext { - private final StructLikeMap numGroupsByPartition; - private final int totalGroupCount; - private final Map partitionIndexMap; - private final AtomicInteger groupIndex; - - RewriteExecutionContext(StructLikeMap>> fileGroupsByPartition) { - this.numGroupsByPartition = fileGroupsByPartition.transformValues(List::size); - this.totalGroupCount = numGroupsByPartition.values().stream().reduce(Integer::sum).orElse(0); - this.partitionIndexMap = Maps.newConcurrentMap(); - this.groupIndex = new AtomicInteger(1); - } - - public int currentGlobalIndex() { - return groupIndex.getAndIncrement(); - } - - public int currentPartitionIndex(StructLike partition) { - return partitionIndexMap.merge(partition, 1, Integer::sum); - } - - public int groupsInPartition(StructLike partition) { - return numGroupsByPartition.get(partition); - } - - public int totalGroupCount() { - return totalGroupCount; - } - } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java index 38c4d32a90d2..2127b20aa9b1 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java @@ -108,7 +108,6 @@ import org.apache.iceberg.spark.SparkTableUtil; import 
org.apache.iceberg.spark.SparkWriteOptions; import org.apache.iceberg.spark.TestBase; -import org.apache.iceberg.spark.actions.RewriteDataFilesSparkAction.RewriteExecutionContext; import org.apache.iceberg.spark.data.TestHelpers; import org.apache.iceberg.spark.source.ThreeColumnRecord; import org.apache.iceberg.types.Comparators; @@ -117,7 +116,6 @@ import org.apache.iceberg.types.Types.NestedField; import org.apache.iceberg.util.ArrayUtil; import org.apache.iceberg.util.Pair; -import org.apache.iceberg.util.StructLikeMap; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; @@ -1852,11 +1850,8 @@ protected List currentDataFiles(Table table) { private Stream toGroupStream(Table table, RewriteDataFilesSparkAction rewrite) { rewrite.validateAndInitOptions(); - StructLikeMap>> fileGroupsByPartition = - rewrite.planFileGroups(table.currentSnapshot().snapshotId()); - return rewrite.toGroupStream( - new RewriteExecutionContext(fileGroupsByPartition), fileGroupsByPartition); + return rewrite.plan(table.currentSnapshot().snapshotId()).groups(); } protected List currentData() { From ef646fbcd9ce2c6af1ada35008ca6dbcc3d43310 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Thu, 14 Nov 2024 10:46:52 +0100 Subject: [PATCH 02/11] Russell's comments --- .../org/apache/iceberg/EmptyStructLike.java | 4 +- .../actions/RewriteFileGroupPlanner.java | 39 ++++++++++++------- .../actions/TestRewriteFileGroupPlanner.java | 4 +- .../actions/RewriteDataFilesSparkAction.java | 30 +++++++------- 4 files changed, 44 insertions(+), 33 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/EmptyStructLike.java b/api/src/main/java/org/apache/iceberg/EmptyStructLike.java index 2d57f4c01a66..8b046780aa7a 100644 --- a/api/src/main/java/org/apache/iceberg/EmptyStructLike.java +++ b/api/src/main/java/org/apache/iceberg/EmptyStructLike.java @@ -20,13 +20,13 @@ import java.io.Serializable; -class EmptyStructLike implements StructLike, Serializable { +public class EmptyStructLike implements StructLike, Serializable { private static final EmptyStructLike INSTANCE = new EmptyStructLike(); private EmptyStructLike() {} - static EmptyStructLike get() { + public static EmptyStructLike get() { return INSTANCE; } diff --git a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java index 6d1e27503da7..d80a6163dc94 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java @@ -24,11 +24,11 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Stream; import org.apache.iceberg.DataFile; +import org.apache.iceberg.EmptyStructLike; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.RewriteJobOrder; import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; -import org.apache.iceberg.data.GenericRecord; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; @@ -40,8 +40,8 @@ import org.slf4j.LoggerFactory; /** - * Checks the files in the table, and using the {@link FileRewriter} plans the groups for - * compaction. + * Checks the files in the {@link Table}. The {@link RewriteFileGroup}s are grouped by partitions + * and split by the {@link FileRewriter}. 
*/ public class RewriteFileGroupPlanner { private static final Logger LOG = LoggerFactory.getLogger(RewriteFileGroupPlanner.class); @@ -55,10 +55,18 @@ public RewriteFileGroupPlanner( this.rewriteJobOrder = rewriteJobOrder; } - public RewritePlanResult plan( - Table table, Expression filter, long startingSnapshotId, boolean caseSensitive) { + /** + * Generates the plan for the current table. + * + * @param table to plan for + * @param filter to exclude files from planning + * @param snapshotId of the last snapshot included in the plan + * @param caseSensitive setting for filtering + * @return the generated plan which could be executed during the compaction + */ + public RewritePlan plan(Table table, Expression filter, long snapshotId, boolean caseSensitive) { StructLikeMap>> plan = - planFileGroups(table, filter, startingSnapshotId, caseSensitive); + planFileGroups(table, filter, snapshotId, caseSensitive); RewriteExecutionContext ctx = new RewriteExecutionContext(); Stream groups = plan.entrySet().stream() @@ -72,15 +80,15 @@ public RewritePlanResult plan( .sorted(RewriteFileGroup.comparator(rewriteJobOrder)); Map groupsInPartition = plan.transformValues(List::size); int totalGroupCount = groupsInPartition.values().stream().reduce(Integer::sum).orElse(0); - return new RewritePlanResult(groups, totalGroupCount, groupsInPartition); + return new RewritePlan(groups, totalGroupCount, groupsInPartition); } private StructLikeMap>> planFileGroups( - Table table, Expression filter, long startingSnapshotId, boolean caseSensitive) { + Table table, Expression filter, long snapshotId, boolean caseSensitive) { CloseableIterable fileScanTasks = table .newScan() - .useSnapshot(startingSnapshotId) + .useSnapshot(snapshotId) .caseSensitive(caseSensitive) .filter(filter) .ignoreResiduals() @@ -104,14 +112,15 @@ private StructLikeMap>> planFileGroups( private StructLikeMap> groupByPartition( Table table, Types.StructType partitionType, Iterable tasks) { StructLikeMap> filesByPartition = StructLikeMap.create(partitionType); - StructLike emptyStruct = GenericRecord.create(partitionType); for (FileScanTask task : tasks) { // If a task uses an incompatible partition spec the data inside could contain values // which belong to multiple partitions in the current spec. Treating all such files as // un-partitioned and grouping them together helps to minimize new files made. StructLike taskPartition = - task.file().specId() == table.spec().specId() ? task.file().partition() : emptyStruct; + task.file().specId() == table.spec().specId() + ? task.file().partition() + : EmptyStructLike.get(); filesByPartition.computeIfAbsent(taskPartition, unused -> Lists.newArrayList()).add(task); } @@ -130,12 +139,13 @@ private RewriteFileGroup newRewriteGroup( return new RewriteFileGroup(info, Lists.newArrayList(tasks)); } - public static class RewritePlanResult { + /** Result of the data file rewrite planning. */ + public static class RewritePlan { private final Stream groups; private final int totalGroupCount; private final Map groupsInPartition; - private RewritePlanResult( + private RewritePlan( Stream groups, int totalGroupCount, Map groupsInPartition) { @@ -144,14 +154,17 @@ private RewritePlanResult( this.groupsInPartition = groupsInPartition; } + /** The stream of the generated {@link RewriteFileGroup}s. */ public Stream groups() { return groups; } + /** The number of the generated groups in the given partition. 
*/ public int groupsInPartition(StructLike partition) { return groupsInPartition.get(partition); } + /** The total number of the groups generated by this plan. */ public int totalGroupCount() { return totalGroupCount; } diff --git a/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java b/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java index d3382fb8b349..8bf7018eccc4 100644 --- a/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java +++ b/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java @@ -91,7 +91,7 @@ void testGroups(RewriteJobOrder order) { .appendFile(FILE_6) .commit(); RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(new DummyRewriter(false), order); - RewriteFileGroupPlanner.RewritePlanResult result = + RewriteFileGroupPlanner.RewritePlan result = planner.plan(table, Expressions.alwaysTrue(), table.currentSnapshot().snapshotId(), false); List groups = result.groups().collect(Collectors.toList()); assertThat(groups.stream().map(group -> group.info().partition()).collect(Collectors.toList())) @@ -113,7 +113,7 @@ void testContext() { .commit(); RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(new DummyRewriter(true), RewriteJobOrder.FILES_DESC); - RewriteFileGroupPlanner.RewritePlanResult result = + RewriteFileGroupPlanner.RewritePlan result = planner.plan(table, Expressions.alwaysTrue(), table.currentSnapshot().snapshotId(), false); assertThat(result.totalGroupCount()).isEqualTo(6); assertThat(result.groupsInPartition(FILE_1.partition())).isEqualTo(3); diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java index fe0cbdaa4c46..84520187d3fc 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java @@ -41,7 +41,7 @@ import org.apache.iceberg.actions.RewriteDataFilesCommitManager; import org.apache.iceberg.actions.RewriteFileGroup; import org.apache.iceberg.actions.RewriteFileGroupPlanner; -import org.apache.iceberg.actions.RewriteFileGroupPlanner.RewritePlanResult; +import org.apache.iceberg.actions.RewriteFileGroupPlanner.RewritePlan; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.expressions.Expression; @@ -163,17 +163,17 @@ public RewriteDataFiles.Result execute() { validateAndInitOptions(); - RewritePlanResult result = plan(startingSnapshotId); + RewritePlan plan = plan(startingSnapshotId); - if (result.totalGroupCount() == 0) { + if (plan.totalGroupCount() == 0) { LOG.info("Nothing found to rewrite in {}", table.name()); return EMPTY_RESULT; } Builder resultBuilder = partialProgressEnabled - ? doExecuteWithPartialProgress(result, commitManager(startingSnapshotId)) - : doExecute(result, commitManager(startingSnapshotId)); + ? 
doExecuteWithPartialProgress(plan, commitManager(startingSnapshotId)) + : doExecute(plan, commitManager(startingSnapshotId)); if (removeDanglingDeletes) { RemoveDanglingDeletesSparkAction action = @@ -185,13 +185,13 @@ public RewriteDataFiles.Result execute() { return resultBuilder.build(); } - RewritePlanResult plan(long startingSnapshotId) { + RewritePlan plan(long startingSnapshotId) { return new RewriteFileGroupPlanner(rewriter, rewriteJobOrder) .plan(table, filter, startingSnapshotId, caseSensitive); } @VisibleForTesting - RewriteFileGroup rewriteFiles(RewritePlanResult planResult, RewriteFileGroup fileGroup) { + RewriteFileGroup rewriteFiles(RewritePlan planResult, RewriteFileGroup fileGroup) { String desc = jobDesc(fileGroup, planResult); Set addedFiles = withJobGroupInfo( @@ -217,8 +217,7 @@ RewriteDataFilesCommitManager commitManager(long startingSnapshotId) { table, startingSnapshotId, useStartingSequenceNumber, commitSummary()); } - private Builder doExecute( - RewritePlanResult planResult, RewriteDataFilesCommitManager commitManager) { + private Builder doExecute(RewritePlan planResult, RewriteDataFilesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); ConcurrentLinkedQueue rewrittenGroups = Queues.newConcurrentLinkedQueue(); @@ -229,11 +228,10 @@ private Builder doExecute( .stopOnFailure() .noRetry() .onFailure( - (fileGroup, exception) -> - LOG.warn( - "Failure during rewrite process for group {}", - fileGroup.info(), - exception)); + (fileGroup, exception) -> { + LOG.warn( + "Failure during rewrite process for group {}", fileGroup.info(), exception); + }); try { rewriteTaskBuilder.run(fileGroup -> rewrittenGroups.add(rewriteFiles(planResult, fileGroup))); @@ -279,7 +277,7 @@ private Builder doExecute( } private Builder doExecuteWithPartialProgress( - RewritePlanResult planResult, RewriteDataFilesCommitManager commitManager) { + RewritePlan planResult, RewriteDataFilesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); // start commit service @@ -399,7 +397,7 @@ void validateAndInitOptions() { PARTIAL_PROGRESS_ENABLED); } - private String jobDesc(RewriteFileGroup group, RewritePlanResult planResult) { + private String jobDesc(RewriteFileGroup group, RewritePlan planResult) { StructLike partition = group.info().partition(); if (partition.size() > 0) { return String.format( From 4e01ed713bf2ac574fbdb1cb00076ccd19fcf45e Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Thu, 14 Nov 2024 13:24:47 +0100 Subject: [PATCH 03/11] Reverting EmptyStructLike changes --- api/src/main/java/org/apache/iceberg/EmptyStructLike.java | 4 ++-- .../apache/iceberg/actions/RewriteFileGroupPlanner.java | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/EmptyStructLike.java b/api/src/main/java/org/apache/iceberg/EmptyStructLike.java index 8b046780aa7a..2d57f4c01a66 100644 --- a/api/src/main/java/org/apache/iceberg/EmptyStructLike.java +++ b/api/src/main/java/org/apache/iceberg/EmptyStructLike.java @@ -20,13 +20,13 @@ import java.io.Serializable; -public class EmptyStructLike implements StructLike, Serializable { +class EmptyStructLike implements StructLike, Serializable { private static final EmptyStructLike INSTANCE = new EmptyStructLike(); private EmptyStructLike() {} - public static EmptyStructLike get() { + static EmptyStructLike get() { return INSTANCE; } diff --git a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java 
b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java index d80a6163dc94..56ba48ca431f 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java @@ -24,11 +24,11 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Stream; import org.apache.iceberg.DataFile; -import org.apache.iceberg.EmptyStructLike; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.RewriteJobOrder; import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; +import org.apache.iceberg.data.GenericRecord; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; @@ -112,15 +112,14 @@ private StructLikeMap>> planFileGroups( private StructLikeMap> groupByPartition( Table table, Types.StructType partitionType, Iterable tasks) { StructLikeMap> filesByPartition = StructLikeMap.create(partitionType); + StructLike emptyStruct = GenericRecord.create(partitionType); for (FileScanTask task : tasks) { // If a task uses an incompatible partition spec the data inside could contain values // which belong to multiple partitions in the current spec. Treating all such files as // un-partitioned and grouping them together helps to minimize new files made. StructLike taskPartition = - task.file().specId() == table.spec().specId() - ? task.file().partition() - : EmptyStructLike.get(); + task.file().specId() == table.spec().specId() ? task.file().partition() : emptyStruct; filesByPartition.computeIfAbsent(taskPartition, unused -> Lists.newArrayList()).add(task); } From 7de2eb5690d2827216b6fcb0cfd7c843dc3ba843 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Mon, 18 Nov 2024 15:22:12 +0100 Subject: [PATCH 04/11] Szehon's comments --- .../actions/RewriteFileGroupPlanner.java | 4 +-- .../actions/RewriteDataFilesSparkAction.java | 27 +++++++++---------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java index 56ba48ca431f..e7f0f8ea6518 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java @@ -40,8 +40,8 @@ import org.slf4j.LoggerFactory; /** - * Checks the files in the {@link Table}. The {@link RewriteFileGroup}s are grouped by partitions - * and split by the {@link FileRewriter}. + * Groups specified files in the {@link Table} by {@link RewriteFileGroup}s. These will be grouped + * by partitions. 
*/ public class RewriteFileGroupPlanner { private static final Logger LOG = LoggerFactory.getLogger(RewriteFileGroupPlanner.class); diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java index 84520187d3fc..442ebc09670f 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java @@ -191,8 +191,8 @@ RewritePlan plan(long startingSnapshotId) { } @VisibleForTesting - RewriteFileGroup rewriteFiles(RewritePlan planResult, RewriteFileGroup fileGroup) { - String desc = jobDesc(fileGroup, planResult); + RewriteFileGroup rewriteFiles(RewritePlan plan, RewriteFileGroup fileGroup) { + String desc = jobDesc(fileGroup, plan); Set addedFiles = withJobGroupInfo( newJobGroupInfo("REWRITE-DATA-FILES", desc), @@ -217,13 +217,13 @@ RewriteDataFilesCommitManager commitManager(long startingSnapshotId) { table, startingSnapshotId, useStartingSequenceNumber, commitSummary()); } - private Builder doExecute(RewritePlan planResult, RewriteDataFilesCommitManager commitManager) { + private Builder doExecute(RewritePlan plan, RewriteDataFilesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); ConcurrentLinkedQueue rewrittenGroups = Queues.newConcurrentLinkedQueue(); Tasks.Builder rewriteTaskBuilder = - Tasks.foreach(planResult.groups()) + Tasks.foreach(plan.groups()) .executeWith(rewriteService) .stopOnFailure() .noRetry() @@ -234,7 +234,7 @@ private Builder doExecute(RewritePlan planResult, RewriteDataFilesCommitManager }); try { - rewriteTaskBuilder.run(fileGroup -> rewrittenGroups.add(rewriteFiles(planResult, fileGroup))); + rewriteTaskBuilder.run(fileGroup -> rewrittenGroups.add(rewriteFiles(plan, fileGroup))); } catch (Exception e) { // At least one rewrite group failed, clean up all completed rewrites LOG.error( @@ -277,19 +277,18 @@ private Builder doExecute(RewritePlan planResult, RewriteDataFilesCommitManager } private Builder doExecuteWithPartialProgress( - RewritePlan planResult, RewriteDataFilesCommitManager commitManager) { + RewritePlan plan, RewriteDataFilesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); // start commit service - int groupsPerCommit = - IntMath.divide(planResult.totalGroupCount(), maxCommits, RoundingMode.CEILING); + int groupsPerCommit = IntMath.divide(plan.totalGroupCount(), maxCommits, RoundingMode.CEILING); RewriteDataFilesCommitManager.CommitService commitService = commitManager.service(groupsPerCommit); commitService.start(); Collection rewriteFailures = new ConcurrentLinkedQueue<>(); // start rewrite tasks - Tasks.foreach(planResult.groups()) + Tasks.foreach(plan.groups()) .suppressFailureWhenFinished() .executeWith(rewriteService) .noRetry() @@ -302,7 +301,7 @@ private Builder doExecuteWithPartialProgress( .dataFilesCount(fileGroup.numFiles()) .build()); }) - .run(fileGroup -> commitService.offer(rewriteFiles(planResult, fileGroup))); + .run(fileGroup -> commitService.offer(rewriteFiles(plan, fileGroup))); rewriteService.shutdown(); // stop commit service @@ -397,7 +396,7 @@ void validateAndInitOptions() { PARTIAL_PROGRESS_ENABLED); } - private String jobDesc(RewriteFileGroup group, RewritePlan planResult) { + private String jobDesc(RewriteFileGroup group, RewritePlan plan) { StructLike partition = 
group.info().partition(); if (partition.size() > 0) { return String.format( @@ -405,10 +404,10 @@ private String jobDesc(RewriteFileGroup group, RewritePlan planResult) { group.rewrittenFiles().size(), rewriter.description(), group.info().globalIndex(), - planResult.totalGroupCount(), + plan.totalGroupCount(), partition, group.info().partitionIndex(), - planResult.groupsInPartition(partition), + plan.groupsInPartition(partition), table.name()); } else { return String.format( @@ -416,7 +415,7 @@ private String jobDesc(RewriteFileGroup group, RewritePlan planResult) { group.rewrittenFiles().size(), rewriter.description(), group.info().globalIndex(), - planResult.totalGroupCount(), + plan.totalGroupCount(), table.name()); } } From e7f633d59a1c63811d31270aab0f935a44ba0848 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Thu, 21 Nov 2024 09:18:24 +0100 Subject: [PATCH 05/11] First version of the refactor --- ...Rewriter.java => FileRewriteExecutor.java} | 31 +-- .../iceberg/actions/FileRewriteGroup.java | 87 ++++++ .../iceberg/actions/FileRewritePlan.java | 86 ++++++ .../iceberg/actions/FileRewritePlanner.java | 69 +++++ .../iceberg/actions/RewriteFileGroup.java | 59 +---- .../actions/RewriteFileGroupPlanner.java | 199 +++++++++----- .../actions/RewritePositionDeletesGroup.java | 57 +--- .../RewritePositionDeletesGroupPlanner.java | 235 +++++++++++++++++ .../actions/SizeBasedDataRewriter.java | 109 -------- ....java => SizeBasedFileRewritePlanner.java} | 44 ++-- .../SizeBasedPositionDeletesRewriter.java | 58 ---- .../actions/TestRewriteFileGroupPlanner.java | 57 ++-- .../actions/TestSizeBasedRewriter.java | 68 ++--- .../TestRewritePositionDeleteFiles.java | 6 +- .../IcebergSortCompactionBenchmark.java | 30 +-- .../actions/RewriteDataFilesSparkAction.java | 73 ++--- ...RewritePositionDeleteFilesSparkAction.java | 206 ++++----------- ...a => SparkBinPackDataRewriteExecutor.java} | 9 +- ...inPackPositionDeletesRewriteExecutor.java} | 19 +- .../spark/actions/SparkRewriteExecutor.java | 83 ++++++ ...=> SparkShufflingDataRewriteExecutor.java} | 19 +- ...=> SparkSizeBasedDataRewriteExecutor.java} | 17 +- ...java => SparkSortDataRewriteExecutor.java} | 6 +- ...va => SparkZOrderDataRewriteExecutor.java} | 6 +- .../actions/TestRewriteDataFilesAction.java | 98 +++---- .../TestRewritePositionDeleteFilesAction.java | 52 ++-- ...java => TestSparkFileRewriteExecutor.java} | 249 +++++++++--------- .../spark/source/TestCompressionSettings.java | 4 +- 28 files changed, 1176 insertions(+), 860 deletions(-) rename core/src/main/java/org/apache/iceberg/actions/{FileRewriter.java => FileRewriteExecutor.java} (62%) create mode 100644 core/src/main/java/org/apache/iceberg/actions/FileRewriteGroup.java create mode 100644 core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java create mode 100644 core/src/main/java/org/apache/iceberg/actions/FileRewritePlanner.java create mode 100644 core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java delete mode 100644 core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java rename core/src/main/java/org/apache/iceberg/actions/{SizeBasedFileRewriter.java => SizeBasedFileRewritePlanner.java} (92%) delete mode 100644 core/src/main/java/org/apache/iceberg/actions/SizeBasedPositionDeletesRewriter.java rename spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/{SparkBinPackDataRewriter.java => SparkBinPackDataRewriteExecutor.java} (88%) rename 
spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/{SparkBinPackPositionDeletesRewriter.java => SparkBinPackPositionDeletesRewriteExecutor.java} (88%) create mode 100644 spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkRewriteExecutor.java rename spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/{SparkShufflingDataRewriter.java => SparkShufflingDataRewriteExecutor.java} (93%) rename spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/{SparkSizeBasedDataRewriter.java => SparkSizeBasedDataRewriteExecutor.java} (74%) rename spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/{SparkSortDataRewriter.java => SparkSortDataRewriteExecutor.java} (89%) rename spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/{SparkZOrderDataRewriter.java => SparkZOrderDataRewriteExecutor.java} (97%) rename spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/{TestSparkFileRewriter.java => TestSparkFileRewriteExecutor.java} (57%) diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewriter.java b/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java similarity index 62% rename from core/src/main/java/org/apache/iceberg/actions/FileRewriter.java rename to core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java index 7c6b4e8d7ef5..c024cb42877a 100644 --- a/core/src/main/java/org/apache/iceberg/actions/FileRewriter.java +++ b/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java @@ -18,24 +18,24 @@ */ package org.apache.iceberg.actions; -import java.util.List; import java.util.Map; import java.util.Set; import org.apache.iceberg.ContentFile; import org.apache.iceberg.ContentScanTask; /** - * A class for rewriting content files. + * A class for rewriting content file groups ({@link FileRewriteGroup}). * - *

The entire rewrite operation is broken down into pieces based on partitioning, and size-based - * groups within a partition. These subunits of the rewrite are referred to as file groups. A file - * group will be processed by a single framework "action". For example, in Spark this means that - * each group would be rewritten in its own Spark job. - * - * @param the Java type of tasks to read content files - * @param the Java type of content files + * @param the Java type of the plan info + * @param the Java type of the tasks to read content files + * @param the Java type of the content files + * @param the Java type of the planned groups */ -public interface FileRewriter, F extends ContentFile> { +public interface FileRewriteExecutor< + I, + T extends ContentScanTask, + F extends ContentFile, + G extends FileRewriteGroup> { /** Returns a description for this rewriter. */ default String description() { @@ -56,14 +56,11 @@ default String description() { void init(Map options); /** - * Selects files which this rewriter believes are valid targets to be rewritten based on their - * scan tasks and groups those scan tasks into file groups. The file groups are then rewritten in - * a single executable unit, such as a Spark job. + * Initializes the rewriter using the information generated during planning. * - * @param tasks an iterable of scan task for files in a partition - * @return groups of scan tasks for files to be rewritten in a single executable unit + * @param plan containing the configuration data */ - Iterable> planFileGroups(Iterable tasks); + void initPlan(FileRewritePlan plan); /** * Rewrite a group of files represented by the given list of scan tasks. @@ -73,5 +70,5 @@ default String description() { * @param group a group of scan tasks for files to be rewritten together * @return a set of newly written files */ - Set rewrite(List group); + Set rewrite(G group); } diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewriteGroup.java b/core/src/main/java/org/apache/iceberg/actions/FileRewriteGroup.java new file mode 100644 index 000000000000..c43bf5cd85f6 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/FileRewriteGroup.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.actions; + +import java.util.Comparator; +import java.util.List; +import org.apache.iceberg.ContentFile; +import org.apache.iceberg.ContentScanTask; +import org.apache.iceberg.RewriteJobOrder; + +/** + * Container class representing a set of files to be rewritten by a {@link FileRewriteExecutor}. 
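+ *
+ * <p>Groups may be ordered for execution with {@link #comparator(RewriteJobOrder)}, for example
+ * {@code groups.sorted(FileRewriteGroup.comparator(RewriteJobOrder.BYTES_DESC))}.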
+ * + * @param the Java type of the plan info + * @param the Java type of the tasks to read content files + * @param the Java type of the content files + */ +public abstract class FileRewriteGroup, F extends ContentFile> { + private final I info; + private final List fileScanTasks; + private final long splitSize; + private final int expectedOutputFiles; + + protected FileRewriteGroup( + I info, List fileScanTasks, long splitSize, int expectedOutputFiles) { + this.info = info; + this.fileScanTasks = fileScanTasks; + this.splitSize = splitSize; + this.expectedOutputFiles = expectedOutputFiles; + } + + public I info() { + return info; + } + + public List fileScans() { + return fileScanTasks; + } + + public long splitSize() { + return splitSize; + } + + public int expectedOutputFiles() { + return expectedOutputFiles; + } + + public long sizeInBytes() { + return fileScanTasks.stream().mapToLong(T::length).sum(); + } + + public int numInputFiles() { + return fileScanTasks.size(); + } + + public static , F extends ContentFile> + Comparator> comparator(RewriteJobOrder rewriteJobOrder) { + switch (rewriteJobOrder) { + case BYTES_ASC: + return Comparator.comparing(FileRewriteGroup::sizeInBytes); + case BYTES_DESC: + return Comparator.comparing(FileRewriteGroup::sizeInBytes, Comparator.reverseOrder()); + case FILES_ASC: + return Comparator.comparing(FileRewriteGroup::numInputFiles); + case FILES_DESC: + return Comparator.comparing(FileRewriteGroup::numInputFiles, Comparator.reverseOrder()); + default: + return (unused, unused2) -> 0; + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java b/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java new file mode 100644 index 000000000000..ad6349de2f80 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.actions; + +import java.util.Map; +import java.util.stream.Stream; +import org.apache.iceberg.ContentFile; +import org.apache.iceberg.ContentScanTask; +import org.apache.iceberg.StructLike; + +/** + * Result of the file rewrite planning. + * + *
>
Contains the planned groups, calculated values required by the {@link FileRewriteExecutor}s + * and statistics. + * + * @param the Java type of the plan info + * @param the Java type of the tasks to read content files + * @param the Java type of the content files + * @param the Java type of the planned groups + */ +public class FileRewritePlan< + I, + T extends ContentScanTask, + F extends ContentFile, + G extends FileRewriteGroup> { + private final Stream groups; + private final int totalGroupCount; + private final Map groupsInPartition; + private final long writeMaxFileSize; + private final int outputSpecId; + + protected FileRewritePlan( + Stream groups, + int totalGroupCount, + Map groupsInPartition, + long writeMaxFileSize, + int outputSpecId) { + this.groups = groups; + this.totalGroupCount = totalGroupCount; + this.groupsInPartition = groupsInPartition; + this.writeMaxFileSize = writeMaxFileSize; + this.outputSpecId = outputSpecId; + } + + /** The stream of the generated {@link RewriteFileGroup}s. */ + public Stream groups() { + return groups; + } + + /** The number of the generated groups in the given partition. */ + public int groupsInPartition(StructLike partition) { + return groupsInPartition.get(partition); + } + + /** The total number of the groups generated by this plan. */ + public int totalGroupCount() { + return totalGroupCount; + } + + /** Calculated maximum file size for the target files */ + public long writeMaxFileSize() { + return writeMaxFileSize; + } + + /** Partition specification id for the target files */ + public int outputSpecId() { + return outputSpecId; + } +} diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewritePlanner.java b/core/src/main/java/org/apache/iceberg/actions/FileRewritePlanner.java new file mode 100644 index 000000000000..ff770874a9a3 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/FileRewritePlanner.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.actions; + +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.ContentFile; +import org.apache.iceberg.ContentScanTask; + +/** + * A class for planning content file rewrites. + * + *
>
The entire rewrite operation is broken down into pieces based on partitioning, and size-based + * groups within a partition. These subunits of the rewrite are referred to as file groups. A file + * group will be processed by a {@link FileRewriteExecutor} in a single framework "action". For + * example, in Spark this means that each group would be rewritten in its own Spark job. + * + * @param the Java type of the plan info + * @param the Java type of the tasks to read content files + * @param the Java type of the content files + * @param the Java type of the planned groups + */ +public interface FileRewritePlanner< + I, + T extends ContentScanTask, + F extends ContentFile, + G extends FileRewriteGroup> { + + /** Returns a description for this rewriter. */ + default String description() { + return getClass().getName(); + } + + /** + * Returns a set of supported options for this rewriter. Only options specified in this list will + * be accepted at runtime. Any other options will be rejected. + */ + Set validOptions(); + + /** + * Initializes this rewriter using provided options. + * + * @param options options to initialize this rewriter + */ + void init(Map options); + + /** + * Generates the plan for rewrite. + * + * @return the generated plan which could be executed during the compaction + */ + FileRewritePlan plan(); +} diff --git a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroup.java b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroup.java index dfc9842780f5..b43d94a2bb8c 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroup.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroup.java @@ -18,39 +18,29 @@ */ package org.apache.iceberg.actions; -import java.util.Comparator; import java.util.List; import java.util.Set; import java.util.stream.Collectors; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileScanTask; -import org.apache.iceberg.RewriteJobOrder; import org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.util.DataFileSet; /** - * Container class representing a set of files to be rewritten by a RewriteAction and the new files - * which have been written by the action. + * Container class representing a set of data files to be rewritten by a RewriteAction and the new + * files which have been written by the action. 
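With the comparator moved from the concrete group classes to the FileRewriteGroup base class, one comparator now serves every group type. A short sketch, assuming the groups come from an already-generated plan (the GroupOrdering helper is hypothetical):

import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.RewriteJobOrder;
import org.apache.iceberg.actions.FileRewriteGroup;
import org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo;
import org.apache.iceberg.actions.RewriteFileGroup;

class GroupOrdering {
  // Order planned groups so that the largest ones are rewritten first.
  static List<RewriteFileGroup> largestFirst(List<RewriteFileGroup> groups) {
    Comparator<FileRewriteGroup<FileGroupInfo, FileScanTask, DataFile>> byBytesDesc =
        FileRewriteGroup.comparator(RewriteJobOrder.BYTES_DESC);
    return groups.stream().sorted(byBytesDesc).collect(Collectors.toList());
  }
}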
*/ -public class RewriteFileGroup { - private final FileGroupInfo info; - private final List fileScanTasks; - +public class RewriteFileGroup extends FileRewriteGroup { private DataFileSet addedFiles = DataFileSet.create(); - public RewriteFileGroup(FileGroupInfo info, List fileScanTasks) { - this.info = info; - this.fileScanTasks = fileScanTasks; - } - - public FileGroupInfo info() { - return info; - } - - public List fileScans() { - return fileScanTasks; + public RewriteFileGroup( + FileGroupInfo info, + List fileScanTasks, + long splitSize, + int expectedOutputFiles) { + super(info, fileScanTasks, splitSize, expectedOutputFiles); } public void setOutputFiles(Set files) { @@ -70,9 +60,9 @@ public Set addedFiles() { public RewriteDataFiles.FileGroupRewriteResult asResult() { Preconditions.checkState(addedFiles != null, "Cannot get result, Group was never rewritten"); return ImmutableRewriteDataFiles.FileGroupRewriteResult.builder() - .info(info) + .info(info()) .addedDataFilesCount(addedFiles.size()) - .rewrittenDataFilesCount(fileScanTasks.size()) + .rewrittenDataFilesCount(fileScans().size()) .rewrittenBytesCount(sizeInBytes()) .build(); } @@ -80,35 +70,12 @@ public RewriteDataFiles.FileGroupRewriteResult asResult() { @Override public String toString() { return MoreObjects.toStringHelper(this) - .add("info", info) - .add("numRewrittenFiles", fileScanTasks.size()) + .add("info", info()) + .add("numRewrittenFiles", fileScans().size()) .add( "numAddedFiles", addedFiles == null ? "Rewrite Incomplete" : Integer.toString(addedFiles.size())) .add("numRewrittenBytes", sizeInBytes()) .toString(); } - - public long sizeInBytes() { - return fileScanTasks.stream().mapToLong(FileScanTask::length).sum(); - } - - public int numFiles() { - return fileScanTasks.size(); - } - - public static Comparator comparator(RewriteJobOrder rewriteJobOrder) { - switch (rewriteJobOrder) { - case BYTES_ASC: - return Comparator.comparing(RewriteFileGroup::sizeInBytes); - case BYTES_DESC: - return Comparator.comparing(RewriteFileGroup::sizeInBytes, Comparator.reverseOrder()); - case FILES_ASC: - return Comparator.comparing(RewriteFileGroup::numFiles); - case FILES_DESC: - return Comparator.comparing(RewriteFileGroup::numFiles, Comparator.reverseOrder()); - default: - return (unused, unused2) -> 0; - } - } } diff --git a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java index e7f0f8ea6518..38df04217d98 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Stream; import org.apache.iceberg.DataFile; @@ -28,13 +29,20 @@ import org.apache.iceberg.RewriteJobOrder; import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo; import org.apache.iceberg.data.GenericRecord; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import 
org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.PropertyUtil; import org.apache.iceberg.util.StructLikeMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,30 +51,86 @@ * Groups specified files in the {@link Table} by {@link RewriteFileGroup}s. These will be grouped * by partitions. */ -public class RewriteFileGroupPlanner { +public class RewriteFileGroupPlanner + extends SizeBasedFileRewritePlanner { + /** + * The minimum number of deletes that needs to be associated with a data file for it to be + * considered for rewriting. If a data file has this number of deletes or more, it will be + * rewritten regardless of its file size determined by {@link #MIN_FILE_SIZE_BYTES} and {@link + * #MAX_FILE_SIZE_BYTES}. If a file group contains a file that satisfies this condition, the file + * group will be rewritten regardless of the number of files in the file group determined by + * {@link #MIN_INPUT_FILES}. + * + *
>
Defaults to Integer.MAX_VALUE, which means this feature is not enabled by default. + */ + public static final String DELETE_FILE_THRESHOLD = "delete-file-threshold"; + + public static final int DELETE_FILE_THRESHOLD_DEFAULT = Integer.MAX_VALUE; + private static final Logger LOG = LoggerFactory.getLogger(RewriteFileGroupPlanner.class); - private final FileRewriter rewriter; - private final RewriteJobOrder rewriteJobOrder; + private final Expression filter; + private final long snapshotId; + private final boolean caseSensitive; + + private int deleteFileThreshold; + private RewriteJobOrder rewriteJobOrder; public RewriteFileGroupPlanner( - FileRewriter rewriter, RewriteJobOrder rewriteJobOrder) { - this.rewriter = rewriter; - this.rewriteJobOrder = rewriteJobOrder; + Table table, Expression filter, long snapshotId, boolean caseSensitive) { + super(table); + this.filter = filter; + this.snapshotId = snapshotId; + this.caseSensitive = caseSensitive; + } + + @Override + public Set validOptions() { + return ImmutableSet.builder() + .addAll(super.validOptions()) + .add(DELETE_FILE_THRESHOLD) + .add(RewriteDataFiles.REWRITE_JOB_ORDER) + .build(); + } + + @Override + public void init(Map options) { + super.init(options); + this.deleteFileThreshold = deleteFileThreshold(options); + this.rewriteJobOrder = + RewriteJobOrder.fromName( + PropertyUtil.propertyAsString( + options, + RewriteDataFiles.REWRITE_JOB_ORDER, + RewriteDataFiles.REWRITE_JOB_ORDER_DEFAULT)); + } + + @Override + protected Iterable filterFiles(Iterable tasks) { + return Iterables.filter(tasks, task -> wronglySized(task) || tooManyDeletes(task)); + } + + @Override + protected Iterable> filterFileGroups(List> groups) { + return Iterables.filter(groups, this::shouldRewrite); + } + + @Override + protected long defaultTargetFileSize() { + return PropertyUtil.propertyAsLong( + table().properties(), + TableProperties.WRITE_TARGET_FILE_SIZE_BYTES, + TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT); } /** * Generates the plan for the current table. 
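As a usage sketch of the options wiring above: the delete-file threshold and job order are supplied through init() like any other option. The table handle and the threshold value of 3 are illustrative only.

import org.apache.iceberg.RewriteJobOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteDataFiles;
import org.apache.iceberg.actions.RewriteFileGroupPlanner;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;

class PlannerSetup {
  static RewriteFileGroupPlanner newPlanner(Table table) {
    RewriteFileGroupPlanner planner =
        new RewriteFileGroupPlanner(
            table, Expressions.alwaysTrue(), table.currentSnapshot().snapshotId(), true);
    // rewrite any file with at least 3 associated deletes, largest groups first
    planner.init(
        ImmutableMap.of(
            RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "3",
            RewriteDataFiles.REWRITE_JOB_ORDER, RewriteJobOrder.BYTES_DESC.name()));
    return planner;
  }
}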
* - * @param table to plan for - * @param filter to exclude files from planning - * @param snapshotId of the last snapshot included in the plan - * @param caseSensitive setting for filtering * @return the generated plan which could be executed during the compaction */ - public RewritePlan plan(Table table, Expression filter, long snapshotId, boolean caseSensitive) { - StructLikeMap>> plan = - planFileGroups(table, filter, snapshotId, caseSensitive); + @Override + public FileRewritePlan plan() { + StructLikeMap>> plan = planFileGroups(); RewriteExecutionContext ctx = new RewriteExecutionContext(); Stream groups = plan.entrySet().stream() @@ -75,31 +139,67 @@ public RewritePlan plan(Table table, Expression filter, long snapshotId, boolean e -> { StructLike partition = e.getKey(); List> scanGroups = e.getValue(); - return scanGroups.stream().map(tasks -> newRewriteGroup(ctx, partition, tasks)); + return scanGroups.stream() + .map( + tasks -> { + long inputSize = inputSize(tasks); + return newRewriteGroup( + ctx, + partition, + tasks, + splitSize(inputSize), + numOutputFiles(inputSize)); + }); }) .sorted(RewriteFileGroup.comparator(rewriteJobOrder)); Map groupsInPartition = plan.transformValues(List::size); int totalGroupCount = groupsInPartition.values().stream().reduce(Integer::sum).orElse(0); - return new RewritePlan(groups, totalGroupCount, groupsInPartition); + return new FileRewritePlan<>( + groups, totalGroupCount, groupsInPartition, writeMaxFileSize(), outputSpecId()); } - private StructLikeMap>> planFileGroups( - Table table, Expression filter, long snapshotId, boolean caseSensitive) { - CloseableIterable fileScanTasks = - table - .newScan() - .useSnapshot(snapshotId) - .caseSensitive(caseSensitive) - .filter(filter) - .ignoreResiduals() - .planFiles(); + @VisibleForTesting + CloseableIterable tasks() { + return table() + .newScan() + .useSnapshot(snapshotId) + .caseSensitive(caseSensitive) + .filter(filter) + .ignoreResiduals() + .planFiles(); + } + + private int deleteFileThreshold(Map options) { + int value = + PropertyUtil.propertyAsInt(options, DELETE_FILE_THRESHOLD, DELETE_FILE_THRESHOLD_DEFAULT); + Preconditions.checkArgument( + value >= 0, "'%s' is set to %s but must be >= 0", DELETE_FILE_THRESHOLD, value); + return value; + } + + private boolean tooManyDeletes(FileScanTask task) { + return task.deletes() != null && task.deletes().size() >= deleteFileThreshold; + } + + private boolean shouldRewrite(List group) { + return enoughInputFiles(group) + || enoughContent(group) + || tooMuchContent(group) + || anyTaskHasTooManyDeletes(group); + } + + private boolean anyTaskHasTooManyDeletes(List group) { + return group.stream().anyMatch(this::tooManyDeletes); + } + + private StructLikeMap>> planFileGroups() { + CloseableIterable fileScanTasks = tasks(); try { - Types.StructType partitionType = table.spec().partitionType(); + Types.StructType partitionType = table().spec().partitionType(); StructLikeMap> filesByPartition = - groupByPartition(table, partitionType, fileScanTasks); - return filesByPartition.transformValues( - tasks -> ImmutableList.copyOf(rewriter.planFileGroups(tasks))); + groupByPartition(table(), partitionType, fileScanTasks); + return filesByPartition.transformValues(tasks -> ImmutableList.copyOf(planFileGroups(tasks))); } finally { try { fileScanTasks.close(); @@ -128,45 +228,18 @@ private StructLikeMap> groupByPartition( } private RewriteFileGroup newRewriteGroup( - RewriteExecutionContext ctx, StructLike partition, List tasks) { - RewriteDataFiles.FileGroupInfo info 
= + RewriteExecutionContext ctx, + StructLike partition, + List tasks, + long splitSize, + int numOutputSize) { + FileGroupInfo info = ImmutableRewriteDataFiles.FileGroupInfo.builder() .globalIndex(ctx.currentGlobalIndex()) .partitionIndex(ctx.currentPartitionIndex(partition)) .partition(partition) .build(); - return new RewriteFileGroup(info, Lists.newArrayList(tasks)); - } - - /** Result of the data file rewrite planning. */ - public static class RewritePlan { - private final Stream groups; - private final int totalGroupCount; - private final Map groupsInPartition; - - private RewritePlan( - Stream groups, - int totalGroupCount, - Map groupsInPartition) { - this.groups = groups; - this.totalGroupCount = totalGroupCount; - this.groupsInPartition = groupsInPartition; - } - - /** The stream of the generated {@link RewriteFileGroup}s. */ - public Stream groups() { - return groups; - } - - /** The number of the generated groups in the given partition. */ - public int groupsInPartition(StructLike partition) { - return groupsInPartition.get(partition); - } - - /** The total number of the groups generated by this plan. */ - public int totalGroupCount() { - return totalGroupCount; - } + return new RewriteFileGroup(info, Lists.newArrayList(tasks), splitSize, numOutputSize); } private static class RewriteExecutionContext { diff --git a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroup.java b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroup.java index d1c688417a64..96640bb5d9b6 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroup.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroup.java @@ -18,13 +18,11 @@ */ package org.apache.iceberg.actions; -import java.util.Comparator; import java.util.List; import java.util.Set; import java.util.stream.Collectors; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.PositionDeletesScanTask; -import org.apache.iceberg.RewriteJobOrder; import org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupInfo; import org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupRewriteResult; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; @@ -35,29 +33,23 @@ * Container class representing a set of position delete files to be rewritten by a {@link * RewritePositionDeleteFiles} and the new files which have been written by the action. 
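Downstream code consumes the result through the FileRewritePlan accessors. A hedged sketch, assuming a configured planner; the PlanInspector helper and the log wording are illustrative:

import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.actions.FileRewritePlan;
import org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo;
import org.apache.iceberg.actions.RewriteFileGroup;

class PlanInspector {
  static void describe(
      FileRewritePlan<FileGroupInfo, FileScanTask, DataFile, RewriteFileGroup> plan) {
    System.out.printf("planned %d group(s)%n", plan.totalGroupCount());
    // the stream is consumed once; collect it if multiple passes are needed
    plan.groups()
        .forEach(
            group ->
                System.out.printf(
                    "group %d: %d input file(s), %d byte(s), %d expected output file(s)%n",
                    group.info().globalIndex(),
                    group.numInputFiles(),
                    group.sizeInBytes(),
                    group.expectedOutputFiles()));
  }
}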
*/ -public class RewritePositionDeletesGroup { - private final FileGroupInfo info; - private final List tasks; +public class RewritePositionDeletesGroup + extends FileRewriteGroup { private final long maxRewrittenDataSequenceNumber; private DeleteFileSet addedDeleteFiles = DeleteFileSet.create(); - public RewritePositionDeletesGroup(FileGroupInfo info, List tasks) { + public RewritePositionDeletesGroup( + FileGroupInfo info, + List tasks, + long splitSize, + int expectedOutputFiles) { + super(info, tasks, splitSize, expectedOutputFiles); Preconditions.checkArgument(!tasks.isEmpty(), "Tasks must not be empty"); - this.info = info; - this.tasks = tasks; this.maxRewrittenDataSequenceNumber = tasks.stream().mapToLong(t -> t.file().dataSequenceNumber()).max().getAsLong(); } - public FileGroupInfo info() { - return info; - } - - public List tasks() { - return tasks; - } - public void setOutputFiles(Set files) { addedDeleteFiles = DeleteFileSet.of(files); } @@ -67,7 +59,7 @@ public long maxRewrittenDataSequenceNumber() { } public Set rewrittenDeleteFiles() { - return tasks().stream() + return fileScans().stream() .map(PositionDeletesScanTask::file) .collect(Collectors.toCollection(DeleteFileSet::create)); } @@ -81,9 +73,9 @@ public FileGroupRewriteResult asResult() { addedDeleteFiles != null, "Cannot get result, Group was never rewritten"); return ImmutableRewritePositionDeleteFiles.FileGroupRewriteResult.builder() - .info(info) + .info(info()) .addedDeleteFilesCount(addedDeleteFiles.size()) - .rewrittenDeleteFilesCount(tasks.size()) + .rewrittenDeleteFilesCount(fileScans().size()) .rewrittenBytesCount(rewrittenBytes()) .addedBytesCount(addedBytes()) .build(); @@ -92,8 +84,8 @@ public FileGroupRewriteResult asResult() { @Override public String toString() { return MoreObjects.toStringHelper(this) - .add("info", info) - .add("numRewrittenPositionDeleteFiles", tasks.size()) + .add("info", info()) + .add("numRewrittenPositionDeleteFiles", fileScans().size()) .add( "numAddedPositionDeleteFiles", addedDeleteFiles == null @@ -105,31 +97,10 @@ public String toString() { } public long rewrittenBytes() { - return tasks.stream().mapToLong(PositionDeletesScanTask::length).sum(); + return fileScans().stream().mapToLong(PositionDeletesScanTask::length).sum(); } public long addedBytes() { return addedDeleteFiles.stream().mapToLong(DeleteFile::fileSizeInBytes).sum(); } - - public int numRewrittenDeleteFiles() { - return tasks.size(); - } - - public static Comparator comparator(RewriteJobOrder order) { - switch (order) { - case BYTES_ASC: - return Comparator.comparing(RewritePositionDeletesGroup::rewrittenBytes); - case BYTES_DESC: - return Comparator.comparing( - RewritePositionDeletesGroup::rewrittenBytes, Comparator.reverseOrder()); - case FILES_ASC: - return Comparator.comparing(RewritePositionDeletesGroup::numRewrittenDeleteFiles); - case FILES_DESC: - return Comparator.comparing( - RewritePositionDeletesGroup::numRewrittenDeleteFiles, Comparator.reverseOrder()); - default: - return (unused, unused2) -> 0; - } - } } diff --git a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java new file mode 100644 index 000000000000..d83677139a37 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.actions; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.MetadataTableType; +import org.apache.iceberg.MetadataTableUtils; +import org.apache.iceberg.Partitioning; +import org.apache.iceberg.PositionDeletesScanTask; +import org.apache.iceberg.PositionDeletesTable; +import org.apache.iceberg.RewriteJobOrder; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupInfo; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.PartitionUtil; +import org.apache.iceberg.util.PropertyUtil; +import org.apache.iceberg.util.StructLikeMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Groups specified files in the {@link Table} by {@link RewriteFileGroup}s. These will be grouped + * by partitions. + */ +public class RewritePositionDeletesGroupPlanner + extends SizeBasedFileRewritePlanner< + FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> { + private static final Logger LOG = + LoggerFactory.getLogger(RewritePositionDeletesGroupPlanner.class); + + private final Expression filter; + private final boolean caseSensitive; + private RewriteJobOrder rewriteJobOrder; + + public RewritePositionDeletesGroupPlanner(Table table, Expression filter, boolean caseSensitive) { + super(table); + this.caseSensitive = caseSensitive; + this.filter = filter; + } + + @Override + public Set validOptions() { + return ImmutableSet.builder() + .addAll(super.validOptions()) + .add(RewriteDataFiles.REWRITE_JOB_ORDER) + .build(); + } + + @Override + public void init(Map options) { + super.init(options); + this.rewriteJobOrder = + RewriteJobOrder.fromName( + PropertyUtil.propertyAsString( + options, + RewritePositionDeleteFiles.REWRITE_JOB_ORDER, + RewritePositionDeleteFiles.REWRITE_JOB_ORDER_DEFAULT)); + } + + /** + * Generates the plan for the current table. 
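The position-delete planner follows the same lifecycle as the data-file planner. A minimal sketch, assuming REWRITE_ALL to bypass the size-based filtering; the PositionDeletePlanning helper is hypothetical:

import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.PositionDeletesScanTask;
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.FileRewritePlan;
import org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupInfo;
import org.apache.iceberg.actions.RewritePositionDeletesGroup;
import org.apache.iceberg.actions.RewritePositionDeletesGroupPlanner;
import org.apache.iceberg.actions.SizeBasedFileRewritePlanner;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;

class PositionDeletePlanning {
  static FileRewritePlan<
          FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup>
      planAll(Table table) {
    RewritePositionDeletesGroupPlanner planner =
        new RewritePositionDeletesGroupPlanner(table, Expressions.alwaysTrue(), true);
    // REWRITE_ALL skips the size-based filtering and plans every delete file
    planner.init(ImmutableMap.of(SizeBasedFileRewritePlanner.REWRITE_ALL, "true"));
    return planner.plan();
  }
}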
+ * + * @return the generated plan which could be executed during the compaction + */ + @Override + public FileRewritePlan< + FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> + plan() { + StructLikeMap>> plan = planFileGroups(); + RewriteExecutionContext ctx = new RewriteExecutionContext(); + Stream groups = + plan.entrySet().stream() + .filter(e -> !e.getValue().isEmpty()) + .flatMap( + e -> { + StructLike partition = e.getKey(); + List> scanGroups = e.getValue(); + return scanGroups.stream() + .map( + tasks -> { + long inputSize = inputSize(tasks); + return newRewriteGroup( + ctx, + partition, + tasks, + splitSize(inputSize), + numOutputFiles(inputSize)); + }); + }) + .sorted(RewritePositionDeletesGroup.comparator(rewriteJobOrder)); + Map groupsInPartition = plan.transformValues(List::size); + int totalGroupCount = groupsInPartition.values().stream().reduce(Integer::sum).orElse(0); + return new FileRewritePlan<>( + groups, totalGroupCount, groupsInPartition, writeMaxFileSize(), outputSpecId()); + } + + private StructLikeMap>> planFileGroups() { + Table deletesTable = + MetadataTableUtils.createMetadataTableInstance(table(), MetadataTableType.POSITION_DELETES); + CloseableIterable fileTasks = planFiles(deletesTable); + + try { + Types.StructType partitionType = Partitioning.partitionType(deletesTable); + StructLikeMap> fileTasksByPartition = + groupByPartition(partitionType, fileTasks); + return fileTasksByPartition.transformValues( + tasks -> ImmutableList.copyOf(planFileGroups(tasks))); + } finally { + try { + fileTasks.close(); + } catch (IOException io) { + LOG.error("Cannot properly close file iterable while planning for rewrite", io); + } + } + } + + @Override + protected Iterable filterFiles(Iterable tasks) { + return Iterables.filter(tasks, this::wronglySized); + } + + @Override + protected Iterable> filterFileGroups( + List> groups) { + return Iterables.filter(groups, this::shouldRewrite); + } + + private boolean shouldRewrite(List group) { + return enoughInputFiles(group) || enoughContent(group) || tooMuchContent(group); + } + + @Override + protected long defaultTargetFileSize() { + return PropertyUtil.propertyAsLong( + table().properties(), + TableProperties.DELETE_TARGET_FILE_SIZE_BYTES, + TableProperties.DELETE_TARGET_FILE_SIZE_BYTES_DEFAULT); + } + + private CloseableIterable planFiles(Table deletesTable) { + PositionDeletesTable.PositionDeletesBatchScan scan = + (PositionDeletesTable.PositionDeletesBatchScan) deletesTable.newBatchScan(); + return CloseableIterable.transform( + scan.baseTableFilter(filter).caseSensitive(caseSensitive).ignoreResiduals().planFiles(), + PositionDeletesScanTask.class::cast); + } + + private StructLikeMap> groupByPartition( + Types.StructType partitionType, Iterable tasks) { + StructLikeMap> filesByPartition = + StructLikeMap.create(partitionType); + + for (PositionDeletesScanTask task : tasks) { + StructLike coerced = coercePartition(task, partitionType); + + List partitionTasks = filesByPartition.get(coerced); + if (partitionTasks == null) { + partitionTasks = Lists.newArrayList(); + } + partitionTasks.add(task); + filesByPartition.put(coerced, partitionTasks); + } + + return filesByPartition; + } + + private RewritePositionDeletesGroup newRewriteGroup( + RewriteExecutionContext ctx, + StructLike partition, + List tasks, + long splitSize, + int numOutputSize) { + ImmutableRewritePositionDeleteFiles.FileGroupInfo info = + ImmutableRewritePositionDeleteFiles.FileGroupInfo.builder() + 
.globalIndex(ctx.currentGlobalIndex()) + .partitionIndex(ctx.currentPartitionIndex(partition)) + .partition(partition) + .build(); + return new RewritePositionDeletesGroup( + info, Lists.newArrayList(tasks), splitSize, numOutputSize); + } + + private static class RewriteExecutionContext { + private final Map partitionIndexMap; + private final AtomicInteger groupIndex; + + private RewriteExecutionContext() { + this.partitionIndexMap = Maps.newConcurrentMap(); + this.groupIndex = new AtomicInteger(1); + } + + private int currentGlobalIndex() { + return groupIndex.getAndIncrement(); + } + + private int currentPartitionIndex(StructLike partition) { + return partitionIndexMap.merge(partition, 1, Integer::sum); + } + } + + private StructLike coercePartition(PositionDeletesScanTask task, Types.StructType partitionType) { + return PartitionUtil.coercePartition(partitionType, task.spec(), task.partition()); + } +} diff --git a/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java b/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java deleted file mode 100644 index e5b5908804e7..000000000000 --- a/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.actions; - -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.iceberg.DataFile; -import org.apache.iceberg.FileScanTask; -import org.apache.iceberg.Table; -import org.apache.iceberg.TableProperties; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; -import org.apache.iceberg.util.PropertyUtil; - -public abstract class SizeBasedDataRewriter extends SizeBasedFileRewriter { - - /** - * The minimum number of deletes that needs to be associated with a data file for it to be - * considered for rewriting. If a data file has this number of deletes or more, it will be - * rewritten regardless of its file size determined by {@link #MIN_FILE_SIZE_BYTES} and {@link - * #MAX_FILE_SIZE_BYTES}. If a file group contains a file that satisfies this condition, the file - * group will be rewritten regardless of the number of files in the file group determined by - * {@link #MIN_INPUT_FILES}. - * - *
>
Defaults to Integer.MAX_VALUE, which means this feature is not enabled by default. - */ - public static final String DELETE_FILE_THRESHOLD = "delete-file-threshold"; - - public static final int DELETE_FILE_THRESHOLD_DEFAULT = Integer.MAX_VALUE; - - private int deleteFileThreshold; - - protected SizeBasedDataRewriter(Table table) { - super(table); - } - - @Override - public Set validOptions() { - return ImmutableSet.builder() - .addAll(super.validOptions()) - .add(DELETE_FILE_THRESHOLD) - .build(); - } - - @Override - public void init(Map options) { - super.init(options); - this.deleteFileThreshold = deleteFileThreshold(options); - } - - @Override - protected Iterable filterFiles(Iterable tasks) { - return Iterables.filter(tasks, task -> wronglySized(task) || tooManyDeletes(task)); - } - - private boolean tooManyDeletes(FileScanTask task) { - return task.deletes() != null && task.deletes().size() >= deleteFileThreshold; - } - - @Override - protected Iterable> filterFileGroups(List> groups) { - return Iterables.filter(groups, this::shouldRewrite); - } - - private boolean shouldRewrite(List group) { - return enoughInputFiles(group) - || enoughContent(group) - || tooMuchContent(group) - || anyTaskHasTooManyDeletes(group); - } - - private boolean anyTaskHasTooManyDeletes(List group) { - return group.stream().anyMatch(this::tooManyDeletes); - } - - @Override - protected long defaultTargetFileSize() { - return PropertyUtil.propertyAsLong( - table().properties(), - TableProperties.WRITE_TARGET_FILE_SIZE_BYTES, - TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT); - } - - private int deleteFileThreshold(Map options) { - int value = - PropertyUtil.propertyAsInt(options, DELETE_FILE_THRESHOLD, DELETE_FILE_THRESHOLD_DEFAULT); - Preconditions.checkArgument( - value >= 0, "'%s' is set to %s but must be >= 0", DELETE_FILE_THRESHOLD, value); - return value; - } -} diff --git a/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java b/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewritePlanner.java similarity index 92% rename from core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java rename to core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewritePlanner.java index 5d45392c5487..f743c689da35 100644 --- a/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java +++ b/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewritePlanner.java @@ -24,7 +24,6 @@ import java.util.Set; import org.apache.iceberg.ContentFile; import org.apache.iceberg.ContentScanTask; -import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Table; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; @@ -36,7 +35,7 @@ import org.slf4j.LoggerFactory; /** - * A file rewriter that determines which files to rewrite based on their size. + * A file rewrite planner that determines which files to rewrite based on their size. * *
>
If files are smaller than the {@link #MIN_FILE_SIZE_BYTES} threshold or larger than the {@link * #MAX_FILE_SIZE_BYTES} threshold, they are considered targets for being rewritten. @@ -48,10 +47,14 @@ * *
>
Note that implementations may add extra conditions for selecting files or filtering groups. */ -public abstract class SizeBasedFileRewriter, F extends ContentFile> - implements FileRewriter { +public abstract class SizeBasedFileRewritePlanner< + I, + T extends ContentScanTask, + F extends ContentFile, + G extends FileRewriteGroup> + implements FileRewritePlanner { - private static final Logger LOG = LoggerFactory.getLogger(SizeBasedFileRewriter.class); + private static final Logger LOG = LoggerFactory.getLogger(SizeBasedFileRewritePlanner.class); /** The target output file size that this file rewriter will attempt to generate. */ public static final String TARGET_FILE_SIZE_BYTES = "target-file-size-bytes"; @@ -102,7 +105,7 @@ public abstract class SizeBasedFileRewriter, F exte public static final long MAX_FILE_GROUP_SIZE_BYTES_DEFAULT = 100L * 1024 * 1024 * 1024; // 100 GB - private static final long SPLIT_OVERHEAD = 5 * 1024; + private static final long SPLIT_OVERHEAD = 5L * 1024; private final Table table; private long targetFileSize; @@ -114,7 +117,7 @@ public abstract class SizeBasedFileRewriter, F exte private int outputSpecId; - protected SizeBasedFileRewriter(Table table) { + protected SizeBasedFileRewritePlanner(Table table) { this.table = table; } @@ -145,7 +148,6 @@ public void init(Map options) { this.targetFileSize = sizeThresholds.get(TARGET_FILE_SIZE_BYTES); this.minFileSize = sizeThresholds.get(MIN_FILE_SIZE_BYTES); this.maxFileSize = sizeThresholds.get(MAX_FILE_SIZE_BYTES); - this.minInputFiles = minInputFiles(options); this.rewriteAll = rewriteAll(options); this.maxGroupSize = maxGroupSize(options); @@ -160,7 +162,6 @@ protected boolean wronglySized(T task) { return task.length() < minFileSize || task.length() > maxFileSize; } - @Override public Iterable> planFileGroups(Iterable tasks) { Iterable filteredTasks = rewriteAll ? tasks : filterFiles(tasks); BinPacking.ListPacker packer = new BinPacking.ListPacker<>(maxGroupSize, 1, false); @@ -191,14 +192,12 @@ protected long inputSize(List group) { * of output files. The final split size is adjusted to be at least as big as the target file size * but less than the max write file size. 
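To make the sizing arithmetic concrete, take the thresholds used by the split-size test later in this patch (minimum 256 MB, target 512 MB, maximum 768 MB) and a group of four 145 MB files, i.e. 580 MB of input:

  writeMaxFileSize = 512 MB + (768 MB - 512 MB) * 0.5 = 640 MB
  numOutputFiles   = 2: ceil(580 / 512) = 2, and the 68 MB remainder cannot be folded
                     into a single file because 580 MB / 1 file = 580 MB exceeds
                     min(1.1 * 512 MB, 640 MB) = 563.2 MB
  splitSize        = (580 MB / 2) + 5 KB, clamped up to the 512 MB target

The group is therefore read with 512 MB splits and is expected to produce two output files.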
*/ - public long splitSize(long inputSize) { + protected long splitSize(long inputSize) { long estimatedSplitSize = (inputSize / numOutputFiles(inputSize)) + SPLIT_OVERHEAD; if (estimatedSplitSize < targetFileSize) { return targetFileSize; - } else if (estimatedSplitSize > writeMaxFileSize()) { - return writeMaxFileSize(); } else { - return estimatedSplitSize; + return Math.min(estimatedSplitSize, writeMaxFileSize()); } } @@ -216,7 +215,7 @@ public long splitSize(long inputSize) { * @param inputSize a total input size for a file group * @return the number of files this rewriter should create */ - protected long numOutputFiles(long inputSize) { + protected int numOutputFiles(long inputSize) { if (inputSize < targetFileSize) { return 1; } @@ -227,18 +226,17 @@ protected long numOutputFiles(long inputSize) { if (LongMath.mod(inputSize, targetFileSize) > minFileSize) { // the remainder file is of a valid size for this rewrite so keep it - return numFilesWithRemainder; + return (int) numFilesWithRemainder; - } else if (avgFileSizeWithoutRemainder - < Math.min(1.1 * targetFileSize, (double) writeMaxFileSize())) { + } else if (avgFileSizeWithoutRemainder < Math.min(1.1 * targetFileSize, writeMaxFileSize())) { // if the reminder is distributed amongst other files, // the average file size will be no more than 10% bigger than the target file size // so round down and distribute remainder amongst other files - return numFilesWithoutRemainder; + return (int) numFilesWithoutRemainder; } else { // keep the remainder file as it is not OK to distribute it amongst other files - return numFilesWithRemainder; + return (int) numFilesWithRemainder; } } @@ -259,15 +257,11 @@ protected long numOutputFiles(long inputSize) { * * @return the target size plus one half of the distance between max and target */ - protected long writeMaxFileSize() { + public long writeMaxFileSize() { return (long) (targetFileSize + ((maxFileSize - targetFileSize) * 0.5)); } - protected PartitionSpec outputSpec() { - return table.specs().get(outputSpecId); - } - - protected int outputSpecId() { + public int outputSpecId() { return outputSpecId; } diff --git a/core/src/main/java/org/apache/iceberg/actions/SizeBasedPositionDeletesRewriter.java b/core/src/main/java/org/apache/iceberg/actions/SizeBasedPositionDeletesRewriter.java deleted file mode 100644 index c08a31a731f4..000000000000 --- a/core/src/main/java/org/apache/iceberg/actions/SizeBasedPositionDeletesRewriter.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.actions; - -import java.util.List; -import org.apache.iceberg.DeleteFile; -import org.apache.iceberg.PositionDeletesScanTask; -import org.apache.iceberg.Table; -import org.apache.iceberg.TableProperties; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; -import org.apache.iceberg.util.PropertyUtil; - -public abstract class SizeBasedPositionDeletesRewriter - extends SizeBasedFileRewriter { - - protected SizeBasedPositionDeletesRewriter(Table table) { - super(table); - } - - @Override - protected Iterable filterFiles(Iterable tasks) { - return Iterables.filter(tasks, this::wronglySized); - } - - @Override - protected Iterable> filterFileGroups( - List> groups) { - return Iterables.filter(groups, this::shouldRewrite); - } - - private boolean shouldRewrite(List group) { - return enoughInputFiles(group) || enoughContent(group) || tooMuchContent(group); - } - - @Override - protected long defaultTargetFileSize() { - return PropertyUtil.propertyAsLong( - table().properties(), - TableProperties.DELETE_TARGET_FILE_SIZE_BYTES, - TableProperties.DELETE_TARGET_FILE_SIZE_BYTES_DEFAULT); - } -} diff --git a/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java b/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java index 8bf7018eccc4..903e7b27313c 100644 --- a/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java +++ b/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java @@ -18,12 +18,12 @@ */ package org.apache.iceberg.actions; +import static org.apache.iceberg.actions.RewriteDataFiles.REWRITE_JOB_ORDER; import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; import org.apache.iceberg.DataFile; @@ -33,10 +33,10 @@ import org.apache.iceberg.StructLike; import org.apache.iceberg.TestBase; import org.apache.iceberg.TestTables; +import org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -90,9 +90,14 @@ void testGroups(RewriteJobOrder order) { .appendFile(FILE_5) .appendFile(FILE_6) .commit(); - RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(new DummyRewriter(false), order); - RewriteFileGroupPlanner.RewritePlan result = - planner.plan(table, Expressions.alwaysTrue(), table.currentSnapshot().snapshotId(), false); + RewriteFileGroupPlanner planner = + new RewriteFileGroupPlanner( + table, Expressions.alwaysTrue(), table.currentSnapshot().snapshotId(), false); + planner.init( + ImmutableMap.of( + RewriteFileGroupPlanner.REWRITE_ALL, "true", REWRITE_JOB_ORDER, order.name())); + FileRewritePlan result = + planner.plan(); List groups = result.groups().collect(Collectors.toList()); assertThat(groups.stream().map(group -> group.info().partition()).collect(Collectors.toList())) .isEqualTo(EXPECTED.get(order)); @@ -112,44 +117,22 @@ void testContext() { .appendFile(FILE_6) .commit(); RewriteFileGroupPlanner planner = - new RewriteFileGroupPlanner(new DummyRewriter(true), RewriteJobOrder.FILES_DESC); - 
RewriteFileGroupPlanner.RewritePlan result = - planner.plan(table, Expressions.alwaysTrue(), table.currentSnapshot().snapshotId(), false); + new RewriteFileGroupPlanner( + table, Expressions.alwaysTrue(), table.currentSnapshot().snapshotId(), false); + planner.init( + ImmutableMap.of( + RewriteFileGroupPlanner.REWRITE_ALL, + "true", + RewriteFileGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES, + "10")); + FileRewritePlan result = + planner.plan(); assertThat(result.totalGroupCount()).isEqualTo(6); assertThat(result.groupsInPartition(FILE_1.partition())).isEqualTo(3); assertThat(result.groupsInPartition(FILE_4.partition())).isEqualTo(2); assertThat(result.groupsInPartition(FILE_6.partition())).isEqualTo(1); } - private static class DummyRewriter implements FileRewriter { - private final boolean split; - - private DummyRewriter(boolean split) { - this.split = split; - } - - @Override - public Set validOptions() { - return Set.of(); - } - - @Override - public void init(Map options) {} - - @Override - public Iterable> planFileGroups(Iterable tasks) { - List taskList = Lists.newArrayList(tasks); - return split - ? taskList.stream().map(ImmutableList::of).collect(Collectors.toList()) - : ImmutableList.of(taskList); - } - - @Override - public Set rewrite(List group) { - return Set.of(); - } - } - private static DataFile newDataFile(String partitionPath, long fileSize) { return DataFiles.builder(TestBase.SPEC) .withPath("/path/to/data-" + UUID.randomUUID() + ".parquet") diff --git a/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedRewriter.java b/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedRewriter.java index 77d16d3bc821..82286d250574 100644 --- a/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedRewriter.java +++ b/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedRewriter.java @@ -19,25 +19,30 @@ package org.apache.iceberg.actions; import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.when; import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.Set; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.MockFileScanTask; import org.apache.iceberg.ParameterizedTestExtension; import org.apache.iceberg.Parameters; +import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; import org.apache.iceberg.TestBase; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.junit.jupiter.api.TestTemplate; import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mockito; @ExtendWith(ParameterizedTestExtension.class) -public class TestSizeBasedRewriter extends TestBase { +class TestSizeBasedRewriter extends TestBase { @Parameters(name = "formatVersion = {0}") protected static List parameters() { @@ -45,54 +50,57 @@ protected static List parameters() { } @TestTemplate - public void testSplitSizeLowerBound() { - SizeBasedDataFileRewriterImpl rewriter = new SizeBasedDataFileRewriterImpl(table); - - FileScanTask task1 = new MockFileScanTask(145L * 1024 * 1024); - FileScanTask task2 = new MockFileScanTask(145L * 1024 * 1024); - FileScanTask task3 = new MockFileScanTask(145L * 1024 * 1024); - FileScanTask task4 = new MockFileScanTask(145L * 1024 * 1024); + void 
testSplitSizeLowerBound() { + FileScanTask task1 = new MockFileScanTask(mockDataFile()); + FileScanTask task2 = new MockFileScanTask(mockDataFile()); + FileScanTask task3 = new MockFileScanTask(mockDataFile()); + FileScanTask task4 = new MockFileScanTask(mockDataFile()); List tasks = ImmutableList.of(task1, task2, task3, task4); + RewriteFileGroupPlanner planner = new TestingPlanner(table, Expressions.alwaysTrue(), 1, tasks); + long minFileSize = 256L * 1024 * 1024; long targetFileSize = 512L * 1024 * 1024; long maxFileSize = 768L * 1024 * 1024; Map options = ImmutableMap.of( - SizeBasedDataRewriter.MIN_FILE_SIZE_BYTES, String.valueOf(minFileSize), - SizeBasedDataRewriter.TARGET_FILE_SIZE_BYTES, String.valueOf(targetFileSize), - SizeBasedDataRewriter.MAX_FILE_SIZE_BYTES, String.valueOf(maxFileSize)); - rewriter.init(options); + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, String.valueOf(minFileSize), + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, String.valueOf(targetFileSize), + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, String.valueOf(maxFileSize)); + planner.init(options); // the total task size is 580 MB and the target file size is 512 MB // the remainder must be written into a separate file as it exceeds 10% - long numOutputFiles = rewriter.computeNumOutputFiles(tasks); - assertThat(numOutputFiles).isEqualTo(2); + + RewriteFileGroup group = planner.plan().groups().iterator().next(); + + assertThat(group.expectedOutputFiles()).isEqualTo(2); // the split size must be >= targetFileSize and < maxFileSize - long splitSize = rewriter.computeSplitSize(tasks); - assertThat(splitSize).isGreaterThanOrEqualTo(targetFileSize); - assertThat(splitSize).isLessThan(maxFileSize); + long splitSize = group.sizeInBytes(); + assertThat(splitSize).isGreaterThanOrEqualTo(targetFileSize).isLessThan(maxFileSize); } - private static class SizeBasedDataFileRewriterImpl extends SizeBasedDataRewriter { + private static class TestingPlanner extends RewriteFileGroupPlanner { + private final List tasks; - SizeBasedDataFileRewriterImpl(Table table) { - super(table); + private TestingPlanner( + Table table, Expression filter, long snapshotId, List tasks) { + super(table, filter, snapshotId, false); + this.tasks = tasks; } @Override - public Set rewrite(List group) { - throw new UnsupportedOperationException("Not implemented"); - } - - public long computeSplitSize(List group) { - return splitSize(inputSize(group)); + CloseableIterable tasks() { + return CloseableIterable.withNoopClose(tasks); } + } - public long computeNumOutputFiles(List group) { - return numOutputFiles(inputSize(group)); - } + private DataFile mockDataFile() { + DataFile file = Mockito.mock(DataFile.class); + when(file.partition()).thenReturn(Mockito.mock(StructLike.class)); + when(file.fileSizeInBytes()).thenReturn(145L * 1024 * 1024); + return file; } } diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFiles.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFiles.java index f3be0a870972..5a1bdb983f7c 100644 --- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFiles.java +++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFiles.java @@ -51,7 +51,7 @@ import org.apache.iceberg.Table; import org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupRewriteResult; import 
org.apache.iceberg.actions.RewritePositionDeleteFiles.Result; -import org.apache.iceberg.actions.SizeBasedFileRewriter; +import org.apache.iceberg.actions.SizeBasedFileRewritePlanner; import org.apache.iceberg.data.GenericAppenderFactory; import org.apache.iceberg.data.Record; import org.apache.iceberg.deletes.PositionDelete; @@ -217,7 +217,7 @@ private void testDanglingDelete(String partitionCol, int numDataFiles) throws Ex SparkActions.get(spark) .rewriteDataFiles(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); // write dangling delete files for 'old data files' @@ -230,7 +230,7 @@ private void testDanglingDelete(String partitionCol, int numDataFiles) throws Ex Result result = SparkActions.get(spark) .rewritePositionDeletes(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); List newDeleteFiles = deleteFiles(table); diff --git a/spark/v3.5/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java b/spark/v3.5/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java index 95bebc7caed4..88ab82bd600a 100644 --- a/spark/v3.5/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java +++ b/spark/v3.5/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java @@ -35,7 +35,7 @@ import org.apache.iceberg.SortDirection; import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; -import org.apache.iceberg.actions.SizeBasedFileRewriter; +import org.apache.iceberg.actions.SizeBasedFileRewritePlanner; import org.apache.iceberg.relocated.com.google.common.io.Files; import org.apache.iceberg.spark.Spark3Util; import org.apache.iceberg.spark.SparkSchemaUtil; @@ -105,7 +105,7 @@ public void cleanUpIteration() throws IOException { public void sortInt() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .sort( SortOrder.builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -118,7 +118,7 @@ public void sortInt() { public void sortInt2() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .sort( SortOrder.builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -132,7 +132,7 @@ public void sortInt2() { public void sortInt3() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .sort( SortOrder.builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -148,7 +148,7 @@ public void sortInt3() { public void sortInt4() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .sort( SortOrder.builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -164,7 +164,7 @@ public void sortInt4() { public void sortString() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .sort( SortOrder.builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, 
NullOrder.NULLS_FIRST) @@ -177,7 +177,7 @@ public void sortString() { public void sortFourColumns() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .sort( SortOrder.builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -193,7 +193,7 @@ public void sortFourColumns() { public void sortSixColumns() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .sort( SortOrder.builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -211,7 +211,7 @@ public void sortSixColumns() { public void zSortInt() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .zOrder("intCol") .execute(); } @@ -221,7 +221,7 @@ public void zSortInt() { public void zSortInt2() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .zOrder("intCol", "intCol2") .execute(); } @@ -231,7 +231,7 @@ public void zSortInt2() { public void zSortInt3() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .zOrder("intCol", "intCol2", "intCol3") .execute(); } @@ -241,7 +241,7 @@ public void zSortInt3() { public void zSortInt4() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .zOrder("intCol", "intCol2", "intCol3", "intCol4") .execute(); } @@ -251,7 +251,7 @@ public void zSortInt4() { public void zSortString() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .zOrder("stringCol") .execute(); } @@ -261,7 +261,7 @@ public void zSortString() { public void zSortFourColumns() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .zOrder("stringCol", "intCol", "dateCol", "doubleCol") .execute(); } @@ -271,7 +271,7 @@ public void zSortFourColumns() { public void zSortSixColumns() { SparkActions.get() .rewriteDataFiles(table()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .zOrder("stringCol", "intCol", "dateCol", "timestampCol", "doubleCol", "longCol") .execute(); } diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java index 442ebc09670f..8cf189ee8b79 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java @@ -30,18 +30,17 @@ import java.util.stream.Collectors; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileScanTask; -import org.apache.iceberg.RewriteJobOrder; import org.apache.iceberg.SortOrder; import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; -import org.apache.iceberg.actions.FileRewriter; +import 
org.apache.iceberg.actions.FileRewriteExecutor; +import org.apache.iceberg.actions.FileRewritePlan; import org.apache.iceberg.actions.ImmutableRewriteDataFiles; import org.apache.iceberg.actions.ImmutableRewriteDataFiles.Result.Builder; import org.apache.iceberg.actions.RewriteDataFiles; import org.apache.iceberg.actions.RewriteDataFilesCommitManager; import org.apache.iceberg.actions.RewriteFileGroup; import org.apache.iceberg.actions.RewriteFileGroupPlanner; -import org.apache.iceberg.actions.RewriteFileGroupPlanner.RewritePlan; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.expressions.Expression; @@ -93,9 +92,10 @@ public class RewriteDataFilesSparkAction private boolean partialProgressEnabled; private boolean removeDanglingDeletes; private boolean useStartingSequenceNumber; - private RewriteJobOrder rewriteJobOrder; - private FileRewriter rewriter = null; private boolean caseSensitive; + private RewriteFileGroupPlanner planner = null; + private FileRewriteExecutor rewriter = + null; RewriteDataFilesSparkAction(SparkSession spark, Table table) { super(spark.cloneSession()); @@ -114,7 +114,7 @@ protected RewriteDataFilesSparkAction self() { public RewriteDataFilesSparkAction binPack() { Preconditions.checkArgument( rewriter == null, "Must use only one rewriter type (bin-pack, sort, zorder)"); - this.rewriter = new SparkBinPackDataRewriter(spark(), table); + this.rewriter = new SparkBinPackDataRewriteExecutor(spark(), table); return this; } @@ -122,7 +122,7 @@ public RewriteDataFilesSparkAction binPack() { public RewriteDataFilesSparkAction sort(SortOrder sortOrder) { Preconditions.checkArgument( rewriter == null, "Must use only one rewriter type (bin-pack, sort, zorder)"); - this.rewriter = new SparkSortDataRewriter(spark(), table, sortOrder); + this.rewriter = new SparkSortDataRewriteExecutor(spark(), table, sortOrder); return this; } @@ -130,7 +130,7 @@ public RewriteDataFilesSparkAction sort(SortOrder sortOrder) { public RewriteDataFilesSparkAction sort() { Preconditions.checkArgument( rewriter == null, "Must use only one rewriter type (bin-pack, sort, zorder)"); - this.rewriter = new SparkSortDataRewriter(spark(), table); + this.rewriter = new SparkSortDataRewriteExecutor(spark(), table); return this; } @@ -138,7 +138,7 @@ public RewriteDataFilesSparkAction sort() { public RewriteDataFilesSparkAction zOrder(String... 
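// Editor's sketch: the strategy setters above now construct Spark*DataRewriteExecutor
// instances instead of Spark*DataRewriter ones, but still enforce a single strategy per
// action. Assuming `spark` and `table`, a second strategy call keeps failing:
try {
  SparkActions.get(spark)
      .rewriteDataFiles(table)
      .binPack() // installs SparkBinPackDataRewriteExecutor
      .sort();   // rejected by the Preconditions check in the setter
} catch (IllegalArgumentException e) {
  // "Must use only one rewriter type (bin-pack, sort, zorder)"
}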
columnNames) { Preconditions.checkArgument( rewriter == null, "Must use only one rewriter type (bin-pack, sort, zorder)"); - this.rewriter = new SparkZOrderDataRewriter(spark(), table, Arrays.asList(columnNames)); + this.rewriter = new SparkZOrderDataRewriteExecutor(spark(), table, Arrays.asList(columnNames)); return this; } @@ -156,14 +156,10 @@ public RewriteDataFiles.Result execute() { long startingSnapshotId = table.currentSnapshot().snapshotId(); - // Default to BinPack if no strategy selected - if (this.rewriter == null) { - this.rewriter = new SparkBinPackDataRewriter(spark(), table); - } - - validateAndInitOptions(); + init(startingSnapshotId); - RewritePlan plan = plan(startingSnapshotId); + FileRewritePlan plan = plan(); + rewriter.initPlan(plan); if (plan.totalGroupCount() == 0) { LOG.info("Nothing found to rewrite in {}", table.name()); @@ -185,18 +181,32 @@ public RewriteDataFiles.Result execute() { return resultBuilder.build(); } - RewritePlan plan(long startingSnapshotId) { - return new RewriteFileGroupPlanner(rewriter, rewriteJobOrder) - .plan(table, filter, startingSnapshotId, caseSensitive); + @VisibleForTesting + FileRewritePlan plan() { + return planner.plan(); } @VisibleForTesting - RewriteFileGroup rewriteFiles(RewritePlan plan, RewriteFileGroup fileGroup) { + void init(long startingSnapshotId) { + + this.planner = new RewriteFileGroupPlanner(table, filter, startingSnapshotId, caseSensitive); + + // Default to BinPack if no strategy selected + if (this.rewriter == null) { + this.rewriter = new SparkBinPackDataRewriteExecutor(spark(), table); + } + + validateAndInitOptions(); + } + + @VisibleForTesting + RewriteFileGroup rewriteFiles( + FileRewritePlan plan, + RewriteFileGroup fileGroup) { String desc = jobDesc(fileGroup, plan); Set addedFiles = withJobGroupInfo( - newJobGroupInfo("REWRITE-DATA-FILES", desc), - () -> rewriter.rewrite(fileGroup.fileScans())); + newJobGroupInfo("REWRITE-DATA-FILES", desc), () -> rewriter.rewrite(fileGroup)); fileGroup.setOutputFiles(addedFiles); LOG.info("Rewrite Files Ready to be Committed - {}", desc); @@ -217,7 +227,9 @@ RewriteDataFilesCommitManager commitManager(long startingSnapshotId) { table, startingSnapshotId, useStartingSequenceNumber, commitSummary()); } - private Builder doExecute(RewritePlan plan, RewriteDataFilesCommitManager commitManager) { + private Builder doExecute( + FileRewritePlan plan, + RewriteDataFilesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); ConcurrentLinkedQueue rewrittenGroups = Queues.newConcurrentLinkedQueue(); @@ -277,7 +289,8 @@ private Builder doExecute(RewritePlan plan, RewriteDataFilesCommitManager commit } private Builder doExecuteWithPartialProgress( - RewritePlan plan, RewriteDataFilesCommitManager commitManager) { + FileRewritePlan plan, + RewriteDataFilesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); // start commit service @@ -298,7 +311,7 @@ private Builder doExecuteWithPartialProgress( rewriteFailures.add( ImmutableRewriteDataFiles.FileGroupFailureResult.builder() .info(fileGroup.info()) - .dataFilesCount(fileGroup.numFiles()) + .dataFilesCount(fileGroup.numInputFiles()) .build()); }) .run(fileGroup -> commitService.offer(rewriteFiles(plan, fileGroup))); @@ -341,6 +354,7 @@ private Iterable toRewriteResults(List void validateAndInitOptions() { Set validOptions = Sets.newHashSet(rewriter.validOptions()); validOptions.addAll(VALID_OPTIONS); + validOptions.addAll(planner.validOptions()); Set invalidKeys = 
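// Editor's sketch of the reworked execute() flow above (generic parameters elided for
// readability; all names are from the diff): planning is delegated to the new
// RewriteFileGroupPlanner and the executor is primed with the resulting plan.
long startingSnapshotId = table.currentSnapshot().snapshotId();
init(startingSnapshotId);      // builds the planner; defaults the executor to bin-pack
FileRewritePlan plan = plan(); // planner.plan()
rewriter.initPlan(plan);       // executor reads writeMaxFileSize / outputSpecId from the plan
if (plan.totalGroupCount() == 0) {
  LOG.info("Nothing found to rewrite in {}", table.name());
}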
Sets.newHashSet(options().keySet()); invalidKeys.removeAll(validOptions); @@ -351,6 +365,7 @@ void validateAndInitOptions() { invalidKeys, rewriter.description()); + planner.init(options()); rewriter.init(options()); maxConcurrentFileGroupRewrites = @@ -378,10 +393,6 @@ void validateAndInitOptions() { PropertyUtil.propertyAsBoolean( options(), REMOVE_DANGLING_DELETES, REMOVE_DANGLING_DELETES_DEFAULT); - rewriteJobOrder = - RewriteJobOrder.fromName( - PropertyUtil.propertyAsString(options(), REWRITE_JOB_ORDER, REWRITE_JOB_ORDER_DEFAULT)); - Preconditions.checkArgument( maxConcurrentFileGroupRewrites >= 1, "Cannot set %s to %s, the value must be positive.", @@ -396,7 +407,9 @@ void validateAndInitOptions() { PARTIAL_PROGRESS_ENABLED); } - private String jobDesc(RewriteFileGroup group, RewritePlan plan) { + private String jobDesc( + RewriteFileGroup group, + FileRewritePlan plan) { StructLike partition = group.info().partition(); if (partition.size() > 0) { return String.format( diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewritePositionDeleteFilesSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewritePositionDeleteFilesSparkAction.java index 2562c74eafcc..e237f46a163f 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewritePositionDeleteFilesSparkAction.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewritePositionDeleteFilesSparkAction.java @@ -18,52 +18,39 @@ */ package org.apache.iceberg.spark.actions; -import java.io.IOException; import java.math.RoundingMode; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.iceberg.DeleteFile; -import org.apache.iceberg.MetadataTableType; -import org.apache.iceberg.MetadataTableUtils; -import org.apache.iceberg.Partitioning; import org.apache.iceberg.PositionDeletesScanTask; -import org.apache.iceberg.PositionDeletesTable.PositionDeletesBatchScan; -import org.apache.iceberg.RewriteJobOrder; import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; +import org.apache.iceberg.actions.FileRewritePlan; import org.apache.iceberg.actions.ImmutableRewritePositionDeleteFiles; import org.apache.iceberg.actions.RewritePositionDeleteFiles; import org.apache.iceberg.actions.RewritePositionDeletesCommitManager; import org.apache.iceberg.actions.RewritePositionDeletesCommitManager.CommitService; import org.apache.iceberg.actions.RewritePositionDeletesGroup; +import org.apache.iceberg.actions.RewritePositionDeletesGroupPlanner; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import 
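// Editor's note: option handling above is now split across two components. The accepted
// keys are the union of executor and planner options, and both are initialized from the
// same map (condensed from the hunk; REWRITE_JOB_ORDER parsing moved into the planner):
Set<String> validOptions = Sets.newHashSet(rewriter.validOptions());
validOptions.addAll(VALID_OPTIONS);
validOptions.addAll(planner.validOptions());
Set<String> invalidKeys = Sets.newHashSet(options().keySet());
invalidKeys.removeAll(validOptions);
// invalidKeys must be empty, otherwise the action fails fast
planner.init(options());
rewriter.init(options());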
org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Queues; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.relocated.com.google.common.math.IntMath; import org.apache.iceberg.relocated.com.google.common.util.concurrent.MoreExecutors; import org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.iceberg.spark.SparkUtil; -import org.apache.iceberg.types.Types.StructType; -import org.apache.iceberg.util.PartitionUtil; import org.apache.iceberg.util.PropertyUtil; -import org.apache.iceberg.util.StructLikeMap; import org.apache.iceberg.util.Tasks; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; @@ -86,20 +73,20 @@ public class RewritePositionDeleteFilesSparkAction ImmutableRewritePositionDeleteFiles.Result.builder().build(); private final Table table; - private final SparkBinPackPositionDeletesRewriter rewriter; + private RewritePositionDeletesGroupPlanner planner; + private final SparkBinPackPositionDeletesRewriteExecutor rewriter; private Expression filter = Expressions.alwaysTrue(); private int maxConcurrentFileGroupRewrites; private int maxCommits; private boolean partialProgressEnabled; - private RewriteJobOrder rewriteJobOrder; private boolean caseSensitive; RewritePositionDeleteFilesSparkAction(SparkSession spark, Table table) { super(spark); this.table = table; - this.rewriter = new SparkBinPackPositionDeletesRewriter(spark(), table); this.caseSensitive = SparkUtil.caseSensitive(spark); + this.rewriter = new SparkBinPackPositionDeletesRewriteExecutor(spark(), table); } @Override @@ -120,86 +107,41 @@ public RewritePositionDeleteFiles.Result execute() { return EMPTY_RESULT; } + this.planner = new RewritePositionDeletesGroupPlanner(table, filter, caseSensitive); + validateAndInitOptions(); - StructLikeMap>> fileGroupsByPartition = planFileGroups(); - RewriteExecutionContext ctx = new RewriteExecutionContext(fileGroupsByPartition); + FileRewritePlan + plan = plan(); + rewriter.initPlan(plan); - if (ctx.totalGroupCount() == 0) { + if (plan.totalGroupCount() == 0) { LOG.info("Nothing found to rewrite in {}", table.name()); return EMPTY_RESULT; } - Stream groupStream = toGroupStream(ctx, fileGroupsByPartition); - if (partialProgressEnabled) { - return doExecuteWithPartialProgress(ctx, groupStream, commitManager()); + return doExecuteWithPartialProgress(plan, commitManager()); } else { - return doExecute(ctx, groupStream, commitManager()); - } - } - - private StructLikeMap>> planFileGroups() { - Table deletesTable = - MetadataTableUtils.createMetadataTableInstance(table, MetadataTableType.POSITION_DELETES); - CloseableIterable fileTasks = planFiles(deletesTable); - - try { - StructType partitionType = Partitioning.partitionType(deletesTable); - StructLikeMap> fileTasksByPartition = - groupByPartition(partitionType, fileTasks); - return fileGroupsByPartition(fileTasksByPartition); - } finally { - try { - fileTasks.close(); - } catch (IOException io) { - LOG.error("Cannot properly close file iterable while planning for rewrite", io); - } - } - } - - private CloseableIterable planFiles(Table deletesTable) { - PositionDeletesBatchScan scan = (PositionDeletesBatchScan) deletesTable.newBatchScan(); - return CloseableIterable.transform( - scan.baseTableFilter(filter).caseSensitive(caseSensitive).ignoreResiduals().planFiles(), - task -> (PositionDeletesScanTask) task); - } - - private StructLikeMap> groupByPartition( - 
StructType partitionType, Iterable tasks) { - StructLikeMap> filesByPartition = - StructLikeMap.create(partitionType); - - for (PositionDeletesScanTask task : tasks) { - StructLike coerced = coercePartition(task, partitionType); - - List partitionTasks = filesByPartition.get(coerced); - if (partitionTasks == null) { - partitionTasks = Lists.newArrayList(); - } - partitionTasks.add(task); - filesByPartition.put(coerced, partitionTasks); + return doExecute(plan, commitManager()); } - - return filesByPartition; - } - - private StructLikeMap>> fileGroupsByPartition( - StructLikeMap> filesByPartition) { - return filesByPartition.transformValues(this::planFileGroups); } - private List> planFileGroups(List tasks) { - return ImmutableList.copyOf(rewriter.planFileGroups(tasks)); + @VisibleForTesting + FileRewritePlan + plan() { + return planner.plan(); } private RewritePositionDeletesGroup rewriteDeleteFiles( - RewriteExecutionContext ctx, RewritePositionDeletesGroup fileGroup) { - String desc = jobDesc(fileGroup, ctx); + FileRewritePlan< + FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> + plan, + RewritePositionDeletesGroup fileGroup) { + String desc = jobDesc(fileGroup, plan); Set addedFiles = withJobGroupInfo( - newJobGroupInfo("REWRITE-POSITION-DELETES", desc), - () -> rewriter.rewrite(fileGroup.tasks())); + newJobGroupInfo("REWRITE-POSITION-DELETES", desc), () -> rewriter.rewrite(fileGroup)); fileGroup.setOutputFiles(addedFiles); LOG.info("Rewrite position deletes ready to be committed - {}", desc); @@ -221,8 +163,9 @@ private RewritePositionDeletesCommitManager commitManager() { } private Result doExecute( - RewriteExecutionContext ctx, - Stream groupStream, + FileRewritePlan< + FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> + plan, RewritePositionDeletesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); @@ -230,7 +173,7 @@ private Result doExecute( Queues.newConcurrentLinkedQueue(); Tasks.Builder rewriteTaskBuilder = - Tasks.foreach(groupStream) + Tasks.foreach(plan.groups()) .executeWith(rewriteService) .stopOnFailure() .noRetry() @@ -242,7 +185,7 @@ private Result doExecute( exception)); try { - rewriteTaskBuilder.run(fileGroup -> rewrittenGroups.add(rewriteDeleteFiles(ctx, fileGroup))); + rewriteTaskBuilder.run(fileGroup -> rewrittenGroups.add(rewriteDeleteFiles(plan, fileGroup))); } catch (Exception e) { // At least one rewrite group failed, clean up all completed rewrites LOG.error( @@ -288,25 +231,26 @@ private Result doExecute( } private Result doExecuteWithPartialProgress( - RewriteExecutionContext ctx, - Stream groupStream, + FileRewritePlan< + FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> + plan, RewritePositionDeletesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); // start commit service - int groupsPerCommit = IntMath.divide(ctx.totalGroupCount(), maxCommits, RoundingMode.CEILING); + int groupsPerCommit = IntMath.divide(plan.totalGroupCount(), maxCommits, RoundingMode.CEILING); CommitService commitService = commitManager.service(groupsPerCommit); commitService.start(); // start rewrite tasks - Tasks.foreach(groupStream) + Tasks.foreach(plan.groups()) .suppressFailureWhenFinished() .executeWith(rewriteService) .noRetry() .onFailure( (fileGroup, exception) -> LOG.error("Failure during rewrite group {}", fileGroup.info(), exception)) - .run(fileGroup -> commitService.offer(rewriteDeleteFiles(ctx, fileGroup))); + 
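// Editor's sketch: after this refactor the position-delete action mirrors the data-file
// action — a dedicated planner produces the plan, and the executor consumes it (generic
// parameters elided; see the hunk above for the full FileRewritePlan<...> types):
this.planner = new RewritePositionDeletesGroupPlanner(table, filter, caseSensitive);
validateAndInitOptions();
FileRewritePlan plan = plan(); // planner.plan()
rewriter.initPlan(plan);
return plan.totalGroupCount() == 0
    ? EMPTY_RESULT
    : partialProgressEnabled
        ? doExecuteWithPartialProgress(plan, commitManager())
        : doExecute(plan, commitManager());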
.run(fileGroup -> commitService.offer(rewriteDeleteFiles(plan, fileGroup))); rewriteService.shutdown(); // stop commit service @@ -330,36 +274,10 @@ private Result doExecuteWithPartialProgress( .build(); } - private Stream toGroupStream( - RewriteExecutionContext ctx, - Map>> groupsByPartition) { - return groupsByPartition.entrySet().stream() - .filter(e -> !e.getValue().isEmpty()) - .flatMap( - e -> { - StructLike partition = e.getKey(); - List> scanGroups = e.getValue(); - return scanGroups.stream().map(tasks -> newRewriteGroup(ctx, partition, tasks)); - }) - .sorted(RewritePositionDeletesGroup.comparator(rewriteJobOrder)); - } - - private RewritePositionDeletesGroup newRewriteGroup( - RewriteExecutionContext ctx, StructLike partition, List tasks) { - int globalIndex = ctx.currentGlobalIndex(); - int partitionIndex = ctx.currentPartitionIndex(partition); - FileGroupInfo info = - ImmutableRewritePositionDeleteFiles.FileGroupInfo.builder() - .globalIndex(globalIndex) - .partitionIndex(partitionIndex) - .partition(partition) - .build(); - return new RewritePositionDeletesGroup(info, tasks); - } - private void validateAndInitOptions() { Set validOptions = Sets.newHashSet(rewriter.validOptions()); validOptions.addAll(VALID_OPTIONS); + validOptions.addAll(planner.validOptions()); Set invalidKeys = Sets.newHashSet(options().keySet()); invalidKeys.removeAll(validOptions); @@ -370,6 +288,7 @@ private void validateAndInitOptions() { invalidKeys, rewriter.description()); + planner.init(options()); rewriter.init(options()); this.maxConcurrentFileGroupRewrites = @@ -386,10 +305,6 @@ private void validateAndInitOptions() { PropertyUtil.propertyAsBoolean( options(), PARTIAL_PROGRESS_ENABLED, PARTIAL_PROGRESS_ENABLED_DEFAULT); - this.rewriteJobOrder = - RewriteJobOrder.fromName( - PropertyUtil.propertyAsString(options(), REWRITE_JOB_ORDER, REWRITE_JOB_ORDER_DEFAULT)); - Preconditions.checkArgument( maxConcurrentFileGroupRewrites >= 1, "Cannot set %s to %s, the value must be positive.", @@ -404,7 +319,11 @@ private void validateAndInitOptions() { PARTIAL_PROGRESS_ENABLED); } - private String jobDesc(RewritePositionDeletesGroup group, RewriteExecutionContext ctx) { + private String jobDesc( + RewritePositionDeletesGroup group, + FileRewritePlan< + FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> + plan) { StructLike partition = group.info().partition(); if (partition.size() > 0) { return String.format( @@ -412,10 +331,10 @@ private String jobDesc(RewritePositionDeletesGroup group, RewriteExecutionContex group.rewrittenDeleteFiles().size(), rewriter.description(), group.info().globalIndex(), - ctx.totalGroupCount(), + plan.totalGroupCount(), partition, group.info().partitionIndex(), - ctx.groupsInPartition(partition), + plan.groupsInPartition(partition), table.name()); } else { return String.format( @@ -423,43 +342,8 @@ private String jobDesc(RewritePositionDeletesGroup group, RewriteExecutionContex group.rewrittenDeleteFiles().size(), rewriter.description(), group.info().globalIndex(), - ctx.totalGroupCount(), + plan.totalGroupCount(), table.name()); } } - - static class RewriteExecutionContext { - private final StructLikeMap numGroupsByPartition; - private final int totalGroupCount; - private final Map partitionIndexMap; - private final AtomicInteger groupIndex; - - RewriteExecutionContext( - StructLikeMap>> fileTasksByPartition) { - this.numGroupsByPartition = fileTasksByPartition.transformValues(List::size); - this.totalGroupCount = 
numGroupsByPartition.values().stream().reduce(Integer::sum).orElse(0); - this.partitionIndexMap = Maps.newConcurrentMap(); - this.groupIndex = new AtomicInteger(1); - } - - public int currentGlobalIndex() { - return groupIndex.getAndIncrement(); - } - - public int currentPartitionIndex(StructLike partition) { - return partitionIndexMap.merge(partition, 1, Integer::sum); - } - - public int groupsInPartition(StructLike partition) { - return numGroupsByPartition.get(partition); - } - - public int totalGroupCount() { - return totalGroupCount; - } - } - - private StructLike coercePartition(PositionDeletesScanTask task, StructType partitionType) { - return PartitionUtil.coercePartition(partitionType, task.spec(), task.partition()); - } } diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackDataRewriter.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackDataRewriteExecutor.java similarity index 88% rename from spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackDataRewriter.java rename to spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackDataRewriteExecutor.java index d256bf2794e2..d1c70ee289c6 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackDataRewriter.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackDataRewriteExecutor.java @@ -28,9 +28,9 @@ import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; -class SparkBinPackDataRewriter extends SparkSizeBasedDataRewriter { +class SparkBinPackDataRewriteExecutor extends SparkSizeBasedDataRewriteExecutor { - SparkBinPackDataRewriter(SparkSession spark, Table table) { + SparkBinPackDataRewriteExecutor(SparkSession spark, Table table) { super(spark, table); } @@ -40,14 +40,15 @@ public String description() { } @Override - protected void doRewrite(String groupId, List group) { + protected void doRewrite( + String groupId, List group, long splitSize, int expectedOutputFiles) { // read the files packing them into splits of the required size Dataset scanDF = spark() .read() .format("iceberg") .option(SparkReadOptions.SCAN_TASK_SET_ID, groupId) - .option(SparkReadOptions.SPLIT_SIZE, splitSize(inputSize(group))) + .option(SparkReadOptions.SPLIT_SIZE, splitSize) .option(SparkReadOptions.FILE_OPEN_COST, "0") .load(groupId); diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackPositionDeletesRewriter.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackPositionDeletesRewriteExecutor.java similarity index 88% rename from spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackPositionDeletesRewriter.java rename to spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackPositionDeletesRewriteExecutor.java index 5afd724aad88..fb8b73f17463 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackPositionDeletesRewriter.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackPositionDeletesRewriteExecutor.java @@ -34,7 +34,8 @@ import org.apache.iceberg.PositionDeletesScanTask; import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; -import org.apache.iceberg.actions.SizeBasedPositionDeletesRewriter; +import org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupInfo; +import org.apache.iceberg.actions.RewritePositionDeletesGroup; import 
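// Editor's note: the RewriteExecutionContext deleted above existed to hand out a global
// group index, a per-partition index, and group counts. Indexes are now assigned when
// groups are planned, and the counts live on the plan itself, so a job description can
// be derived from the group and the plan alone (hypothetical condensed sketch):
int global = group.info().globalIndex();          // was ctx.currentGlobalIndex()
int inPartition = group.info().partitionIndex();  // was ctx.currentPartitionIndex(partition)
int total = plan.totalGroupCount();               // was ctx.totalGroupCount()
int partitionTotal = plan.groupsInPartition(group.info().partition());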
org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.spark.PositionDeletesRewriteCoordinator; import org.apache.iceberg.spark.ScanTaskSetManager; @@ -51,7 +52,9 @@ import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.internal.SQLConf; -class SparkBinPackPositionDeletesRewriter extends SizeBasedPositionDeletesRewriter { +class SparkBinPackPositionDeletesRewriteExecutor + extends SparkRewriteExecutor< + FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> { private final SparkSession spark; private final SparkTableCache tableCache = SparkTableCache.get(); @@ -59,7 +62,7 @@ class SparkBinPackPositionDeletesRewriter extends SizeBasedPositionDeletesRewrit private final PositionDeletesRewriteCoordinator coordinator = PositionDeletesRewriteCoordinator.get(); - SparkBinPackPositionDeletesRewriter(SparkSession spark, Table table) { + SparkBinPackPositionDeletesRewriteExecutor(SparkSession spark, Table table) { super(table); // Disable Adaptive Query Execution as this may change the output partitioning of our write this.spark = spark.cloneSession(); @@ -72,14 +75,14 @@ public String description() { } @Override - public Set rewrite(List group) { + public Set rewrite(RewritePositionDeletesGroup group) { String groupId = UUID.randomUUID().toString(); Table deletesTable = MetadataTableUtils.createMetadataTableInstance(table(), POSITION_DELETES); try { tableCache.add(groupId, deletesTable); - taskSetManager.stageTasks(deletesTable, groupId, group); + taskSetManager.stageTasks(deletesTable, groupId, group.fileScans()); - doRewrite(groupId, group); + doRewrite(groupId, group.fileScans(), group.splitSize()); return coordinator.fetchNewFiles(deletesTable, groupId); } finally { @@ -89,7 +92,7 @@ public Set rewrite(List group) { } } - protected void doRewrite(String groupId, List group) { + protected void doRewrite(String groupId, List group, long splitSize) { // all position deletes are of the same partition, because they are in same file group Preconditions.checkArgument(!group.isEmpty(), "Empty group"); Types.StructType partitionType = group.get(0).spec().partitionType(); @@ -101,7 +104,7 @@ protected void doRewrite(String groupId, List group) { .read() .format("iceberg") .option(SparkReadOptions.SCAN_TASK_SET_ID, groupId) - .option(SparkReadOptions.SPLIT_SIZE, splitSize(inputSize(group))) + .option(SparkReadOptions.SPLIT_SIZE, splitSize) .option(SparkReadOptions.FILE_OPEN_COST, "0") .load(groupId); diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkRewriteExecutor.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkRewriteExecutor.java new file mode 100644 index 000000000000..f723be7d633d --- /dev/null +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkRewriteExecutor.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.spark.actions; + +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.ContentFile; +import org.apache.iceberg.ContentScanTask; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Table; +import org.apache.iceberg.actions.FileRewriteExecutor; +import org.apache.iceberg.actions.FileRewriteGroup; +import org.apache.iceberg.actions.FileRewritePlan; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; + +/** + * Common parent for data and positional delete rewrite executors. + * + * @param the Java type of the plan info + * @param the Java type of the tasks to read content files + * @param the Java type of the content files + * @param the Java type of the planned groups + */ +abstract class SparkRewriteExecutor< + I, + T extends ContentScanTask, + F extends ContentFile, + G extends FileRewriteGroup> + implements FileRewriteExecutor { + private final Table table; + private long writeMaxFileSize; + private int outputSpecId; + + SparkRewriteExecutor(Table table) { + this.table = table; + } + + Table table() { + return table; + } + + long writeMaxFileSize() { + return writeMaxFileSize; + } + + int outputSpecId() { + return outputSpecId; + } + + PartitionSpec outputSpec() { + return table.specs().get(outputSpecId); + } + + @Override + public void initPlan(FileRewritePlan plan) { + this.writeMaxFileSize = plan.writeMaxFileSize(); + this.outputSpecId = plan.outputSpecId(); + } + + @Override + public Set validOptions() { + return ImmutableSet.of(); + } + + @Override + public void init(Map options) {} +} diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewriter.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewriteExecutor.java similarity index 93% rename from spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewriter.java rename to spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewriteExecutor.java index ce572c6486cc..e5090a68bff2 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewriter.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewriteExecutor.java @@ -48,7 +48,7 @@ import org.apache.spark.sql.execution.datasources.v2.DistributionAndOrderingUtils$; import scala.Option; -abstract class SparkShufflingDataRewriter extends SparkSizeBasedDataRewriter { +abstract class SparkShufflingDataRewriteExecutor extends SparkSizeBasedDataRewriteExecutor { /** * The number of shuffle partitions and consequently the number of output files created by the @@ -82,7 +82,7 @@ abstract class SparkShufflingDataRewriter extends SparkSizeBasedDataRewriter { private double compressionFactor; private int numShufflePartitionsPerFile; - protected SparkShufflingDataRewriter(SparkSession spark, Table table) { + protected SparkShufflingDataRewriteExecutor(SparkSession spark, Table table) { super(spark, table); } @@ -118,7 +118,8 @@ public void init(Map options) { } @Override - 
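// Editor's sketch: a minimal, hypothetical concrete executor, to show what the new
// SparkRewriteExecutor base class above leaves to subclasses. initPlan() in the base
// class already caches writeMaxFileSize and outputSpecId from the plan; description()
// and rewrite(G) are assumed here to come from the FileRewriteExecutor interface.
class NoopDataRewriteExecutor
    extends SparkRewriteExecutor<FileGroupInfo, FileScanTask, DataFile, RewriteFileGroup> {

  NoopDataRewriteExecutor(Table table) {
    super(table);
  }

  @Override
  public String description() {
    return "NOOP";
  }

  @Override
  public Set<DataFile> rewrite(RewriteFileGroup group) {
    // a real executor stages group.fileScans(), runs a Spark job bounded by
    // writeMaxFileSize() and outputSpec(), and returns the files it wrote
    return ImmutableSet.of();
  }
}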
public void doRewrite(String groupId, List group) { + public void doRewrite( + String groupId, List group, long splitSize, int expectedOutputFiles) { Dataset scanDF = spark() .read() @@ -126,7 +127,7 @@ public void doRewrite(String groupId, List group) { .option(SparkReadOptions.SCAN_TASK_SET_ID, groupId) .load(groupId); - Dataset sortedDF = sortedDF(scanDF, sortFunction(group)); + Dataset sortedDF = sortedDF(scanDF, sortFunction(group, expectedOutputFiles)); sortedDF .write() @@ -139,9 +140,10 @@ public void doRewrite(String groupId, List group) { .save(groupId); } - private Function, Dataset> sortFunction(List group) { + private Function, Dataset> sortFunction( + List group, int expectedOutputFiles) { SortOrder[] ordering = Spark3Util.toOrdering(outputSortOrder(group)); - int numShufflePartitions = numShufflePartitions(group); + int numShufflePartitions = Math.max(1, expectedOutputFiles * numShufflePartitionsPerFile); return (df) -> transformPlan(df, plan -> sortPlan(plan, ordering, numShufflePartitions)); } @@ -176,11 +178,6 @@ private org.apache.iceberg.SortOrder outputSortOrder(List group) { } } - private int numShufflePartitions(List group) { - int numOutputFiles = (int) numOutputFiles((long) (inputSize(group) * compressionFactor)); - return Math.max(1, numOutputFiles * numShufflePartitionsPerFile); - } - private double compressionFactor(Map options) { double value = PropertyUtil.propertyAsDouble(options, COMPRESSION_FACTOR, COMPRESSION_FACTOR_DEFAULT); diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriter.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriteExecutor.java similarity index 74% rename from spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriter.java rename to spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriteExecutor.java index ae0e0d20dd4e..068979d8e5db 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriter.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriteExecutor.java @@ -24,38 +24,41 @@ import org.apache.iceberg.DataFile; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.Table; -import org.apache.iceberg.actions.SizeBasedDataRewriter; +import org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo; +import org.apache.iceberg.actions.RewriteFileGroup; import org.apache.iceberg.spark.FileRewriteCoordinator; import org.apache.iceberg.spark.ScanTaskSetManager; import org.apache.iceberg.spark.SparkTableCache; import org.apache.spark.sql.SparkSession; -abstract class SparkSizeBasedDataRewriter extends SizeBasedDataRewriter { +abstract class SparkSizeBasedDataRewriteExecutor + extends SparkRewriteExecutor { private final SparkSession spark; private final SparkTableCache tableCache = SparkTableCache.get(); private final ScanTaskSetManager taskSetManager = ScanTaskSetManager.get(); private final FileRewriteCoordinator coordinator = FileRewriteCoordinator.get(); - SparkSizeBasedDataRewriter(SparkSession spark, Table table) { + SparkSizeBasedDataRewriteExecutor(SparkSession spark, Table table) { super(table); this.spark = spark; } - protected abstract void doRewrite(String groupId, List group); + protected abstract void doRewrite( + String groupId, List group, long splitSize, int expectedOutputFiles); protected SparkSession spark() { return spark; } @Override - public Set rewrite(List group) { + 
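// Editor's note: shuffle sizing above no longer computes output-file counts from input
// size inside the executor; the planner supplies expectedOutputFiles per group and the
// executor merely scales it. Worked example of the surviving arithmetic:
int expectedOutputFiles = 4;          // provided by the plan for this file group
int numShufflePartitionsPerFile = 2;  // SHUFFLE_PARTITIONS_PER_FILE option
int numShufflePartitions = Math.max(1, expectedOutputFiles * numShufflePartitionsPerFile); // = 8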
public Set rewrite(RewriteFileGroup group) { String groupId = UUID.randomUUID().toString(); try { tableCache.add(groupId, table()); - taskSetManager.stageTasks(table(), groupId, group); + taskSetManager.stageTasks(table(), groupId, group.fileScans()); - doRewrite(groupId, group); + doRewrite(groupId, group.fileScans(), group.splitSize(), group.expectedOutputFiles()); return coordinator.fetchNewFiles(table(), groupId); } finally { diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortDataRewriter.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortDataRewriteExecutor.java similarity index 89% rename from spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortDataRewriter.java rename to spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortDataRewriteExecutor.java index 1f70d4d7ca9d..a1d4c57894cc 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortDataRewriter.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortDataRewriteExecutor.java @@ -26,11 +26,11 @@ import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; -class SparkSortDataRewriter extends SparkShufflingDataRewriter { +class SparkSortDataRewriteExecutor extends SparkShufflingDataRewriteExecutor { private final SortOrder sortOrder; - SparkSortDataRewriter(SparkSession spark, Table table) { + SparkSortDataRewriteExecutor(SparkSession spark, Table table) { super(spark, table); Preconditions.checkArgument( table.sortOrder().isSorted(), @@ -39,7 +39,7 @@ class SparkSortDataRewriter extends SparkShufflingDataRewriter { this.sortOrder = table.sortOrder(); } - SparkSortDataRewriter(SparkSession spark, Table table, SortOrder sortOrder) { + SparkSortDataRewriteExecutor(SparkSession spark, Table table, SortOrder sortOrder) { super(spark, table); Preconditions.checkArgument( sortOrder != null && sortOrder.isSorted(), diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderDataRewriter.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderDataRewriteExecutor.java similarity index 97% rename from spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderDataRewriter.java rename to spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderDataRewriteExecutor.java index cc4fb78ebd18..d4dc5affb4b6 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderDataRewriter.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderDataRewriteExecutor.java @@ -44,9 +44,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -class SparkZOrderDataRewriter extends SparkShufflingDataRewriter { +class SparkZOrderDataRewriteExecutor extends SparkShufflingDataRewriteExecutor { - private static final Logger LOG = LoggerFactory.getLogger(SparkZOrderDataRewriter.class); + private static final Logger LOG = LoggerFactory.getLogger(SparkZOrderDataRewriteExecutor.class); private static final String Z_COLUMN = "ICEZVALUE"; private static final Schema Z_SCHEMA = @@ -78,7 +78,7 @@ class SparkZOrderDataRewriter extends SparkShufflingDataRewriter { private int maxOutputSize; private int varLengthContribution; - SparkZOrderDataRewriter(SparkSession spark, Table table, List zOrderColNames) { + SparkZOrderDataRewriteExecutor(SparkSession spark, Table table, List zOrderColNames) { super(spark, table); this.zOrderColNames = 
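// Editor's sketch of the executor lifecycle kept by the hunk above: stage the group's
// scan tasks under a fresh groupId, rewrite, then collect the coordinator's output.
// The group object now carries what used to be recomputed from the raw task list.
String groupId = UUID.randomUUID().toString();
try {
  tableCache.add(groupId, table());
  taskSetManager.stageTasks(table(), groupId, group.fileScans()); // was: a bare List<FileScanTask>
  doRewrite(groupId, group.fileScans(), group.splitSize(), group.expectedOutputFiles());
  return coordinator.fetchNewFiles(table(), groupId);
} finally {
  // cache / task-set / coordinator cleanup is unchanged and elided from the hunk
}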
validZOrderColNames(spark, table, zOrderColNames); } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java index 2127b20aa9b1..980a1e71bef9 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java @@ -76,8 +76,8 @@ import org.apache.iceberg.actions.RewriteDataFiles.Result; import org.apache.iceberg.actions.RewriteDataFilesCommitManager; import org.apache.iceberg.actions.RewriteFileGroup; -import org.apache.iceberg.actions.SizeBasedDataRewriter; -import org.apache.iceberg.actions.SizeBasedFileRewriter; +import org.apache.iceberg.actions.RewriteFileGroupPlanner; +import org.apache.iceberg.actions.SizeBasedFileRewritePlanner; import org.apache.iceberg.data.GenericAppenderFactory; import org.apache.iceberg.data.GenericRecord; import org.apache.iceberg.data.Record; @@ -168,7 +168,9 @@ public void setupTableLocation() throws Exception { private RewriteDataFilesSparkAction basicRewrite(Table table) { // Always compact regardless of input files table.refresh(); - return actions().rewriteDataFiles(table).option(SizeBasedFileRewriter.MIN_INPUT_FILES, "1"); + return actions() + .rewriteDataFiles(table) + .option(SizeBasedFileRewritePlanner.MIN_INPUT_FILES, "1"); } @TestTemplate @@ -289,9 +291,9 @@ public void testBinPackAfterPartitionChange() { RewriteDataFiles.Result result = basicRewrite(table) - .option(SizeBasedFileRewriter.MIN_INPUT_FILES, "1") + .option(SizeBasedFileRewritePlanner.MIN_INPUT_FILES, "1") .option( - SizeBasedFileRewriter.MIN_FILE_SIZE_BYTES, + SizeBasedFileRewritePlanner.MIN_FILE_SIZE_BYTES, Integer.toString(averageFileSize(table) + 1000)) .option( RewriteDataFiles.TARGET_FILE_SIZE_BYTES, @@ -356,11 +358,12 @@ public void testBinPackWithDeletes() throws IOException { actions() .rewriteDataFiles(table) // do not include any file based on bin pack file size configs - .option(SizeBasedFileRewriter.MIN_FILE_SIZE_BYTES, "0") + .option(SizeBasedFileRewritePlanner.MIN_FILE_SIZE_BYTES, "0") .option(RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Long.toString(Long.MAX_VALUE - 1)) - .option(SizeBasedFileRewriter.MAX_FILE_SIZE_BYTES, Long.toString(Long.MAX_VALUE)) + .option( + SizeBasedFileRewritePlanner.MAX_FILE_SIZE_BYTES, Long.toString(Long.MAX_VALUE)) // set DELETE_FILE_THRESHOLD to 1 since DVs only produce one delete file per data file - .option(SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, "1") + .option(RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "1") .execute(); assertThat(result.rewrittenDataFilesCount()) .as("Action should rewrite 5 data files") @@ -371,10 +374,11 @@ public void testBinPackWithDeletes() throws IOException { actions() .rewriteDataFiles(table) // do not include any file based on bin pack file size configs - .option(SizeBasedFileRewriter.MIN_FILE_SIZE_BYTES, "0") + .option(SizeBasedFileRewritePlanner.MIN_FILE_SIZE_BYTES, "0") .option(RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Long.toString(Long.MAX_VALUE - 1)) - .option(SizeBasedFileRewriter.MAX_FILE_SIZE_BYTES, Long.toString(Long.MAX_VALUE)) - .option(SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, "2") + .option( + SizeBasedFileRewritePlanner.MAX_FILE_SIZE_BYTES, Long.toString(Long.MAX_VALUE)) + .option(RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "2") .execute(); assertThat(result.rewrittenDataFilesCount()) .as("Action 
should rewrite 2 data files") @@ -432,7 +436,7 @@ public void testRemoveDangledEqualityDeletesPartitionEvolution() { RewriteDataFiles.Result result = basicRewrite(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .filter(Expressions.equal("c1", 1)) .option(RewriteDataFiles.REMOVE_DANGLING_DELETES, "true") .execute(); @@ -492,7 +496,7 @@ public void testRemoveDangledPositionDeletesPartitionEvolution() throws IOExcept actions() .rewriteDataFiles(table) .filter(Expressions.equal("c1", 1)) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .option(RewriteDataFiles.REMOVE_DANGLING_DELETES, "true") .execute(); @@ -538,7 +542,7 @@ public void testBinPackWithDeleteAllData() throws IOException { Result result = actions() .rewriteDataFiles(table) - .option(SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, "1") + .option(RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "1") .execute(); assertThat(result.rewrittenDataFilesCount()).as("Action should rewrite 1 data files").isOne(); assertThat(result.rewrittenBytesCount()).isEqualTo(dataSizeBefore); @@ -689,7 +693,9 @@ public void testBinPackSplitLargeFile() { Result result = basicRewrite(table) .option(RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Long.toString(targetSize)) - .option(SizeBasedFileRewriter.MAX_FILE_SIZE_BYTES, Long.toString(targetSize * 2 - 2000)) + .option( + SizeBasedFileRewritePlanner.MAX_FILE_SIZE_BYTES, + Long.toString(targetSize * 2 - 2000)) .execute(); assertThat(result.rewrittenDataFilesCount()).as("Action should delete 1 data files").isOne(); @@ -720,8 +726,12 @@ public void testBinPackCombineMixedFiles() { Result result = basicRewrite(table) .option(RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Integer.toString(targetSize + 1000)) - .option(SizeBasedFileRewriter.MAX_FILE_SIZE_BYTES, Integer.toString(targetSize + 80000)) - .option(SizeBasedFileRewriter.MIN_FILE_SIZE_BYTES, Integer.toString(targetSize - 1000)) + .option( + SizeBasedFileRewritePlanner.MAX_FILE_SIZE_BYTES, + Integer.toString(targetSize + 80000)) + .option( + SizeBasedFileRewritePlanner.MIN_FILE_SIZE_BYTES, + Integer.toString(targetSize - 1000)) .execute(); assertThat(result.rewrittenDataFilesCount()) @@ -752,10 +762,10 @@ public void testBinPackCombineMediumFiles() { basicRewrite(table) .option(RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Integer.toString(targetSize)) .option( - SizeBasedFileRewriter.MAX_FILE_SIZE_BYTES, + SizeBasedFileRewritePlanner.MAX_FILE_SIZE_BYTES, Integer.toString((int) (targetSize * 1.8))) .option( - SizeBasedFileRewriter.MIN_FILE_SIZE_BYTES, + SizeBasedFileRewritePlanner.MIN_FILE_SIZE_BYTES, Integer.toString(targetSize - 100)) // All files too small .execute(); @@ -815,7 +825,7 @@ public void testMultipleGroups() { basicRewrite(table) .option( RewriteDataFiles.MAX_FILE_GROUP_SIZE_BYTES, Integer.toString(fileSize * 2 + 1000)) - .option(SizeBasedFileRewriter.MIN_INPUT_FILES, "1") + .option(SizeBasedFileRewritePlanner.MIN_INPUT_FILES, "1") .execute(); assertThat(result.rewriteResults()).as("Should have 10 fileGroups").hasSize(10); @@ -1206,7 +1216,7 @@ public void testInvalidOptions() { () -> basicRewrite(table) .sort(SortOrder.builderFor(table.schema()).asc("c2").build()) - .option(SparkShufflingDataRewriter.SHUFFLE_PARTITIONS_PER_FILE, "5") + .option(SparkShufflingDataRewriteExecutor.SHUFFLE_PARTITIONS_PER_FILE, "5") .execute()) .isInstanceOf(IllegalArgumentException.class) .hasMessageContaining("requires enabling Iceberg Spark 
session extensions"); @@ -1227,7 +1237,7 @@ public void testSortMultipleGroups() { RewriteDataFiles.Result result = basicRewrite(table) .sort() - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .option( RewriteDataFiles.MAX_FILE_GROUP_SIZE_BYTES, Integer.toString(fileSize * 2 + 1000)) .execute(); @@ -1257,8 +1267,8 @@ public void testSimpleSort() { RewriteDataFiles.Result result = basicRewrite(table) .sort() - .option(SizeBasedFileRewriter.MIN_INPUT_FILES, "1") - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.MIN_INPUT_FILES, "1") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .option( RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Integer.toString(averageFileSize(table))) .execute(); @@ -1291,8 +1301,8 @@ public void testSortAfterPartitionChange() { RewriteDataFiles.Result result = basicRewrite(table) .sort() - .option(SizeBasedFileRewriter.MIN_INPUT_FILES, "1") - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.MIN_INPUT_FILES, "1") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .option( RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Integer.toString(averageFileSize(table))) .execute(); @@ -1325,7 +1335,7 @@ public void testSortCustomSortOrder() { RewriteDataFiles.Result result = basicRewrite(table) .sort(SortOrder.builderFor(table.schema()).asc("c2").build()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .option( RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Integer.toString(averageFileSize(table))) .execute(); @@ -1363,7 +1373,7 @@ public void testSortCustomSortOrderRequiresRepartition() { RewriteDataFiles.Result result = basicRewrite(table) .sort(SortOrder.builderFor(table.schema()).asc("c3").build()) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .option( RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Integer.toString(averageFileSize(table) / partitions)) @@ -1397,13 +1407,13 @@ public void testAutoSortShuffleOutput() { basicRewrite(table) .sort(SortOrder.builderFor(table.schema()).asc("c2").build()) .option( - SizeBasedFileRewriter.MAX_FILE_SIZE_BYTES, + SizeBasedFileRewritePlanner.MAX_FILE_SIZE_BYTES, Integer.toString((averageFileSize(table) / 2) + 2)) // Divide files in 2 .option( RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Integer.toString(averageFileSize(table) / 2)) - .option(SizeBasedFileRewriter.MIN_INPUT_FILES, "1") + .option(SizeBasedFileRewritePlanner.MIN_INPUT_FILES, "1") .execute(); assertThat(result.rewriteResults()).as("Should have 1 fileGroups").hasSize(1); @@ -1477,13 +1487,13 @@ public void testZOrderSort() { basicRewrite(table) .zOrder("c2", "c3") .option( - SizeBasedFileRewriter.MAX_FILE_SIZE_BYTES, + SizeBasedFileRewritePlanner.MAX_FILE_SIZE_BYTES, Integer.toString((averageFileSize(table) / 2) + 2)) // Divide files in 2 .option( RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Integer.toString(averageFileSize(table) / 2)) - .option(SizeBasedFileRewriter.MIN_INPUT_FILES, "1") + .option(SizeBasedFileRewritePlanner.MIN_INPUT_FILES, "1") .execute(); assertThat(result.rewriteResults()).as("Should have 1 fileGroups").hasSize(1); @@ -1539,8 +1549,8 @@ public void testZOrderAllTypesSort() { "stringCol", "binaryCol", "booleanCol") - .option(SizeBasedFileRewriter.MIN_INPUT_FILES, "1") - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.MIN_INPUT_FILES, "1") + 
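// Editor's note: as the test updates above show, the size bounds and the delete-file
// threshold now hang off the planner classes. A typical "rewrite only files with
// deletes" configuration reads as (assuming `spark` and `table`):
Result result =
    SparkActions.get(spark)
        .rewriteDataFiles(table)
        .option(SizeBasedFileRewritePlanner.MIN_FILE_SIZE_BYTES, "0")
        .option(RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Long.toString(Long.MAX_VALUE - 1))
        .option(SizeBasedFileRewritePlanner.MAX_FILE_SIZE_BYTES, Long.toString(Long.MAX_VALUE))
        .option(RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "1") // moved off SizeBasedDataRewriter
        .execute();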
.option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); assertThat(result.rewriteResults()).as("Should have 1 fileGroups").hasSize(1); @@ -1655,7 +1665,7 @@ public void testRewriteJobOrderFilesAsc() { RewriteDataFilesSparkAction basicRewrite = basicRewrite(table).binPack(); List expected = toGroupStream(table, basicRewrite) - .mapToLong(RewriteFileGroup::numFiles) + .mapToLong(RewriteFileGroup::numInputFiles) .boxed() .collect(Collectors.toList()); @@ -1665,7 +1675,7 @@ public void testRewriteJobOrderFilesAsc() { .binPack(); List actual = toGroupStream(table, jobOrderRewrite) - .mapToLong(RewriteFileGroup::numFiles) + .mapToLong(RewriteFileGroup::numInputFiles) .boxed() .collect(Collectors.toList()); @@ -1687,7 +1697,7 @@ public void testRewriteJobOrderFilesDesc() { RewriteDataFilesSparkAction basicRewrite = basicRewrite(table).binPack(); List expected = toGroupStream(table, basicRewrite) - .mapToLong(RewriteFileGroup::numFiles) + .mapToLong(RewriteFileGroup::numInputFiles) .boxed() .collect(Collectors.toList()); @@ -1697,7 +1707,7 @@ public void testRewriteJobOrderFilesDesc() { .binPack(); List actual = toGroupStream(table, jobOrderRewrite) - .mapToLong(RewriteFileGroup::numFiles) + .mapToLong(RewriteFileGroup::numInputFiles) .boxed() .collect(Collectors.toList()); @@ -1737,7 +1747,7 @@ public void testBinPackRewriterWithSpecificUnparitionedOutputSpec() { RewriteDataFiles.Result result = basicRewrite(table) .option(RewriteDataFiles.OUTPUT_SPEC_ID, String.valueOf(outputSpecId)) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .binPack() .execute(); @@ -1760,7 +1770,7 @@ public void testBinPackRewriterWithSpecificOutputSpec() { RewriteDataFiles.Result result = basicRewrite(table) .option(RewriteDataFiles.OUTPUT_SPEC_ID, String.valueOf(outputSpecId)) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .binPack() .execute(); @@ -1799,7 +1809,7 @@ public void testSortRewriterWithSpecificOutputSpecId() { RewriteDataFiles.Result result = basicRewrite(table) .option(RewriteDataFiles.OUTPUT_SPEC_ID, String.valueOf(outputSpecId)) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .sort(SortOrder.builderFor(table.schema()).asc("c2").asc("c3").build()) .execute(); @@ -1822,7 +1832,7 @@ public void testZOrderRewriteWithSpecificOutputSpecId() { RewriteDataFiles.Result result = basicRewrite(table) .option(RewriteDataFiles.OUTPUT_SPEC_ID, String.valueOf(outputSpecId)) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .zOrder("c2", "c3") .execute(); @@ -1849,9 +1859,9 @@ protected List currentDataFiles(Table table) { } private Stream toGroupStream(Table table, RewriteDataFilesSparkAction rewrite) { - rewrite.validateAndInitOptions(); + rewrite.init(table.currentSnapshot().snapshotId()); - return rewrite.plan(table.currentSnapshot().snapshotId()).groups(); + return rewrite.plan().groups(); } protected List currentData() { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewritePositionDeleteFilesAction.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewritePositionDeleteFilesAction.java index 12b104fca27c..0fa8ccee9903 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewritePositionDeleteFilesAction.java +++ 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewritePositionDeleteFilesAction.java @@ -57,7 +57,7 @@ import org.apache.iceberg.TableProperties; import org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupRewriteResult; import org.apache.iceberg.actions.RewritePositionDeleteFiles.Result; -import org.apache.iceberg.actions.SizeBasedFileRewriter; +import org.apache.iceberg.actions.SizeBasedFileRewritePlanner; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.data.FileHelpers; import org.apache.iceberg.deletes.DeleteGranularity; @@ -166,7 +166,7 @@ private void checkDeleteGranularity(DeleteGranularity deleteGranularity) throws Result result = SparkActions.get(spark) .rewritePositionDeletes(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); int expectedDeleteFilesCount = deleteGranularity == DeleteGranularity.FILE ? 2 : 1; @@ -191,7 +191,7 @@ public void testUnpartitioned() throws Exception { Result result = SparkActions.get(spark) .rewritePositionDeletes(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); List newDeleteFiles = deleteFiles(table); assertThat(newDeleteFiles).as("Expected 1 new delete file").hasSize(1); @@ -225,8 +225,10 @@ public void testRewriteAll() throws Exception { Result result = SparkActions.get(spark) .rewritePositionDeletes(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") - .option(SizeBasedFileRewriter.TARGET_FILE_SIZE_BYTES, Long.toString(Long.MAX_VALUE - 1)) + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") + .option( + SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, + Long.toString(Long.MAX_VALUE - 1)) .execute(); List newDeleteFiles = deleteFiles(table); @@ -270,8 +272,10 @@ public void testRewriteFilter() throws Exception { SparkActions.get(spark) .rewritePositionDeletes(table) .filter(filter) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") - .option(SizeBasedFileRewriter.TARGET_FILE_SIZE_BYTES, Long.toString(Long.MAX_VALUE - 1)) + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") + .option( + SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, + Long.toString(Long.MAX_VALUE - 1)) .execute(); List newDeleteFiles = except(deleteFiles(table), deleteFiles); @@ -322,8 +326,8 @@ public void testRewriteToSmallerTarget() throws Exception { Result result = SparkActions.get(spark) .rewritePositionDeletes(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") - .option(SizeBasedFileRewriter.TARGET_FILE_SIZE_BYTES, String.valueOf(avgSize / 2)) + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, String.valueOf(avgSize / 2)) .execute(); List newDeleteFiles = deleteFiles(table); assertThat(newDeleteFiles).as("Should have 8 new delete files").hasSize(8); @@ -362,13 +366,13 @@ public void testRemoveDanglingDeletes() throws Exception { SparkActions.get(spark) .rewriteDataFiles(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); Result result = SparkActions.get(spark) .rewritePositionDeletes(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); List newDeleteFiles = deleteFiles(table); assertThat(newDeleteFiles).as("Should have 0 new delete files").hasSize(0); @@ -404,13 +408,13 @@ public void 
testSomePartitionsDanglingDeletes() throws Exception { SparkActions.get(spark) .rewriteDataFiles(table) .filter(filter) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); Result result = SparkActions.get(spark) .rewritePositionDeletes(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); List newDeleteFiles = deleteFiles(table); assertThat(newDeleteFiles).as("Should have 2 new delete files").hasSize(2); @@ -456,7 +460,7 @@ public void testRewriteFilterRemoveDangling() throws Exception { SparkActions.get(spark) .rewriteDataFiles(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); Expression filter = Expressions.or(Expressions.equal("c1", 0), Expressions.equal("c1", 1)); @@ -464,8 +468,10 @@ public void testRewriteFilterRemoveDangling() throws Exception { SparkActions.get(spark) .rewritePositionDeletes(table) .filter(filter) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") - .option(SizeBasedFileRewriter.TARGET_FILE_SIZE_BYTES, Long.toString(Long.MAX_VALUE - 1)) + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") + .option( + SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, + Long.toString(Long.MAX_VALUE - 1)) .execute(); List newDeleteFiles = except(deleteFiles(table), deleteFiles); @@ -517,7 +523,7 @@ public void testPartitionEvolutionAdd() throws Exception { Result result = SparkActions.get(spark) .rewritePositionDeletes(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); List rewrittenDeleteFiles = @@ -568,7 +574,7 @@ public void testPartitionEvolutionRemove() throws Exception { Result result = SparkActions.get(spark) .rewritePositionDeletes(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); List newDeleteFiles = deleteFiles(table); assertThat(newDeleteFiles).as("Should have 3 new delete files").hasSize(3); @@ -615,7 +621,7 @@ public void testSchemaEvolution() throws Exception { Result result = SparkActions.get(spark) .rewritePositionDeletes(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); List rewrittenDeleteFiles = @@ -646,7 +652,7 @@ public void testSnapshotProperty() throws Exception { SparkActions.get(spark) .rewritePositionDeletes(table) .snapshotProperty("key", "value") - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); assertThat(table.currentSnapshot().summary()) .containsAllEntriesOf(ImmutableMap.of("key", "value")); @@ -711,8 +717,10 @@ public void testRewriteManyColumns() throws Exception { Result result = SparkActions.get(spark) .rewritePositionDeletes(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") - .option(SizeBasedFileRewriter.TARGET_FILE_SIZE_BYTES, Long.toString(Long.MAX_VALUE - 1)) + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") + .option( + SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, + Long.toString(Long.MAX_VALUE - 1)) .execute(); List newDeleteFiles = deleteFiles(table); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriteExecutor.java similarity index 
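// Editor's sketch of the pattern the renamed test below is built on: file groups are
// planned directly with RewriteFileGroupPlanner, whose constructor per the diff takes
// (table, filter, startingSnapshotId, caseSensitive) and whose thresholds are its own
// constants. `tasks` stands in for a List<FileScanTask> of mock scan tasks:
RewriteFileGroupPlanner planner =
    new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false);
planner.init(
    ImmutableMap.of(
        RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "250",
        RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "500",
        RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "750"));
Iterable<List<FileScanTask>> groups = planner.planFileGroups(tasks); // size-based selection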
57% rename from spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriter.java rename to spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriteExecutor.java index e223d2e16411..bce2bf11209c 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriteExecutor.java @@ -29,9 +29,11 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; -import org.apache.iceberg.actions.SizeBasedDataRewriter; -import org.apache.iceberg.actions.SizeBasedFileRewriter; +import org.apache.iceberg.actions.RewriteDataFiles; +import org.apache.iceberg.actions.RewriteFileGroupPlanner; +import org.apache.iceberg.actions.SizeBasedFileRewritePlanner; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; @@ -43,7 +45,7 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; -public class TestSparkFileRewriter extends TestBase { +public class TestSparkFileRewriteExecutor extends TestBase { private static final TableIdentifier TABLE_IDENT = TableIdentifier.of("default", "tbl"); private static final Schema SCHEMA = @@ -62,7 +64,8 @@ public void removeTable() { @Test public void testBinPackDataSelectFiles() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - SparkBinPackDataRewriter rewriter = new SparkBinPackDataRewriter(spark, table); + RewriteFileGroupPlanner rewriter = + new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); checkDataFileSizeFiltering(rewriter); checkDataFilesDeleteThreshold(rewriter); @@ -71,32 +74,7 @@ public void testBinPackDataSelectFiles() { checkDataFileGroupWithTooMuchData(rewriter); } - @Test - public void testSortDataSelectFiles() { - Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - SparkSortDataRewriter rewriter = new SparkSortDataRewriter(spark, table, SORT_ORDER); - - checkDataFileSizeFiltering(rewriter); - checkDataFilesDeleteThreshold(rewriter); - checkDataFileGroupWithEnoughFiles(rewriter); - checkDataFileGroupWithEnoughData(rewriter); - checkDataFileGroupWithTooMuchData(rewriter); - } - - @Test - public void testZOrderDataSelectFiles() { - Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - ImmutableList zOrderCols = ImmutableList.of("id"); - SparkZOrderDataRewriter rewriter = new SparkZOrderDataRewriter(spark, table, zOrderCols); - - checkDataFileSizeFiltering(rewriter); - checkDataFilesDeleteThreshold(rewriter); - checkDataFileGroupWithEnoughFiles(rewriter); - checkDataFileGroupWithEnoughData(rewriter); - checkDataFileGroupWithTooMuchData(rewriter); - } - - private void checkDataFileSizeFiltering(SizeBasedDataRewriter rewriter) { + private void checkDataFileSizeFiltering(RewriteFileGroupPlanner rewriter) { FileScanTask tooSmallTask = new MockFileScanTask(100L); FileScanTask optimal = new MockFileScanTask(450); FileScanTask tooBigTask = new MockFileScanTask(1000L); @@ -104,10 +82,10 @@ private void checkDataFileSizeFiltering(SizeBasedDataRewriter rewriter) { Map options = ImmutableMap.of( - SizeBasedDataRewriter.MIN_FILE_SIZE_BYTES, "250", - SizeBasedDataRewriter.TARGET_FILE_SIZE_BYTES, "500", - 
SizeBasedDataRewriter.MAX_FILE_SIZE_BYTES, "750", - SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "250", + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "500", + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "750", + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); rewriter.init(options); Iterable> groups = rewriter.planFileGroups(tasks); @@ -116,17 +94,17 @@ private void checkDataFileSizeFiltering(SizeBasedDataRewriter rewriter) { assertThat(group).as("Must rewrite 2 files").hasSize(2); } - private void checkDataFilesDeleteThreshold(SizeBasedDataRewriter rewriter) { + private void checkDataFilesDeleteThreshold(RewriteFileGroupPlanner rewriter) { FileScanTask tooManyDeletesTask = MockFileScanTask.mockTaskWithDeletes(1000L, 3); FileScanTask optimalTask = MockFileScanTask.mockTaskWithDeletes(1000L, 1); List tasks = ImmutableList.of(tooManyDeletesTask, optimalTask); Map options = ImmutableMap.of( - SizeBasedDataRewriter.MIN_FILE_SIZE_BYTES, "1", - SizeBasedDataRewriter.TARGET_FILE_SIZE_BYTES, "2000", - SizeBasedDataRewriter.MAX_FILE_SIZE_BYTES, "5000", - SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, "2"); + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "1", + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "2000", + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "5000", + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "2"); rewriter.init(options); Iterable> groups = rewriter.planFileGroups(tasks); @@ -135,7 +113,7 @@ private void checkDataFilesDeleteThreshold(SizeBasedDataRewriter rewriter) { assertThat(group).as("Must rewrite 1 file").hasSize(1); } - private void checkDataFileGroupWithEnoughFiles(SizeBasedDataRewriter rewriter) { + private void checkDataFileGroupWithEnoughFiles(RewriteFileGroupPlanner rewriter) { List tasks = ImmutableList.of( new MockFileScanTask(100L), @@ -145,11 +123,11 @@ private void checkDataFileGroupWithEnoughFiles(SizeBasedDataRewriter rewriter) { Map options = ImmutableMap.of( - SizeBasedDataRewriter.MIN_INPUT_FILES, "3", - SizeBasedDataRewriter.MIN_FILE_SIZE_BYTES, "150", - SizeBasedDataRewriter.TARGET_FILE_SIZE_BYTES, "1000", - SizeBasedDataRewriter.MAX_FILE_SIZE_BYTES, "5000", - SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); + RewriteFileGroupPlanner.MIN_INPUT_FILES, "3", + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "150", + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "1000", + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "5000", + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); rewriter.init(options); Iterable> groups = rewriter.planFileGroups(tasks); @@ -158,18 +136,18 @@ private void checkDataFileGroupWithEnoughFiles(SizeBasedDataRewriter rewriter) { assertThat(group).as("Must rewrite 4 files").hasSize(4); } - private void checkDataFileGroupWithEnoughData(SizeBasedDataRewriter rewriter) { + private void checkDataFileGroupWithEnoughData(RewriteFileGroupPlanner rewriter) { List tasks = ImmutableList.of( new MockFileScanTask(100L), new MockFileScanTask(100L), new MockFileScanTask(100L)); Map options = ImmutableMap.of( - SizeBasedDataRewriter.MIN_INPUT_FILES, "5", - SizeBasedDataRewriter.MIN_FILE_SIZE_BYTES, "200", - SizeBasedDataRewriter.TARGET_FILE_SIZE_BYTES, "250", - SizeBasedDataRewriter.MAX_FILE_SIZE_BYTES, "500", - SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); + RewriteFileGroupPlanner.MIN_INPUT_FILES, "5", + 
RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "200", + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "250", + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "500", + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); rewriter.init(options); Iterable> groups = rewriter.planFileGroups(tasks); @@ -178,16 +156,16 @@ private void checkDataFileGroupWithEnoughData(SizeBasedDataRewriter rewriter) { assertThat(group).as("Must rewrite 3 files").hasSize(3); } - private void checkDataFileGroupWithTooMuchData(SizeBasedDataRewriter rewriter) { + private void checkDataFileGroupWithTooMuchData(RewriteFileGroupPlanner rewriter) { List tasks = ImmutableList.of(new MockFileScanTask(2000L)); Map options = ImmutableMap.of( - SizeBasedDataRewriter.MIN_INPUT_FILES, "5", - SizeBasedDataRewriter.MIN_FILE_SIZE_BYTES, "200", - SizeBasedDataRewriter.TARGET_FILE_SIZE_BYTES, "250", - SizeBasedDataRewriter.MAX_FILE_SIZE_BYTES, "500", - SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); + RewriteFileGroupPlanner.MIN_INPUT_FILES, "5", + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "200", + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "250", + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "500", + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); rewriter.init(options); Iterable> groups = rewriter.planFileGroups(tasks); @@ -200,15 +178,15 @@ private void checkDataFileGroupWithTooMuchData(SizeBasedDataRewriter rewriter) { public void testInvalidConstructorUsagesSortData() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - assertThatThrownBy(() -> new SparkSortDataRewriter(spark, table)) + assertThatThrownBy(() -> new SparkSortDataRewriteExecutor(spark, table)) .hasMessageContaining("Cannot sort data without a valid sort order") .hasMessageContaining("is unsorted and no sort order is provided"); - assertThatThrownBy(() -> new SparkSortDataRewriter(spark, table, null)) + assertThatThrownBy(() -> new SparkSortDataRewriteExecutor(spark, table, null)) .hasMessageContaining("Cannot sort data without a valid sort order") .hasMessageContaining("the provided sort order is null or empty"); - assertThatThrownBy(() -> new SparkSortDataRewriter(spark, table, SortOrder.unsorted())) + assertThatThrownBy(() -> new SparkSortDataRewriteExecutor(spark, table, SortOrder.unsorted())) .hasMessageContaining("Cannot sort data without a valid sort order") .hasMessageContaining("the provided sort order is null or empty"); } @@ -217,17 +195,19 @@ public void testInvalidConstructorUsagesSortData() { public void testInvalidConstructorUsagesZOrderData() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA, SPEC); - assertThatThrownBy(() -> new SparkZOrderDataRewriter(spark, table, null)) + assertThatThrownBy(() -> new SparkZOrderDataRewriteExecutor(spark, table, null)) .hasMessageContaining("Cannot ZOrder when no columns are specified"); - assertThatThrownBy(() -> new SparkZOrderDataRewriter(spark, table, ImmutableList.of())) + assertThatThrownBy(() -> new SparkZOrderDataRewriteExecutor(spark, table, ImmutableList.of())) .hasMessageContaining("Cannot ZOrder when no columns are specified"); - assertThatThrownBy(() -> new SparkZOrderDataRewriter(spark, table, ImmutableList.of("dep"))) + assertThatThrownBy( + () -> new SparkZOrderDataRewriteExecutor(spark, table, ImmutableList.of("dep"))) .hasMessageContaining("Cannot ZOrder") .hasMessageContaining("all columns provided were identity partition columns"); - assertThatThrownBy(() -> new 
SparkZOrderDataRewriter(spark, table, ImmutableList.of("DeP"))) + assertThatThrownBy( + () -> new SparkZOrderDataRewriteExecutor(spark, table, ImmutableList.of("DeP"))) .hasMessageContaining("Cannot ZOrder") .hasMessageContaining("all columns provided were identity partition columns"); } @@ -235,91 +215,119 @@ public void testInvalidConstructorUsagesZOrderData() { @Test public void testBinPackDataValidOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - SparkBinPackDataRewriter rewriter = new SparkBinPackDataRewriter(spark, table); + SparkBinPackDataRewriteExecutor rewriter = new SparkBinPackDataRewriteExecutor(spark, table); + RewriteFileGroupPlanner planner = + new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); assertThat(rewriter.validOptions()) .as("Rewriter must report all supported options") + .isEqualTo(ImmutableSet.of()); + + assertThat(planner.validOptions()) + .as("Planner must report all supported options") .isEqualTo( ImmutableSet.of( - SparkBinPackDataRewriter.TARGET_FILE_SIZE_BYTES, - SparkBinPackDataRewriter.MIN_FILE_SIZE_BYTES, - SparkBinPackDataRewriter.MAX_FILE_SIZE_BYTES, - SparkBinPackDataRewriter.MIN_INPUT_FILES, - SparkBinPackDataRewriter.REWRITE_ALL, - SparkBinPackDataRewriter.MAX_FILE_GROUP_SIZE_BYTES, - SparkBinPackDataRewriter.DELETE_FILE_THRESHOLD)); + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MIN_INPUT_FILES, + RewriteFileGroupPlanner.REWRITE_ALL, + RewriteFileGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES, + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, + RewriteDataFiles.REWRITE_JOB_ORDER)); } @Test public void testSortDataValidOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - SparkSortDataRewriter rewriter = new SparkSortDataRewriter(spark, table, SORT_ORDER); + SparkSortDataRewriteExecutor rewriter = + new SparkSortDataRewriteExecutor(spark, table, SORT_ORDER); + RewriteFileGroupPlanner planner = + new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); assertThat(rewriter.validOptions()) .as("Rewriter must report all supported options") .isEqualTo( ImmutableSet.of( - SparkSortDataRewriter.SHUFFLE_PARTITIONS_PER_FILE, - SparkSortDataRewriter.TARGET_FILE_SIZE_BYTES, - SparkSortDataRewriter.MIN_FILE_SIZE_BYTES, - SparkSortDataRewriter.MAX_FILE_SIZE_BYTES, - SparkSortDataRewriter.MIN_INPUT_FILES, - SparkSortDataRewriter.REWRITE_ALL, - SparkSortDataRewriter.MAX_FILE_GROUP_SIZE_BYTES, - SparkSortDataRewriter.DELETE_FILE_THRESHOLD, - SparkSortDataRewriter.COMPRESSION_FACTOR)); + SparkSortDataRewriteExecutor.SHUFFLE_PARTITIONS_PER_FILE, + SparkSortDataRewriteExecutor.COMPRESSION_FACTOR)); + + assertThat(planner.validOptions()) + .as("Planner must report all supported options") + .isEqualTo( + ImmutableSet.of( + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MIN_INPUT_FILES, + RewriteFileGroupPlanner.REWRITE_ALL, + RewriteFileGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES, + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, + RewriteDataFiles.REWRITE_JOB_ORDER)); } @Test public void testZOrderDataValidOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); ImmutableList zOrderCols = ImmutableList.of("id"); - SparkZOrderDataRewriter rewriter = new SparkZOrderDataRewriter(spark, table, zOrderCols); + SparkZOrderDataRewriteExecutor rewriter = + new 
SparkZOrderDataRewriteExecutor(spark, table, zOrderCols); + RewriteFileGroupPlanner planner = + new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); assertThat(rewriter.validOptions()) .as("Rewriter must report all supported options") .isEqualTo( ImmutableSet.of( - SparkZOrderDataRewriter.SHUFFLE_PARTITIONS_PER_FILE, - SparkZOrderDataRewriter.TARGET_FILE_SIZE_BYTES, - SparkZOrderDataRewriter.MIN_FILE_SIZE_BYTES, - SparkZOrderDataRewriter.MAX_FILE_SIZE_BYTES, - SparkZOrderDataRewriter.MIN_INPUT_FILES, - SparkZOrderDataRewriter.REWRITE_ALL, - SparkZOrderDataRewriter.MAX_FILE_GROUP_SIZE_BYTES, - SparkZOrderDataRewriter.DELETE_FILE_THRESHOLD, - SparkZOrderDataRewriter.COMPRESSION_FACTOR, - SparkZOrderDataRewriter.MAX_OUTPUT_SIZE, - SparkZOrderDataRewriter.VAR_LENGTH_CONTRIBUTION)); + SparkZOrderDataRewriteExecutor.SHUFFLE_PARTITIONS_PER_FILE, + SparkZOrderDataRewriteExecutor.COMPRESSION_FACTOR, + SparkZOrderDataRewriteExecutor.MAX_OUTPUT_SIZE, + SparkZOrderDataRewriteExecutor.VAR_LENGTH_CONTRIBUTION)); + assertThat(planner.validOptions()) + .as("Planner must report all supported options") + .isEqualTo( + ImmutableSet.of( + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MIN_INPUT_FILES, + RewriteFileGroupPlanner.REWRITE_ALL, + RewriteFileGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES, + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, + RewriteDataFiles.REWRITE_JOB_ORDER)); } @Test public void testInvalidValuesForBinPackDataOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - SparkBinPackDataRewriter rewriter = new SparkBinPackDataRewriter(spark, table); + RewriteFileGroupPlanner planner = + new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); - validateSizeBasedRewriterOptions(rewriter); + validateSizeBasedRewriterOptions(planner); Map invalidDeleteThresholdOptions = - ImmutableMap.of(SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, "-1"); - assertThatThrownBy(() -> rewriter.init(invalidDeleteThresholdOptions)) + ImmutableMap.of(RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "-1"); + assertThatThrownBy(() -> planner.init(invalidDeleteThresholdOptions)) .hasMessageContaining("'delete-file-threshold' is set to -1 but must be >= 0"); } @Test public void testInvalidValuesForSortDataOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - SparkSortDataRewriter rewriter = new SparkSortDataRewriter(spark, table, SORT_ORDER); + SparkSortDataRewriteExecutor rewriter = + new SparkSortDataRewriteExecutor(spark, table, SORT_ORDER); + RewriteFileGroupPlanner planner = + new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); - validateSizeBasedRewriterOptions(rewriter); + validateSizeBasedRewriterOptions(planner); Map invalidDeleteThresholdOptions = - ImmutableMap.of(SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, "-1"); - assertThatThrownBy(() -> rewriter.init(invalidDeleteThresholdOptions)) + ImmutableMap.of(RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "-1"); + assertThatThrownBy(() -> planner.init(invalidDeleteThresholdOptions)) .hasMessageContaining("'delete-file-threshold' is set to -1 but must be >= 0"); Map invalidCompressionFactorOptions = - ImmutableMap.of(SparkShufflingDataRewriter.COMPRESSION_FACTOR, "0"); + ImmutableMap.of(SparkShufflingDataRewriteExecutor.COMPRESSION_FACTOR, "0"); assertThatThrownBy(() -> rewriter.init(invalidCompressionFactorOptions)) .hasMessageContaining("'compression-factor' is set to 
0.0 but must be > 0"); } @@ -328,67 +336,70 @@ public void testInvalidValuesForSortDataOptions() { public void testInvalidValuesForZOrderDataOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); ImmutableList zOrderCols = ImmutableList.of("id"); - SparkZOrderDataRewriter rewriter = new SparkZOrderDataRewriter(spark, table, zOrderCols); + SparkZOrderDataRewriteExecutor rewriter = + new SparkZOrderDataRewriteExecutor(spark, table, zOrderCols); + RewriteFileGroupPlanner planner = + new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); - validateSizeBasedRewriterOptions(rewriter); + validateSizeBasedRewriterOptions(planner); Map invalidDeleteThresholdOptions = - ImmutableMap.of(SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, "-1"); - assertThatThrownBy(() -> rewriter.init(invalidDeleteThresholdOptions)) + ImmutableMap.of(RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "-1"); + assertThatThrownBy(() -> planner.init(invalidDeleteThresholdOptions)) .hasMessageContaining("'delete-file-threshold' is set to -1 but must be >= 0"); Map invalidCompressionFactorOptions = - ImmutableMap.of(SparkShufflingDataRewriter.COMPRESSION_FACTOR, "0"); + ImmutableMap.of(SparkShufflingDataRewriteExecutor.COMPRESSION_FACTOR, "0"); assertThatThrownBy(() -> rewriter.init(invalidCompressionFactorOptions)) .hasMessageContaining("'compression-factor' is set to 0.0 but must be > 0"); Map invalidMaxOutputOptions = - ImmutableMap.of(SparkZOrderDataRewriter.MAX_OUTPUT_SIZE, "0"); + ImmutableMap.of(SparkZOrderDataRewriteExecutor.MAX_OUTPUT_SIZE, "0"); assertThatThrownBy(() -> rewriter.init(invalidMaxOutputOptions)) .hasMessageContaining("Cannot have the interleaved ZOrder value use less than 1 byte") .hasMessageContaining("'max-output-size' was set to 0"); Map invalidVarLengthContributionOptions = - ImmutableMap.of(SparkZOrderDataRewriter.VAR_LENGTH_CONTRIBUTION, "0"); + ImmutableMap.of(SparkZOrderDataRewriteExecutor.VAR_LENGTH_CONTRIBUTION, "0"); assertThatThrownBy(() -> rewriter.init(invalidVarLengthContributionOptions)) .hasMessageContaining("Cannot use less than 1 byte for variable length types with ZOrder") .hasMessageContaining("'var-length-contribution' was set to 0"); } - private void validateSizeBasedRewriterOptions(SizeBasedFileRewriter rewriter) { + private void validateSizeBasedRewriterOptions(SizeBasedFileRewritePlanner rewriter) { Map invalidTargetSizeOptions = - ImmutableMap.of(SizeBasedFileRewriter.TARGET_FILE_SIZE_BYTES, "0"); + ImmutableMap.of(SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, "0"); assertThatThrownBy(() -> rewriter.init(invalidTargetSizeOptions)) .hasMessageContaining("'target-file-size-bytes' is set to 0 but must be > 0"); Map invalidMinSizeOptions = - ImmutableMap.of(SizeBasedFileRewriter.MIN_FILE_SIZE_BYTES, "-1"); + ImmutableMap.of(SizeBasedFileRewritePlanner.MIN_FILE_SIZE_BYTES, "-1"); assertThatThrownBy(() -> rewriter.init(invalidMinSizeOptions)) .hasMessageContaining("'min-file-size-bytes' is set to -1 but must be >= 0"); Map invalidTargetMinSizeOptions = ImmutableMap.of( - SizeBasedFileRewriter.TARGET_FILE_SIZE_BYTES, "3", - SizeBasedFileRewriter.MIN_FILE_SIZE_BYTES, "5"); + SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, "3", + SizeBasedFileRewritePlanner.MIN_FILE_SIZE_BYTES, "5"); assertThatThrownBy(() -> rewriter.init(invalidTargetMinSizeOptions)) .hasMessageContaining("'target-file-size-bytes' (3) must be > 'min-file-size-bytes' (5)") .hasMessageContaining("all new files will be smaller than the min threshold"); Map invalidTargetMaxSizeOptions = 
ImmutableMap.of( - SizeBasedFileRewriter.TARGET_FILE_SIZE_BYTES, "5", - SizeBasedFileRewriter.MAX_FILE_SIZE_BYTES, "3"); + SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, "5", + SizeBasedFileRewritePlanner.MAX_FILE_SIZE_BYTES, "3"); assertThatThrownBy(() -> rewriter.init(invalidTargetMaxSizeOptions)) .hasMessageContaining("'target-file-size-bytes' (5) must be < 'max-file-size-bytes' (3)") .hasMessageContaining("all new files will be larger than the max threshold"); Map invalidMinInputFilesOptions = - ImmutableMap.of(SizeBasedFileRewriter.MIN_INPUT_FILES, "0"); + ImmutableMap.of(SizeBasedFileRewritePlanner.MIN_INPUT_FILES, "0"); assertThatThrownBy(() -> rewriter.init(invalidMinInputFilesOptions)) .hasMessageContaining("'min-input-files' is set to 0 but must be > 0"); Map invalidMaxFileGroupSizeOptions = - ImmutableMap.of(SizeBasedFileRewriter.MAX_FILE_GROUP_SIZE_BYTES, "0"); + ImmutableMap.of(SizeBasedFileRewritePlanner.MAX_FILE_GROUP_SIZE_BYTES, "0"); assertThatThrownBy(() -> rewriter.init(invalidMaxFileGroupSizeOptions)) .hasMessageContaining("'max-file-group-size-bytes' is set to 0 but must be > 0"); } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestCompressionSettings.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestCompressionSettings.java index f411920a5dcc..24a14bb64d86 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestCompressionSettings.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestCompressionSettings.java @@ -58,7 +58,7 @@ import org.apache.iceberg.Parameters; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Table; -import org.apache.iceberg.actions.SizeBasedFileRewriter; +import org.apache.iceberg.actions.SizeBasedFileRewritePlanner; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -230,7 +230,7 @@ public void testWriteDataWithDifferentSetting() throws Exception { SparkActions.get(spark) .rewritePositionDeletes(table) - .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option(SizeBasedFileRewritePlanner.REWRITE_ALL, "true") .execute(); table.refresh(); deleteManifestFiles = table.currentSnapshot().deleteManifests(table.io()); From 0e96b10decaed6e327257a33af7308a6c0c8c1b5 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Thu, 21 Nov 2024 14:10:43 +0100 Subject: [PATCH 06/11] Revapi fix so the tests could run. Temporary removal for Spark 3.4, 3.3 tests. Disabling Spark 3.4, 3.3 compilation as well. 
--- .github/workflows/spark-ci.yml | 9 +-- .palantir/revapi.yml | 142 +++++++++++++++++++++++++++++++++ gradle.properties | 2 +- 3 files changed, 144 insertions(+), 9 deletions(-) diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml index 0d7bd2d3d3e7..295943a7dab0 100644 --- a/.github/workflows/spark-ci.yml +++ b/.github/workflows/spark-ci.yml @@ -73,15 +73,8 @@ jobs: strategy: matrix: jvm: [11, 17, 21] - spark: ['3.3', '3.4', '3.5'] + spark: ['3.5'] scala: ['2.12', '2.13'] - exclude: - # Spark 3.5 is the first version not failing on Java 21 (https://issues.apache.org/jira/browse/SPARK-42369) - # Full Java 21 support is coming in Spark 4 (https://issues.apache.org/jira/browse/SPARK-43831) - - jvm: 21 - spark: '3.3' - - jvm: 21 - spark: '3.4' env: SPARK_LOCAL_IP: localhost steps: diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index fade79326a49..e689ed9803d2 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -1145,6 +1145,148 @@ acceptedBreaks: new: "method org.apache.iceberg.BaseMetastoreOperations.CommitStatus org.apache.iceberg.BaseMetastoreTableOperations::checkCommitStatus(java.lang.String,\ \ org.apache.iceberg.TableMetadata)" justification: "Removing deprecated code" + "1.7.0": + org.apache.iceberg:iceberg-core: + - code: "java.class.removed" + old: "class org.apache.iceberg.actions.SizeBasedDataRewriter" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.class.removed" + old: "class org.apache.iceberg.actions.SizeBasedFileRewriter>, F>" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.class.removed" + old: "class org.apache.iceberg.actions.SizeBasedPositionDeletesRewriter" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.class.removed" + old: "interface org.apache.iceberg.actions.FileRewriter>, F>" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.generics.elementNowParameterized" + old: "method java.util.Comparator\ + \ org.apache.iceberg.actions.RewriteFileGroup::comparator(org.apache.iceberg.RewriteJobOrder)" + new: "method >,\ + \ F extends org.apache.iceberg.ContentFile>>\ + \ java.util.Comparator>\ + \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ + \ @ org.apache.iceberg.actions.RewriteFileGroup" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.generics.elementNowParameterized" + old: "method java.util.Comparator\ + \ org.apache.iceberg.actions.RewritePositionDeletesGroup::comparator(org.apache.iceberg.RewriteJobOrder)" + new: "method >,\ + \ F extends org.apache.iceberg.ContentFile>>\ + \ java.util.Comparator>\ + \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ + \ @ org.apache.iceberg.actions.RewritePositionDeletesGroup" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.generics.formalTypeParameterAdded" + old: "method java.util.Comparator\ + \ org.apache.iceberg.actions.RewriteFileGroup::comparator(org.apache.iceberg.RewriteJobOrder)" + new: "method >,\ + \ F extends org.apache.iceberg.ContentFile>>\ + \ 
java.util.Comparator>\ + \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ + \ @ org.apache.iceberg.actions.RewriteFileGroup" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.generics.formalTypeParameterAdded" + old: "method java.util.Comparator\ + \ org.apache.iceberg.actions.RewritePositionDeletesGroup::comparator(org.apache.iceberg.RewriteJobOrder)" + new: "method >,\ + \ F extends org.apache.iceberg.ContentFile>>\ + \ java.util.Comparator>\ + \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ + \ @ org.apache.iceberg.actions.RewritePositionDeletesGroup" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.method.movedToSuperClass" + old: "method java.util.Comparator\ + \ org.apache.iceberg.actions.RewriteFileGroup::comparator(org.apache.iceberg.RewriteJobOrder)" + new: "method >,\ + \ F extends org.apache.iceberg.ContentFile>>\ + \ java.util.Comparator>\ + \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ + \ @ org.apache.iceberg.actions.RewriteFileGroup" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.method.movedToSuperClass" + old: "method java.util.Comparator\ + \ org.apache.iceberg.actions.RewritePositionDeletesGroup::comparator(org.apache.iceberg.RewriteJobOrder)" + new: "method >,\ + \ F extends org.apache.iceberg.ContentFile>>\ + \ java.util.Comparator>\ + \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ + \ @ org.apache.iceberg.actions.RewritePositionDeletesGroup" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.method.numberOfParametersChanged" + old: "method void org.apache.iceberg.actions.RewriteFileGroup::(org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo,\ + \ java.util.List)" + new: "method void org.apache.iceberg.actions.RewriteFileGroup::(org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo,\ + \ java.util.List, long, int)" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.method.numberOfParametersChanged" + old: "method void org.apache.iceberg.actions.RewritePositionDeletesGroup::(org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupInfo,\ + \ java.util.List)" + new: "method void org.apache.iceberg.actions.RewritePositionDeletesGroup::(org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupInfo,\ + \ java.util.List, long, int)" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.method.removed" + old: "method int org.apache.iceberg.actions.RewriteFileGroup::numFiles()" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.method.removed" + old: "method int org.apache.iceberg.actions.RewritePositionDeletesGroup::numRewrittenDeleteFiles()" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.method.removed" + old: 
"method java.util.List org.apache.iceberg.actions.RewritePositionDeletesGroup::tasks()" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.method.returnTypeTypeParametersChanged" + old: "method java.util.Comparator\ + \ org.apache.iceberg.actions.RewriteFileGroup::comparator(org.apache.iceberg.RewriteJobOrder)" + new: "method >,\ + \ F extends org.apache.iceberg.ContentFile>>\ + \ java.util.Comparator>\ + \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ + \ @ org.apache.iceberg.actions.RewriteFileGroup" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" + - code: "java.method.returnTypeTypeParametersChanged" + old: "method java.util.Comparator\ + \ org.apache.iceberg.actions.RewritePositionDeletesGroup::comparator(org.apache.iceberg.RewriteJobOrder)" + new: "method >,\ + \ F extends org.apache.iceberg.ContentFile>>\ + \ java.util.Comparator>\ + \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ + \ @ org.apache.iceberg.actions.RewritePositionDeletesGroup" + justification: "We will decide what to do with the API changes after the new\ + \ API has been finalized" apache-iceberg-0.14.0: org.apache.iceberg:iceberg-api: - code: "java.class.defaultSerializationChanged" diff --git a/gradle.properties b/gradle.properties index dc1e1a509b01..5c62371efe35 100644 --- a/gradle.properties +++ b/gradle.properties @@ -21,7 +21,7 @@ systemProp.knownFlinkVersions=1.18,1.19,1.20 systemProp.defaultHiveVersions=2 systemProp.knownHiveVersions=2,3 systemProp.defaultSparkVersions=3.5 -systemProp.knownSparkVersions=3.3,3.4,3.5 +systemProp.knownSparkVersions=3.5 systemProp.defaultKafkaVersions=3 systemProp.knownKafkaVersions=3 systemProp.defaultScalaVersion=2.12 From fed8e69fa249e6e02cfce1735f97454a3a615361 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Wed, 27 Nov 2024 15:05:08 +0100 Subject: [PATCH 07/11] Revert API changes and use deprecation instead --- .github/workflows/spark-ci.yml | 9 +- .palantir/revapi.yml | 142 ------- .../iceberg/actions/FileRewriteGroup.java | 2 +- .../apache/iceberg/actions/FileRewriter.java | 80 ++++ .../iceberg/actions/RewriteFileGroup.java | 38 ++ .../actions/RewriteFileGroupPlanner.java | 2 +- .../actions/RewritePositionDeletesGroup.java | 48 +++ .../RewritePositionDeletesGroupPlanner.java | 2 +- .../actions/SizeBasedDataRewriter.java | 112 ++++++ .../actions/SizeBasedFileRewriter.java | 348 ++++++++++++++++++ .../SizeBasedPositionDeletesRewriter.java | 63 ++++ gradle.properties | 2 +- 12 files changed, 701 insertions(+), 147 deletions(-) create mode 100644 core/src/main/java/org/apache/iceberg/actions/FileRewriter.java create mode 100644 core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java create mode 100644 core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java create mode 100644 core/src/main/java/org/apache/iceberg/actions/SizeBasedPositionDeletesRewriter.java diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml index 295943a7dab0..0d7bd2d3d3e7 100644 --- a/.github/workflows/spark-ci.yml +++ b/.github/workflows/spark-ci.yml @@ -73,8 +73,15 @@ jobs: strategy: matrix: jvm: [11, 17, 21] - spark: ['3.5'] + spark: ['3.3', '3.4', '3.5'] scala: ['2.12', '2.13'] + exclude: + # Spark 3.5 is the first version not failing 
on Java 21 (https://issues.apache.org/jira/browse/SPARK-42369) + # Full Java 21 support is coming in Spark 4 (https://issues.apache.org/jira/browse/SPARK-43831) + - jvm: 21 + spark: '3.3' + - jvm: 21 + spark: '3.4' env: SPARK_LOCAL_IP: localhost steps: diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index e689ed9803d2..fade79326a49 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -1145,148 +1145,6 @@ acceptedBreaks: new: "method org.apache.iceberg.BaseMetastoreOperations.CommitStatus org.apache.iceberg.BaseMetastoreTableOperations::checkCommitStatus(java.lang.String,\ \ org.apache.iceberg.TableMetadata)" justification: "Removing deprecated code" - "1.7.0": - org.apache.iceberg:iceberg-core: - - code: "java.class.removed" - old: "class org.apache.iceberg.actions.SizeBasedDataRewriter" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.class.removed" - old: "class org.apache.iceberg.actions.SizeBasedFileRewriter>, F>" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.class.removed" - old: "class org.apache.iceberg.actions.SizeBasedPositionDeletesRewriter" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.class.removed" - old: "interface org.apache.iceberg.actions.FileRewriter>, F>" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.generics.elementNowParameterized" - old: "method java.util.Comparator\ - \ org.apache.iceberg.actions.RewriteFileGroup::comparator(org.apache.iceberg.RewriteJobOrder)" - new: "method >,\ - \ F extends org.apache.iceberg.ContentFile>>\ - \ java.util.Comparator>\ - \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ - \ @ org.apache.iceberg.actions.RewriteFileGroup" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.generics.elementNowParameterized" - old: "method java.util.Comparator\ - \ org.apache.iceberg.actions.RewritePositionDeletesGroup::comparator(org.apache.iceberg.RewriteJobOrder)" - new: "method >,\ - \ F extends org.apache.iceberg.ContentFile>>\ - \ java.util.Comparator>\ - \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ - \ @ org.apache.iceberg.actions.RewritePositionDeletesGroup" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.generics.formalTypeParameterAdded" - old: "method java.util.Comparator\ - \ org.apache.iceberg.actions.RewriteFileGroup::comparator(org.apache.iceberg.RewriteJobOrder)" - new: "method >,\ - \ F extends org.apache.iceberg.ContentFile>>\ - \ java.util.Comparator>\ - \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ - \ @ org.apache.iceberg.actions.RewriteFileGroup" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.generics.formalTypeParameterAdded" - old: "method java.util.Comparator\ - \ org.apache.iceberg.actions.RewritePositionDeletesGroup::comparator(org.apache.iceberg.RewriteJobOrder)" - new: "method >,\ - \ F extends 
org.apache.iceberg.ContentFile>>\ - \ java.util.Comparator>\ - \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ - \ @ org.apache.iceberg.actions.RewritePositionDeletesGroup" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.method.movedToSuperClass" - old: "method java.util.Comparator\ - \ org.apache.iceberg.actions.RewriteFileGroup::comparator(org.apache.iceberg.RewriteJobOrder)" - new: "method >,\ - \ F extends org.apache.iceberg.ContentFile>>\ - \ java.util.Comparator>\ - \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ - \ @ org.apache.iceberg.actions.RewriteFileGroup" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.method.movedToSuperClass" - old: "method java.util.Comparator\ - \ org.apache.iceberg.actions.RewritePositionDeletesGroup::comparator(org.apache.iceberg.RewriteJobOrder)" - new: "method >,\ - \ F extends org.apache.iceberg.ContentFile>>\ - \ java.util.Comparator>\ - \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ - \ @ org.apache.iceberg.actions.RewritePositionDeletesGroup" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.method.numberOfParametersChanged" - old: "method void org.apache.iceberg.actions.RewriteFileGroup::(org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo,\ - \ java.util.List)" - new: "method void org.apache.iceberg.actions.RewriteFileGroup::(org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo,\ - \ java.util.List, long, int)" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.method.numberOfParametersChanged" - old: "method void org.apache.iceberg.actions.RewritePositionDeletesGroup::(org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupInfo,\ - \ java.util.List)" - new: "method void org.apache.iceberg.actions.RewritePositionDeletesGroup::(org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupInfo,\ - \ java.util.List, long, int)" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.method.removed" - old: "method int org.apache.iceberg.actions.RewriteFileGroup::numFiles()" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.method.removed" - old: "method int org.apache.iceberg.actions.RewritePositionDeletesGroup::numRewrittenDeleteFiles()" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.method.removed" - old: "method java.util.List org.apache.iceberg.actions.RewritePositionDeletesGroup::tasks()" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.method.returnTypeTypeParametersChanged" - old: "method java.util.Comparator\ - \ org.apache.iceberg.actions.RewriteFileGroup::comparator(org.apache.iceberg.RewriteJobOrder)" - new: "method >,\ - \ F extends org.apache.iceberg.ContentFile>>\ - \ java.util.Comparator>\ - \ org.apache.iceberg.actions.FileRewriteGroup>, F extends 
org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ - \ @ org.apache.iceberg.actions.RewriteFileGroup" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" - - code: "java.method.returnTypeTypeParametersChanged" - old: "method java.util.Comparator\ - \ org.apache.iceberg.actions.RewritePositionDeletesGroup::comparator(org.apache.iceberg.RewriteJobOrder)" - new: "method >,\ - \ F extends org.apache.iceberg.ContentFile>>\ - \ java.util.Comparator>\ - \ org.apache.iceberg.actions.FileRewriteGroup>, F extends org.apache.iceberg.ContentFile>>::comparator(org.apache.iceberg.RewriteJobOrder)\ - \ @ org.apache.iceberg.actions.RewritePositionDeletesGroup" - justification: "We will decide what to do with the API changes after the new\ - \ API has been finalized" apache-iceberg-0.14.0: org.apache.iceberg:iceberg-api: - code: "java.class.defaultSerializationChanged" diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewriteGroup.java b/core/src/main/java/org/apache/iceberg/actions/FileRewriteGroup.java index c43bf5cd85f6..c48a6d6f4a2c 100644 --- a/core/src/main/java/org/apache/iceberg/actions/FileRewriteGroup.java +++ b/core/src/main/java/org/apache/iceberg/actions/FileRewriteGroup.java @@ -70,7 +70,7 @@ public int numInputFiles() { } public static , F extends ContentFile> - Comparator> comparator(RewriteJobOrder rewriteJobOrder) { + Comparator> taskComparator(RewriteJobOrder rewriteJobOrder) { switch (rewriteJobOrder) { case BYTES_ASC: return Comparator.comparing(FileRewriteGroup::sizeInBytes); diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewriter.java b/core/src/main/java/org/apache/iceberg/actions/FileRewriter.java new file mode 100644 index 000000000000..f014aea0c034 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/FileRewriter.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.actions; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.ContentFile; +import org.apache.iceberg.ContentScanTask; + +/** + * A class for rewriting content files. + * + *
<p>
The entire rewrite operation is broken down into pieces based on partitioning, and size-based + * groups within a partition. These subunits of the rewrite are referred to as file groups. A file + * group will be processed by a single framework "action". For example, in Spark this means that + * each group would be rewritten in its own Spark job. + * + * @param the Java type of tasks to read content files + * @param the Java type of content files + * @deprecated since 1.8.0, will be removed in 1.9.0; use {@link FileRewritePlanner} and {@link + * FileRewriteExecutor}. + */ +@Deprecated +public interface FileRewriter, F extends ContentFile> { + + /** Returns a description for this rewriter. */ + default String description() { + return getClass().getName(); + } + + /** + * Returns a set of supported options for this rewriter. Only options specified in this list will + * be accepted at runtime. Any other options will be rejected. + */ + Set validOptions(); + + /** + * Initializes this rewriter using provided options. + * + * @param options options to initialize this rewriter + */ + void init(Map options); + + /** + * Selects files which this rewriter believes are valid targets to be rewritten based on their + * scan tasks and groups those scan tasks into file groups. The file groups are then rewritten in + * a single executable unit, such as a Spark job. + * + * @param tasks an iterable of scan task for files in a partition + * @return groups of scan tasks for files to be rewritten in a single executable unit + */ + Iterable> planFileGroups(Iterable tasks); + + /** + * Rewrite a group of files represented by the given list of scan tasks. + * + *
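+ * <p>A minimal end-to-end sketch of the plan/rewrite lifecycle (illustrative only; assumes an
+ * engine-specific implementation, a populated {@code options} map, and pre-collected
+ * {@code tasks}):
+ *
+ * <pre>{@code
+ * FileRewriter<FileScanTask, DataFile> rewriter = ...; // e.g. a Spark-based implementation
+ * rewriter.init(options); // validate and apply the rewrite options
+ * for (List<FileScanTask> group : rewriter.planFileGroups(tasks)) {
+ *   Set<DataFile> newFiles = rewriter.rewrite(group); // one executable unit per group
+ * }
+ * }</pre>
+ *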
<p>
The implementation is supposed to be engine-specific (e.g. Spark, Flink, Trino). + * + * @param group a group of scan tasks for files to be rewritten together + * @return a set of newly written files + */ + Set rewrite(List group); +} diff --git a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroup.java b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroup.java index b43d94a2bb8c..996e7b0f8ba2 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroup.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroup.java @@ -18,11 +18,13 @@ */ package org.apache.iceberg.actions; +import java.util.Comparator; import java.util.List; import java.util.Set; import java.util.stream.Collectors; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.RewriteJobOrder; import org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; @@ -35,6 +37,14 @@ public class RewriteFileGroup extends FileRewriteGroup { private DataFileSet addedFiles = DataFileSet.create(); + /** + * @deprecated since 1.8.0, will be removed in 1.9.0. + */ + @Deprecated + public RewriteFileGroup(FileGroupInfo info, List fileScanTasks) { + this(info, fileScanTasks, 0L, 0); + } + public RewriteFileGroup( FileGroupInfo info, List fileScanTasks, @@ -78,4 +88,32 @@ public String toString() { .add("numRewrittenBytes", sizeInBytes()) .toString(); } + + /** + * @deprecated since 1.8.0, will be removed in 1.9.0. Use {@link #numInputFiles()} instead. + */ + @Deprecated + public int numFiles() { + return fileScans().size(); + } + + /** + * @deprecated since 1.8.0, will be removed in 1.9.0. Use {@link + * FileRewriteGroup#taskComparator(RewriteJobOrder)} instead. 
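+ *     <p>Migration sketch (illustrative; assumes a {@code Stream<RewriteFileGroup>} named
+ *     {@code groups} and a {@code RewriteJobOrder} value {@code order}, matching how this
+ *     patch updates the planner call sites):
+ *     <pre>{@code
+ *     groups.sorted(RewriteFileGroup.comparator(order));     // before (deprecated)
+ *     groups.sorted(FileRewriteGroup.taskComparator(order)); // after
+ *     }</pre>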
+ */ + @Deprecated + public static Comparator comparator(RewriteJobOrder rewriteJobOrder) { + switch (rewriteJobOrder) { + case BYTES_ASC: + return Comparator.comparing(RewriteFileGroup::sizeInBytes); + case BYTES_DESC: + return Comparator.comparing(RewriteFileGroup::sizeInBytes, Comparator.reverseOrder()); + case FILES_ASC: + return Comparator.comparing(RewriteFileGroup::numFiles); + case FILES_DESC: + return Comparator.comparing(RewriteFileGroup::numFiles, Comparator.reverseOrder()); + default: + return (unused, unused2) -> 0; + } + } } diff --git a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java index 38df04217d98..3fdcfba3fbbd 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java @@ -151,7 +151,7 @@ public FileRewritePlan numOutputFiles(inputSize)); }); }) - .sorted(RewriteFileGroup.comparator(rewriteJobOrder)); + .sorted(FileRewriteGroup.taskComparator(rewriteJobOrder)); Map groupsInPartition = plan.transformValues(List::size); int totalGroupCount = groupsInPartition.values().stream().reduce(Integer::sum).orElse(0); return new FileRewritePlan<>( diff --git a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroup.java b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroup.java index 96640bb5d9b6..c7b1f9ddaf51 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroup.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroup.java @@ -18,11 +18,13 @@ */ package org.apache.iceberg.actions; +import java.util.Comparator; import java.util.List; import java.util.Set; import java.util.stream.Collectors; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.PositionDeletesScanTask; +import org.apache.iceberg.RewriteJobOrder; import org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupInfo; import org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupRewriteResult; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; @@ -39,6 +41,14 @@ public class RewritePositionDeletesGroup private DeleteFileSet addedDeleteFiles = DeleteFileSet.create(); + /** + * @deprecated since 1.8.0, will be removed in 1.9.0. + */ + @Deprecated + public RewritePositionDeletesGroup(FileGroupInfo info, List tasks) { + this(info, tasks, 0L, 0); + } + public RewritePositionDeletesGroup( FileGroupInfo info, List tasks, @@ -50,6 +60,14 @@ public RewritePositionDeletesGroup( tasks.stream().mapToLong(t -> t.file().dataSequenceNumber()).max().getAsLong(); } + /** + * @deprecated since 1.8.0, will be removed in 1.9.0. Use {@link #fileScans()} instead. + */ + @Deprecated + public List tasks() { + return fileScans(); + } + public void setOutputFiles(Set files) { addedDeleteFiles = DeleteFileSet.of(files); } @@ -103,4 +121,34 @@ public long rewrittenBytes() { public long addedBytes() { return addedDeleteFiles.stream().mapToLong(DeleteFile::fileSizeInBytes).sum(); } + + /** + * @deprecated since 1.8.0, will be removed in 1.9.0. Use {@link #numInputFiles()} instead. + */ + @Deprecated + public int numRewrittenDeleteFiles() { + return fileScans().size(); + } + + /** + * @deprecated since 1.8.0, will be removed in 1.9.0. Use {@link + * FileRewriteGroup#taskComparator(RewriteJobOrder)} instead. 
+ */ + @Deprecated + public static Comparator comparator(RewriteJobOrder order) { + switch (order) { + case BYTES_ASC: + return Comparator.comparing(RewritePositionDeletesGroup::rewrittenBytes); + case BYTES_DESC: + return Comparator.comparing( + RewritePositionDeletesGroup::rewrittenBytes, Comparator.reverseOrder()); + case FILES_ASC: + return Comparator.comparing(RewritePositionDeletesGroup::numRewrittenDeleteFiles); + case FILES_DESC: + return Comparator.comparing( + RewritePositionDeletesGroup::numRewrittenDeleteFiles, Comparator.reverseOrder()); + default: + return (unused, unused2) -> 0; + } + } } diff --git a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java index d83677139a37..74109df05a1f 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java @@ -118,7 +118,7 @@ public void init(Map options) { numOutputFiles(inputSize)); }); }) - .sorted(RewritePositionDeletesGroup.comparator(rewriteJobOrder)); + .sorted(FileRewriteGroup.taskComparator(rewriteJobOrder)); Map groupsInPartition = plan.transformValues(List::size); int totalGroupCount = groupsInPartition.values().stream().reduce(Integer::sum).orElse(0); return new FileRewritePlan<>( diff --git a/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java b/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java new file mode 100644 index 000000000000..66b759321ac8 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.actions; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.util.PropertyUtil; + +public abstract class SizeBasedDataRewriter extends SizeBasedFileRewriter { + + /** + * The minimum number of deletes that needs to be associated with a data file for it to be + * considered for rewriting. If a data file has this number of deletes or more, it will be + * rewritten regardless of its file size determined by {@link #MIN_FILE_SIZE_BYTES} and {@link + * #MAX_FILE_SIZE_BYTES}. 
If a file group contains a file that satisfies this condition, the file + * group will be rewritten regardless of the number of files in the file group determined by + * {@link #MIN_INPUT_FILES}. + * + *
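+ * <p>For example (hypothetical threshold value; {@code rewriter} is any concrete subclass
+ * instance, mirroring how the unit tests in this patch set the option):
+ *
+ * <pre>{@code
+ * // files carrying 2 or more delete files become rewrite candidates regardless of size
+ * rewriter.init(ImmutableMap.of(SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, "2"));
+ * }</pre>
+ *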
<p>
Defaults to Integer.MAX_VALUE, which means this feature is not enabled by default. + * + * @deprecated since 1.8.0, will be removed in 1.9.0; use {@link RewriteFileGroupPlanner} and + * {@link FileRewriteExecutor}. + */ + @Deprecated public static final String DELETE_FILE_THRESHOLD = "delete-file-threshold"; + + public static final int DELETE_FILE_THRESHOLD_DEFAULT = Integer.MAX_VALUE; + + private int deleteFileThreshold; + + protected SizeBasedDataRewriter(Table table) { + super(table); + } + + @Override + public Set validOptions() { + return ImmutableSet.builder() + .addAll(super.validOptions()) + .add(DELETE_FILE_THRESHOLD) + .build(); + } + + @Override + public void init(Map options) { + super.init(options); + this.deleteFileThreshold = deleteFileThreshold(options); + } + + @Override + protected Iterable filterFiles(Iterable tasks) { + return Iterables.filter(tasks, task -> wronglySized(task) || tooManyDeletes(task)); + } + + private boolean tooManyDeletes(FileScanTask task) { + return task.deletes() != null && task.deletes().size() >= deleteFileThreshold; + } + + @Override + protected Iterable> filterFileGroups(List> groups) { + return Iterables.filter(groups, this::shouldRewrite); + } + + private boolean shouldRewrite(List group) { + return enoughInputFiles(group) + || enoughContent(group) + || tooMuchContent(group) + || anyTaskHasTooManyDeletes(group); + } + + private boolean anyTaskHasTooManyDeletes(List group) { + return group.stream().anyMatch(this::tooManyDeletes); + } + + @Override + protected long defaultTargetFileSize() { + return PropertyUtil.propertyAsLong( + table().properties(), + TableProperties.WRITE_TARGET_FILE_SIZE_BYTES, + TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT); + } + + private int deleteFileThreshold(Map options) { + int value = + PropertyUtil.propertyAsInt(options, DELETE_FILE_THRESHOLD, DELETE_FILE_THRESHOLD_DEFAULT); + Preconditions.checkArgument( + value >= 0, "'%s' is set to %s but must be >= 0", DELETE_FILE_THRESHOLD, value); + return value; + } +} diff --git a/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java b/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java new file mode 100644 index 000000000000..319e44c4a20c --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java @@ -0,0 +1,348 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.actions; + +import java.math.RoundingMode; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.ContentFile; +import org.apache.iceberg.ContentScanTask; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Table; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.relocated.com.google.common.math.LongMath; +import org.apache.iceberg.util.BinPacking; +import org.apache.iceberg.util.PropertyUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A file rewriter that determines which files to rewrite based on their size. + * + *
<p>
If files are smaller than the {@link #MIN_FILE_SIZE_BYTES} threshold or larger than the {@link
+ * #MAX_FILE_SIZE_BYTES} threshold, they are considered targets for being rewritten.
+ *
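+ * <p>For example (editorial illustration using the default ratios defined below): with a 512 MB
+ * target, the thresholds default to 0.75 * 512 MB = 384 MB and 1.80 * 512 MB = ~922 MB, so a
+ * 100 MB file and a 2 GB file are both selected while a 600 MB file is left alone.
+ *
+ *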
<p>
Once selected, files are grouped based on the {@link BinPacking bin-packing algorithm} into
+ * groups of no more than {@link #MAX_FILE_GROUP_SIZE_BYTES}. Groups will actually be rewritten
+ * if they contain at least {@link #MIN_INPUT_FILES} files or if they would produce at least one
+ * file of {@link #TARGET_FILE_SIZE_BYTES}.
+ *
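+ * <p>A minimal planning sketch (editorial illustration for the data-file specialization;
+ * {@code rewriter} stands for any concrete subclass instance and {@code tasks} for the scan
+ * tasks of the table being compacted):
+ *
+ * <pre>{@code
+ * rewriter.init(ImmutableMap.of("min-input-files", "2"));
+ * Iterable<List<FileScanTask>> groups = rewriter.planFileGroups(tasks);
+ * // each inner list is one bin-packed group of at most max-file-group-size-bytes bytes
+ * }</pre>
+ *
+ *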
<p>
Note that implementations may add extra conditions for selecting files or filtering groups. + * + * @deprecated since 1.8.0, will be removed in 1.9.0; use {@link SizeBasedFileRewritePlanner} and + * {@link FileRewriteExecutor}. + */ +@Deprecated +public abstract class SizeBasedFileRewriter, F extends ContentFile> + implements FileRewriter { + + private static final Logger LOG = LoggerFactory.getLogger(SizeBasedFileRewriter.class); + + /** The target output file size that this file rewriter will attempt to generate. */ + public static final String TARGET_FILE_SIZE_BYTES = "target-file-size-bytes"; + + /** + * Controls which files will be considered for rewriting. Files with sizes under this threshold + * will be considered for rewriting regardless of any other criteria. + * + *
<p>
Defaults to 75% of the target file size.
+ */
+ public static final String MIN_FILE_SIZE_BYTES = "min-file-size-bytes";
+
+ public static final double MIN_FILE_SIZE_DEFAULT_RATIO = 0.75;
+
+ /**
+ * Controls which files will be considered for rewriting. Files with sizes above this threshold
+ * will be considered for rewriting regardless of any other criteria.
+ *
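+ * <p>For instance (editorial note), a 2 GB file is selected by this threshold alone; the
+ * subclasses in this patch also check {@link #tooMuchContent(List)} when filtering groups, so
+ * the group holding such a file is rewritten even if it is the only file in that group.
+ *
+ *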
<p>
Defaults to 180% of the target file size. + */ + public static final String MAX_FILE_SIZE_BYTES = "max-file-size-bytes"; + + public static final double MAX_FILE_SIZE_DEFAULT_RATIO = 1.80; + + /** + * Any file group exceeding this number of files will be rewritten regardless of other criteria. + * This config ensures file groups that contain many files are compacted even if the total size of + * that group is less than the target file size. This can also be thought of as the maximum number + * of wrongly sized files that could remain in a partition after rewriting. + */ + public static final String MIN_INPUT_FILES = "min-input-files"; + + public static final int MIN_INPUT_FILES_DEFAULT = 5; + + /** Overrides other options and forces rewriting of all provided files. */ + public static final String REWRITE_ALL = "rewrite-all"; + + public static final boolean REWRITE_ALL_DEFAULT = false; + + /** + * This option controls the largest amount of data that should be rewritten in a single file + * group. It helps with breaking down the rewriting of very large partitions which may not be + * rewritable otherwise due to the resource constraints of the cluster. For example, a sort-based + * rewrite may not scale to TB-sized partitions, and those partitions need to be worked on in + * small subsections to avoid exhaustion of resources. + */ + public static final String MAX_FILE_GROUP_SIZE_BYTES = "max-file-group-size-bytes"; + + public static final long MAX_FILE_GROUP_SIZE_BYTES_DEFAULT = 100L * 1024 * 1024 * 1024; // 100 GB + + private static final long SPLIT_OVERHEAD = 5 * 1024; + + private final Table table; + private long targetFileSize; + private long minFileSize; + private long maxFileSize; + private int minInputFiles; + private boolean rewriteAll; + private long maxGroupSize; + + private int outputSpecId; + + protected SizeBasedFileRewriter(Table table) { + this.table = table; + } + + protected abstract long defaultTargetFileSize(); + + protected abstract Iterable filterFiles(Iterable tasks); + + protected abstract Iterable> filterFileGroups(List> groups); + + protected Table table() { + return table; + } + + @Override + public Set validOptions() { + return ImmutableSet.of( + TARGET_FILE_SIZE_BYTES, + MIN_FILE_SIZE_BYTES, + MAX_FILE_SIZE_BYTES, + MIN_INPUT_FILES, + REWRITE_ALL, + MAX_FILE_GROUP_SIZE_BYTES); + } + + @Override + public void init(Map options) { + Map sizeThresholds = sizeThresholds(options); + this.targetFileSize = sizeThresholds.get(TARGET_FILE_SIZE_BYTES); + this.minFileSize = sizeThresholds.get(MIN_FILE_SIZE_BYTES); + this.maxFileSize = sizeThresholds.get(MAX_FILE_SIZE_BYTES); + + this.minInputFiles = minInputFiles(options); + this.rewriteAll = rewriteAll(options); + this.maxGroupSize = maxGroupSize(options); + this.outputSpecId = outputSpecId(options); + + if (rewriteAll) { + LOG.info("Configured to rewrite all provided files in table {}", table.name()); + } + } + + protected boolean wronglySized(T task) { + return task.length() < minFileSize || task.length() > maxFileSize; + } + + @Override + public Iterable> planFileGroups(Iterable tasks) { + Iterable filteredTasks = rewriteAll ? tasks : filterFiles(tasks); + BinPacking.ListPacker packer = new BinPacking.ListPacker<>(maxGroupSize, 1, false); + List> groups = packer.pack(filteredTasks, ContentScanTask::length); + return rewriteAll ? 
groups : filterFileGroups(groups); + } + + protected boolean enoughInputFiles(List group) { + return group.size() > 1 && group.size() >= minInputFiles; + } + + protected boolean enoughContent(List group) { + return group.size() > 1 && inputSize(group) > targetFileSize; + } + + protected boolean tooMuchContent(List group) { + return inputSize(group) > maxFileSize; + } + + protected long inputSize(List group) { + return group.stream().mapToLong(ContentScanTask::length).sum(); + } + + /** + * Calculates the split size to use in bin-packing rewrites. + * + *
<p>
This method determines the target split size as the input size divided by the desired number
+ * of output files. The final split size is adjusted to be at least as big as the target file size
+ * but less than the max write file size.
+ */
+ public long splitSize(long inputSize) {
+ long estimatedSplitSize = (inputSize / numOutputFiles(inputSize)) + SPLIT_OVERHEAD;
+ if (estimatedSplitSize < targetFileSize) {
+ return targetFileSize;
+ } else if (estimatedSplitSize > writeMaxFileSize()) {
+ return writeMaxFileSize();
+ } else {
+ return estimatedSplitSize;
+ }
+ }
+
+ /**
+ * Determines the preferable number of output files when rewriting a particular file group.
+ *
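+ * <p>A worked run of the rounding rule described below (editorial sketch, assuming a 1 GB
+ * target and the default 0.75 GB minimum):
+ *
+ * <pre>{@code
+ * // inputSize = 10.1 GB, targetFileSize = 1 GB
+ * // with remainder: ceil(10.1 / 1) = 11 files; without: floor(10.1 / 1) = 10 files
+ * // remainder 0.1 GB <= 0.75 GB minimum, and 10.1 / 10 = 1.01 GB < 1.1 * 1 GB
+ * // -> round down and produce 10 output files of ~1.01 GB each
+ * }</pre>
+ *
+ *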
<p>
If the rewriter is handling 10.1 GB of data with a target file size of 1 GB, it could
+ * produce 11 files, one of which would only have 0.1 GB. This would most likely be less
+ * preferable to 10 files with 1.01 GB each. So this method decides whether to round up or round
+ * down based on what the estimated average file size will be if the remainder (0.1 GB) is
+ * distributed amongst other files. If the new average file size is no more than 10% greater than
+ * the target file size, then this method will round down when determining the number of output
+ * files. Otherwise, the remainder will be written into a separate file.
+ *
+ * @param inputSize a total input size for a file group
+ * @return the number of files this rewriter should create
+ */
+ protected long numOutputFiles(long inputSize) {
+ if (inputSize < targetFileSize) {
+ return 1;
+ }
+
+ long numFilesWithRemainder = LongMath.divide(inputSize, targetFileSize, RoundingMode.CEILING);
+ long numFilesWithoutRemainder = LongMath.divide(inputSize, targetFileSize, RoundingMode.FLOOR);
+ long avgFileSizeWithoutRemainder = inputSize / numFilesWithoutRemainder;
+
+ if (LongMath.mod(inputSize, targetFileSize) > minFileSize) {
+ // the remainder file is of a valid size for this rewrite so keep it
+ return numFilesWithRemainder;
+
+ } else if (avgFileSizeWithoutRemainder
+ < Math.min(1.1 * targetFileSize, (double) writeMaxFileSize())) {
+ // if the remainder is distributed amongst other files,
+ // the average file size will be no more than 10% bigger than the target file size
+ // so round down and distribute remainder amongst other files
+ return numFilesWithoutRemainder;
+
+ } else {
+ // keep the remainder file as it is not OK to distribute it amongst other files
+ return numFilesWithRemainder;
+ }
+ }
+
+ /**
+ * Estimates a larger max target file size than the target size used in task creation to avoid
+ * creating tiny remainder files.
+ *
+ *
<p>
While we create tasks that should all be smaller than our target size, there is a chance
+ * that the actual data will end up being larger than our target size due to factors, such as
+ * compression and serialization, that are outside our control. If this occurs, instead of making
+ * a single file that is close in size to our target, we would end up producing one file of the
+ * target size, and then a small extra file with the remaining data.
+ *
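+ * <p>In code form (editorial restatement of the {@code @return} contract below):
+ *
+ * <pre>{@code
+ * // writeMaxFileSize = target + 0.5 * (max - target)
+ * // e.g. target = 512 MB and max = ~922 MB give a write size of ~717 MB
+ * }</pre>
+ *
+ *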
<p>
For example, if our target is 512 MB, we may generate a rewrite task that should be 500 MB. + * When we write the data we may find we actually have to write out 530 MB. If we use the target + * size while writing, we would produce a 512 MB file and an 18 MB file. If instead we use a + * larger size estimated by this method, then we end up writing a single file. + * + * @return the target size plus one half of the distance between max and target + */ + protected long writeMaxFileSize() { + return (long) (targetFileSize + ((maxFileSize - targetFileSize) * 0.5)); + } + + protected PartitionSpec outputSpec() { + return table.specs().get(outputSpecId); + } + + protected int outputSpecId() { + return outputSpecId; + } + + private int outputSpecId(Map options) { + int specId = + PropertyUtil.propertyAsInt(options, RewriteDataFiles.OUTPUT_SPEC_ID, table.spec().specId()); + Preconditions.checkArgument( + table.specs().containsKey(specId), + "Cannot use output spec id %s because the table does not contain a reference to this spec-id.", + specId); + return specId; + } + + private Map sizeThresholds(Map options) { + long target = + PropertyUtil.propertyAsLong(options, TARGET_FILE_SIZE_BYTES, defaultTargetFileSize()); + + long defaultMin = (long) (target * MIN_FILE_SIZE_DEFAULT_RATIO); + long min = PropertyUtil.propertyAsLong(options, MIN_FILE_SIZE_BYTES, defaultMin); + + long defaultMax = (long) (target * MAX_FILE_SIZE_DEFAULT_RATIO); + long max = PropertyUtil.propertyAsLong(options, MAX_FILE_SIZE_BYTES, defaultMax); + + Preconditions.checkArgument( + target > 0, "'%s' is set to %s but must be > 0", TARGET_FILE_SIZE_BYTES, target); + + Preconditions.checkArgument( + min >= 0, "'%s' is set to %s but must be >= 0", MIN_FILE_SIZE_BYTES, min); + + Preconditions.checkArgument( + target > min, + "'%s' (%s) must be > '%s' (%s), all new files will be smaller than the min threshold", + TARGET_FILE_SIZE_BYTES, + target, + MIN_FILE_SIZE_BYTES, + min); + + Preconditions.checkArgument( + target < max, + "'%s' (%s) must be < '%s' (%s), all new files will be larger than the max threshold", + TARGET_FILE_SIZE_BYTES, + target, + MAX_FILE_SIZE_BYTES, + max); + + Map values = Maps.newHashMap(); + + values.put(TARGET_FILE_SIZE_BYTES, target); + values.put(MIN_FILE_SIZE_BYTES, min); + values.put(MAX_FILE_SIZE_BYTES, max); + + return values; + } + + private int minInputFiles(Map options) { + int value = PropertyUtil.propertyAsInt(options, MIN_INPUT_FILES, MIN_INPUT_FILES_DEFAULT); + Preconditions.checkArgument( + value > 0, "'%s' is set to %s but must be > 0", MIN_INPUT_FILES, value); + return value; + } + + private long maxGroupSize(Map options) { + long value = + PropertyUtil.propertyAsLong( + options, MAX_FILE_GROUP_SIZE_BYTES, MAX_FILE_GROUP_SIZE_BYTES_DEFAULT); + Preconditions.checkArgument( + value > 0, "'%s' is set to %s but must be > 0", MAX_FILE_GROUP_SIZE_BYTES, value); + return value; + } + + private boolean rewriteAll(Map options) { + return PropertyUtil.propertyAsBoolean(options, REWRITE_ALL, REWRITE_ALL_DEFAULT); + } +} diff --git a/core/src/main/java/org/apache/iceberg/actions/SizeBasedPositionDeletesRewriter.java b/core/src/main/java/org/apache/iceberg/actions/SizeBasedPositionDeletesRewriter.java new file mode 100644 index 000000000000..60f37b79d24c --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/SizeBasedPositionDeletesRewriter.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.actions; + +import java.util.List; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.PositionDeletesScanTask; +import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.util.PropertyUtil; + +/** + * @deprecated since 1.8.0, will be removed in 1.9.0; use {@link RewritePositionDeletesGroupPlanner} + * and {@link FileRewriteExecutor}. + */ +@Deprecated +public abstract class SizeBasedPositionDeletesRewriter + extends SizeBasedFileRewriter { + + protected SizeBasedPositionDeletesRewriter(Table table) { + super(table); + } + + @Override + protected Iterable filterFiles(Iterable tasks) { + return Iterables.filter(tasks, this::wronglySized); + } + + @Override + protected Iterable> filterFileGroups( + List> groups) { + return Iterables.filter(groups, this::shouldRewrite); + } + + private boolean shouldRewrite(List group) { + return enoughInputFiles(group) || enoughContent(group) || tooMuchContent(group); + } + + @Override + protected long defaultTargetFileSize() { + return PropertyUtil.propertyAsLong( + table().properties(), + TableProperties.DELETE_TARGET_FILE_SIZE_BYTES, + TableProperties.DELETE_TARGET_FILE_SIZE_BYTES_DEFAULT); + } +} diff --git a/gradle.properties b/gradle.properties index 5c62371efe35..dc1e1a509b01 100644 --- a/gradle.properties +++ b/gradle.properties @@ -21,7 +21,7 @@ systemProp.knownFlinkVersions=1.18,1.19,1.20 systemProp.defaultHiveVersions=2 systemProp.knownHiveVersions=2,3 systemProp.defaultSparkVersions=3.5 -systemProp.knownSparkVersions=3.5 +systemProp.knownSparkVersions=3.3,3.4,3.5 systemProp.defaultKafkaVersions=3 systemProp.knownKafkaVersions=3 systemProp.defaultScalaVersion=2.12 From 440618df19fe1724a4600dff469cd8a4dad485e9 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Thu, 28 Nov 2024 14:46:32 +0100 Subject: [PATCH 08/11] Flashing out tests for RewriteFileGroupPlanner --- .../actions/RewriteFileGroupPlanner.java | 33 +++- .../actions/TestRewriteFileGroupPlanner.java | 174 +++++++++++++++--- .../actions/TestSparkFileRewriteExecutor.java | 22 +-- 3 files changed, 182 insertions(+), 47 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java index 3fdcfba3fbbd..0fd786f99a99 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java @@ -30,9 +30,11 @@ import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; import org.apache.iceberg.TableProperties; +import org.apache.iceberg.TableScan; import org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo; import 
org.apache.iceberg.data.GenericRecord; import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; @@ -70,14 +72,26 @@ public class RewriteFileGroupPlanner private static final Logger LOG = LoggerFactory.getLogger(RewriteFileGroupPlanner.class); private final Expression filter; - private final long snapshotId; + private final Long snapshotId; private final boolean caseSensitive; private int deleteFileThreshold; private RewriteJobOrder rewriteJobOrder; + public RewriteFileGroupPlanner(Table table) { + this(table, Expressions.alwaysTrue()); + } + + public RewriteFileGroupPlanner(Table table, Expression filter) { + this( + table, + filter, + table.currentSnapshot() != null ? table.currentSnapshot().snapshotId() : null, + false); + } + public RewriteFileGroupPlanner( - Table table, Expression filter, long snapshotId, boolean caseSensitive) { + Table table, Expression filter, Long snapshotId, boolean caseSensitive) { super(table); this.filter = filter; this.snapshotId = snapshotId; @@ -160,13 +174,14 @@ public FileRewritePlan @VisibleForTesting CloseableIterable tasks() { - return table() - .newScan() - .useSnapshot(snapshotId) - .caseSensitive(caseSensitive) - .filter(filter) - .ignoreResiduals() - .planFiles(); + TableScan scan = + table().newScan().filter(filter).caseSensitive(caseSensitive).ignoreResiduals(); + + if (snapshotId != null) { + scan = scan.useSnapshot(snapshotId); + } + + return scan.planFiles(); } private int deleteFileThreshold(Map options) { diff --git a/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java b/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java index 903e7b27313c..746395d57bca 100644 --- a/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java +++ b/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java @@ -19,7 +19,9 @@ package org.apache.iceberg.actions; import static org.apache.iceberg.actions.RewriteDataFiles.REWRITE_JOB_ORDER; +import static org.apache.iceberg.actions.RewriteFileGroupPlanner.MAX_FILE_SIZE_DEFAULT_RATIO; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.File; import java.util.List; @@ -37,14 +39,19 @@ import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; +import org.junit.jupiter.params.provider.ValueSource; class TestRewriteFileGroupPlanner { + private static final Map REWRITE_ALL = + ImmutableMap.of(RewriteFileGroupPlanner.REWRITE_ALL, "true"); + private static final DataFile FILE_1 = newDataFile("data_bucket=0", 10); private static final DataFile FILE_2 = newDataFile("data_bucket=0", 10); private static final DataFile FILE_3 = newDataFile("data_bucket=0", 10); @@ -80,19 +87,9 @@ public void cleanupTables() { @EnumSource( value = 
RewriteJobOrder.class, names = {"FILES_DESC", "FILES_ASC", "BYTES_DESC", "BYTES_ASC"}) - void testGroups(RewriteJobOrder order) { - table - .newAppend() - .appendFile(FILE_1) - .appendFile(FILE_2) - .appendFile(FILE_3) - .appendFile(FILE_4) - .appendFile(FILE_5) - .appendFile(FILE_6) - .commit(); - RewriteFileGroupPlanner planner = - new RewriteFileGroupPlanner( - table, Expressions.alwaysTrue(), table.currentSnapshot().snapshotId(), false); + void testJobOrder(RewriteJobOrder order) { + addFiles(); + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); planner.init( ImmutableMap.of( RewriteFileGroupPlanner.REWRITE_ALL, "true", REWRITE_JOB_ORDER, order.name())); @@ -106,19 +103,33 @@ void testGroups(RewriteJobOrder order) { } @Test - void testContext() { + void testUnpartitionedTable() { + table.updateSpec().removeField("data_bucket").commit(); + table.refresh(); + table .newAppend() - .appendFile(FILE_1) - .appendFile(FILE_2) - .appendFile(FILE_3) - .appendFile(FILE_4) - .appendFile(FILE_5) - .appendFile(FILE_6) + .appendFile(newDataFile("", 10)) + .appendFile(newDataFile("", 20)) + .appendFile(newDataFile("", 30)) .commit(); - RewriteFileGroupPlanner planner = - new RewriteFileGroupPlanner( - table, Expressions.alwaysTrue(), table.currentSnapshot().snapshotId(), false); + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); + planner.init( + ImmutableMap.of( + RewriteFileGroupPlanner.MIN_INPUT_FILES, + "1", + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, + "30")); + FileRewritePlan result = + planner.plan(); + assertThat(result.totalGroupCount()).isEqualTo(1); + assertThat(result.groups().iterator().next().numInputFiles()).isEqualTo(2); + } + + @Test + void testMaxGroupSize() { + addFiles(); + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); planner.init( ImmutableMap.of( RewriteFileGroupPlanner.REWRITE_ALL, @@ -133,6 +144,123 @@ void testContext() { assertThat(result.groupsInPartition(FILE_6.partition())).isEqualTo(1); } + @Test + void testEmptyTable() { + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); + + planner.init(REWRITE_ALL); + + FileRewritePlan result = + planner.plan(); + + assertThat(table.currentSnapshot()).as("Table must be empty").isNull(); + assertThat(result.totalGroupCount()).isZero(); + } + + @Test + void testFilter() { + addFiles(); + RewriteFileGroupPlanner planner = + new RewriteFileGroupPlanner( + table, + Expressions.or( + Expressions.equal(Expressions.bucket("data", 16), 0), + Expressions.equal(Expressions.bucket("data", 16), 2))); + planner.init(REWRITE_ALL); + FileRewritePlan plan = + planner.plan(); + List groups = plan.groups().collect(Collectors.toList()); + + assertThat(plan.totalGroupCount()).isEqualTo(2); + assertThat(groups).hasSize(2); + assertThat(groups.stream().mapToLong(FileRewriteGroup::numInputFiles).sum()).isEqualTo(4); + } + + @Test + void testWriteMaxFileSize() { + int targetFileSize = 10; + addFiles(); + + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); + planner.init( + ImmutableMap.of( + RewriteFileGroupPlanner.REWRITE_ALL, + "true", + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, + String.valueOf(targetFileSize))); + FileRewritePlan plan = + planner.plan(); + assertThat(plan.writeMaxFileSize()) + .isGreaterThan(targetFileSize) + .isLessThan((long) (targetFileSize * MAX_FILE_SIZE_DEFAULT_RATIO)); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testOutputSpec(boolean specific) { + addFiles(); + + int oldSpecId = 
table.spec().specId(); + table.updateSpec().removeField("data_bucket").commit(); + table.newAppend().appendFile(newDataFile("", 10)).commit(); + table.refresh(); + int newSpecId = table.spec().specId(); + + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); + + Map options = Maps.newHashMap(REWRITE_ALL); + if (specific) { + options.put(RewriteDataFiles.OUTPUT_SPEC_ID, String.valueOf(oldSpecId)); + } + + planner.init(options); + + FileRewritePlan plan = + planner.plan(); + assertThat(plan.outputSpecId()).isEqualTo(specific ? oldSpecId : newSpecId); + } + + @Test + public void testInvalidOption() { + addFiles(); + + assertThatThrownBy( + () -> { + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); + + planner.init(ImmutableMap.of(RewriteDataFiles.REWRITE_JOB_ORDER, "foo")); + }) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid rewrite job order name: foo"); + + assertThatThrownBy( + () -> { + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); + + planner.init( + ImmutableMap.of( + RewriteFileGroupPlanner.REWRITE_ALL, + "true", + RewriteDataFiles.OUTPUT_SPEC_ID, + String.valueOf(1234))); + }) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Cannot use output spec id 1234 because the table does not contain a reference to this spec-id."); + } + + private void addFiles() { + table + .newAppend() + .appendFile(FILE_1) + .appendFile(FILE_2) + .appendFile(FILE_3) + .appendFile(FILE_4) + .appendFile(FILE_5) + .appendFile(FILE_6) + .commit(); + } + private static DataFile newDataFile(String partitionPath, long fileSize) { return DataFiles.builder(TestBase.SPEC) .withPath("/path/to/data-" + UUID.randomUUID() + ".parquet") diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriteExecutor.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriteExecutor.java index bce2bf11209c..444bbf458f17 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriteExecutor.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriteExecutor.java @@ -33,7 +33,6 @@ import org.apache.iceberg.actions.RewriteFileGroupPlanner; import org.apache.iceberg.actions.SizeBasedFileRewritePlanner; import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; @@ -64,8 +63,7 @@ public void removeTable() { @Test public void testBinPackDataSelectFiles() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - RewriteFileGroupPlanner rewriter = - new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); + RewriteFileGroupPlanner rewriter = new RewriteFileGroupPlanner(table); checkDataFileSizeFiltering(rewriter); checkDataFilesDeleteThreshold(rewriter); @@ -216,8 +214,7 @@ public void testInvalidConstructorUsagesZOrderData() { public void testBinPackDataValidOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); SparkBinPackDataRewriteExecutor rewriter = new SparkBinPackDataRewriteExecutor(spark, table); - RewriteFileGroupPlanner planner = - new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); 
assertThat(rewriter.validOptions()) .as("Rewriter must report all supported options") @@ -242,8 +239,7 @@ public void testSortDataValidOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); SparkSortDataRewriteExecutor rewriter = new SparkSortDataRewriteExecutor(spark, table, SORT_ORDER); - RewriteFileGroupPlanner planner = - new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); assertThat(rewriter.validOptions()) .as("Rewriter must report all supported options") @@ -272,8 +268,7 @@ public void testZOrderDataValidOptions() { ImmutableList zOrderCols = ImmutableList.of("id"); SparkZOrderDataRewriteExecutor rewriter = new SparkZOrderDataRewriteExecutor(spark, table, zOrderCols); - RewriteFileGroupPlanner planner = - new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); assertThat(rewriter.validOptions()) .as("Rewriter must report all supported options") @@ -300,8 +295,7 @@ public void testZOrderDataValidOptions() { @Test public void testInvalidValuesForBinPackDataOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - RewriteFileGroupPlanner planner = - new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); validateSizeBasedRewriterOptions(planner); @@ -316,8 +310,7 @@ public void testInvalidValuesForSortDataOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); SparkSortDataRewriteExecutor rewriter = new SparkSortDataRewriteExecutor(spark, table, SORT_ORDER); - RewriteFileGroupPlanner planner = - new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); validateSizeBasedRewriterOptions(planner); @@ -338,8 +331,7 @@ public void testInvalidValuesForZOrderDataOptions() { ImmutableList zOrderCols = ImmutableList.of("id"); SparkZOrderDataRewriteExecutor rewriter = new SparkZOrderDataRewriteExecutor(spark, table, zOrderCols); - RewriteFileGroupPlanner planner = - new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), 1, false); + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); validateSizeBasedRewriterOptions(planner); From c76e9c14c7ffcaf1d7d780e4f17629cfd87d2201 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Mon, 2 Dec 2024 14:11:30 +0100 Subject: [PATCH 09/11] RewritePositionDeletesGroupPlanner tests --- .../iceberg/actions/FileRewriteExecutor.java | 6 +- .../iceberg/actions/FileRewritePlan.java | 12 +- .../actions/RewriteFileGroupPlanner.java | 8 +- .../iceberg/actions/RewriteFilePlan.java | 47 ++++ .../actions/RewritePositionDeletePlan.java | 41 +++ .../RewritePositionDeletesGroupPlanner.java | 13 +- .../actions/TestRewriteFileGroupPlanner.java | 25 +- ...estRewritePositionDeletesGroupPlanner.java | 250 ++++++++++++++++++ ...a => TestSizeBasedFileRewritePlanner.java} | 73 +++-- .../actions/RewriteDataFilesSparkAction.java | 26 +- ...RewritePositionDeleteFilesSparkAction.java | 34 +-- ...BinPackPositionDeletesRewriteExecutor.java | 7 +- .../spark/actions/SparkRewriteExecutor.java | 19 +- .../SparkSizeBasedDataRewriteExecutor.java | 20 +- 14 files changed, 455 insertions(+), 126 deletions(-) create mode 100644 core/src/main/java/org/apache/iceberg/actions/RewriteFilePlan.java create mode 100644 core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletePlan.java create 
mode 100644 core/src/test/java/org/apache/iceberg/actions/TestRewritePositionDeletesGroupPlanner.java rename core/src/test/java/org/apache/iceberg/actions/{TestSizeBasedRewriter.java => TestSizeBasedFileRewritePlanner.java} (62%) diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java b/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java index c024cb42877a..bbe84915fa63 100644 --- a/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java +++ b/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java @@ -30,12 +30,14 @@ * @param the Java type of the tasks to read content files * @param the Java type of the content files * @param the Java type of the planned groups + * @param
<P>
the Java type of the plan to execute */ public interface FileRewriteExecutor< I, T extends ContentScanTask, F extends ContentFile, - G extends FileRewriteGroup> { + G extends FileRewriteGroup, + P extends FileRewritePlan> { /** Returns a description for this rewriter. */ default String description() { @@ -60,7 +62,7 @@ default String description() { * * @param plan containing the configuration data */ - void initPlan(FileRewritePlan plan); + void initPlan(P plan); /** * Rewrite a group of files represented by the given list of scan tasks. diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java b/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java index ad6349de2f80..2cab4d7d12b4 100644 --- a/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java +++ b/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java @@ -35,7 +35,7 @@ * @param the Java type of the content files * @param the Java type of the planned groups */ -public class FileRewritePlan< +public abstract class FileRewritePlan< I, T extends ContentScanTask, F extends ContentFile, @@ -44,19 +44,16 @@ public class FileRewritePlan< private final int totalGroupCount; private final Map groupsInPartition; private final long writeMaxFileSize; - private final int outputSpecId; protected FileRewritePlan( Stream groups, int totalGroupCount, Map groupsInPartition, - long writeMaxFileSize, - int outputSpecId) { + long writeMaxFileSize) { this.groups = groups; this.totalGroupCount = totalGroupCount; this.groupsInPartition = groupsInPartition; this.writeMaxFileSize = writeMaxFileSize; - this.outputSpecId = outputSpecId; } /** The stream of the generated {@link RewriteFileGroup}s. */ @@ -78,9 +75,4 @@ public int totalGroupCount() { public long writeMaxFileSize() { return writeMaxFileSize; } - - /** Partition specification id for the target files */ - public int outputSpecId() { - return outputSpecId; - } } diff --git a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java index 0fd786f99a99..ba499f392411 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java @@ -36,7 +36,6 @@ import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.io.CloseableIterable; -import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; @@ -143,7 +142,7 @@ protected long defaultTargetFileSize() { * @return the generated plan which could be executed during the compaction */ @Override - public FileRewritePlan plan() { + public RewriteFilePlan plan() { StructLikeMap>> plan = planFileGroups(); RewriteExecutionContext ctx = new RewriteExecutionContext(); Stream groups = @@ -168,12 +167,11 @@ public FileRewritePlan .sorted(FileRewriteGroup.taskComparator(rewriteJobOrder)); Map groupsInPartition = plan.transformValues(List::size); int totalGroupCount = groupsInPartition.values().stream().reduce(Integer::sum).orElse(0); - return new FileRewritePlan<>( + return new RewriteFilePlan( groups, totalGroupCount, groupsInPartition, writeMaxFileSize(), outputSpecId()); } - @VisibleForTesting - 
CloseableIterable tasks() { + private CloseableIterable tasks() { TableScan scan = table().newScan().filter(filter).caseSensitive(caseSensitive).ignoreResiduals(); diff --git a/core/src/main/java/org/apache/iceberg/actions/RewriteFilePlan.java b/core/src/main/java/org/apache/iceberg/actions/RewriteFilePlan.java new file mode 100644 index 000000000000..b6d9a35ebd9f --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/RewriteFilePlan.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.actions; + +import java.util.Map; +import java.util.stream.Stream; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.StructLike; + +/** Result of the data file rewrite planning. */ +public class RewriteFilePlan + extends FileRewritePlan< + RewriteDataFiles.FileGroupInfo, FileScanTask, DataFile, RewriteFileGroup> { + private final int outputSpecId; + + public RewriteFilePlan( + Stream groups, + int totalGroupCount, + Map groupsInPartition, + long writeMaxFileSize, + int outputSpecId) { + super(groups, totalGroupCount, groupsInPartition, writeMaxFileSize); + this.outputSpecId = outputSpecId; + } + + /** Partition specification id for the target files */ + public int outputSpecId() { + return outputSpecId; + } +} diff --git a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletePlan.java b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletePlan.java new file mode 100644 index 000000000000..15ee241ad99e --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletePlan.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.actions; + +import java.util.Map; +import java.util.stream.Stream; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.PositionDeletesScanTask; +import org.apache.iceberg.StructLike; + +/** Result of the positional delete file rewrite planning. 
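+ * <p>A consumption sketch (editorial; {@code planner} is assumed to be an initialized
+ * {@link RewritePositionDeletesGroupPlanner} and the per-group handling is elided):
+ *
+ * <pre>{@code
+ * RewritePositionDeletePlan plan = planner.plan();
+ * long maxBytes = plan.writeMaxFileSize();
+ * plan.groups().forEach(group -> { ... });
+ * }</pre>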
*/ +public class RewritePositionDeletePlan + extends FileRewritePlan< + RewritePositionDeleteFiles.FileGroupInfo, + PositionDeletesScanTask, + DeleteFile, + RewritePositionDeletesGroup> { + public RewritePositionDeletePlan( + Stream groups, + int totalGroupCount, + Map groupsInPartition, + long writeMaxFileSize) { + super(groups, totalGroupCount, groupsInPartition, writeMaxFileSize); + } +} diff --git a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java index 74109df05a1f..b1017ffcad14 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java @@ -36,6 +36,7 @@ import org.apache.iceberg.TableProperties; import org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupInfo; import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; @@ -63,6 +64,10 @@ public class RewritePositionDeletesGroupPlanner private final boolean caseSensitive; private RewriteJobOrder rewriteJobOrder; + public RewritePositionDeletesGroupPlanner(Table table) { + this(table, Expressions.alwaysTrue(), false); + } + public RewritePositionDeletesGroupPlanner(Table table, Expression filter, boolean caseSensitive) { super(table); this.caseSensitive = caseSensitive; @@ -94,9 +99,7 @@ public void init(Map options) { * @return the generated plan which could be executed during the compaction */ @Override - public FileRewritePlan< - FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> - plan() { + public RewritePositionDeletePlan plan() { StructLikeMap>> plan = planFileGroups(); RewriteExecutionContext ctx = new RewriteExecutionContext(); Stream groups = @@ -121,8 +124,8 @@ public void init(Map options) { .sorted(FileRewriteGroup.taskComparator(rewriteJobOrder)); Map groupsInPartition = plan.transformValues(List::size); int totalGroupCount = groupsInPartition.values().stream().reduce(Integer::sum).orElse(0); - return new FileRewritePlan<>( - groups, totalGroupCount, groupsInPartition, writeMaxFileSize(), outputSpecId()); + return new RewritePositionDeletePlan( + groups, totalGroupCount, groupsInPartition, writeMaxFileSize()); } private StructLikeMap>> planFileGroups() { diff --git a/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java b/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java index 746395d57bca..e10019a9f547 100644 --- a/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java +++ b/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java @@ -30,12 +30,10 @@ import java.util.stream.Collectors; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; -import org.apache.iceberg.FileScanTask; import org.apache.iceberg.RewriteJobOrder; import org.apache.iceberg.StructLike; import org.apache.iceberg.TestBase; import org.apache.iceberg.TestTables; -import org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import 
org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -93,8 +91,7 @@ void testJobOrder(RewriteJobOrder order) { planner.init( ImmutableMap.of( RewriteFileGroupPlanner.REWRITE_ALL, "true", REWRITE_JOB_ORDER, order.name())); - FileRewritePlan result = - planner.plan(); + RewriteFilePlan result = planner.plan(); List groups = result.groups().collect(Collectors.toList()); assertThat(groups.stream().map(group -> group.info().partition()).collect(Collectors.toList())) .isEqualTo(EXPECTED.get(order)); @@ -120,8 +117,7 @@ void testUnpartitionedTable() { "1", RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "30")); - FileRewritePlan result = - planner.plan(); + RewriteFilePlan result = planner.plan(); assertThat(result.totalGroupCount()).isEqualTo(1); assertThat(result.groups().iterator().next().numInputFiles()).isEqualTo(2); } @@ -136,8 +132,7 @@ void testMaxGroupSize() { "true", RewriteFileGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES, "10")); - FileRewritePlan result = - planner.plan(); + RewriteFilePlan result = planner.plan(); assertThat(result.totalGroupCount()).isEqualTo(6); assertThat(result.groupsInPartition(FILE_1.partition())).isEqualTo(3); assertThat(result.groupsInPartition(FILE_4.partition())).isEqualTo(2); @@ -150,8 +145,7 @@ void testEmptyTable() { planner.init(REWRITE_ALL); - FileRewritePlan result = - planner.plan(); + RewriteFilePlan result = planner.plan(); assertThat(table.currentSnapshot()).as("Table must be empty").isNull(); assertThat(result.totalGroupCount()).isZero(); @@ -167,8 +161,7 @@ void testFilter() { Expressions.equal(Expressions.bucket("data", 16), 0), Expressions.equal(Expressions.bucket("data", 16), 2))); planner.init(REWRITE_ALL); - FileRewritePlan plan = - planner.plan(); + RewriteFilePlan plan = planner.plan(); List groups = plan.groups().collect(Collectors.toList()); assertThat(plan.totalGroupCount()).isEqualTo(2); @@ -188,8 +181,7 @@ void testWriteMaxFileSize() { "true", RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, String.valueOf(targetFileSize))); - FileRewritePlan plan = - planner.plan(); + RewriteFilePlan plan = planner.plan(); assertThat(plan.writeMaxFileSize()) .isGreaterThan(targetFileSize) .isLessThan((long) (targetFileSize * MAX_FILE_SIZE_DEFAULT_RATIO)); @@ -215,13 +207,12 @@ void testOutputSpec(boolean specific) { planner.init(options); - FileRewritePlan plan = - planner.plan(); + RewriteFilePlan plan = planner.plan(); assertThat(plan.outputSpecId()).isEqualTo(specific ? oldSpecId : newSpecId); } @Test - public void testInvalidOption() { + void testInvalidOption() { addFiles(); assertThatThrownBy( diff --git a/core/src/test/java/org/apache/iceberg/actions/TestRewritePositionDeletesGroupPlanner.java b/core/src/test/java/org/apache/iceberg/actions/TestRewritePositionDeletesGroupPlanner.java new file mode 100644 index 000000000000..2c4520d96d99 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/actions/TestRewritePositionDeletesGroupPlanner.java @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.actions; + +import static org.apache.iceberg.actions.RewritePositionDeleteFiles.REWRITE_JOB_ORDER; +import static org.apache.iceberg.actions.RewritePositionDeletesGroupPlanner.MAX_FILE_SIZE_DEFAULT_RATIO; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.io.File; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.stream.Collectors; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.DataFiles; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileMetadata; +import org.apache.iceberg.PartitionData; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.RewriteJobOrder; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.TestBase; +import org.apache.iceberg.TestTables; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +class TestRewritePositionDeletesGroupPlanner { + private static final Map REWRITE_ALL = + ImmutableMap.of(RewritePositionDeletesGroupPlanner.REWRITE_ALL, "true"); + + private static final DataFile FILE_1 = newDataFile("data_bucket=0"); + private static final DataFile FILE_2 = newDataFile("data_bucket=1"); + private static final DataFile FILE_3 = newDataFile("data_bucket=2"); + private static final Map> EXPECTED = + ImmutableMap.of( + RewriteJobOrder.FILES_DESC, + ImmutableList.of(FILE_1.partition(), FILE_2.partition(), FILE_3.partition()), + RewriteJobOrder.FILES_ASC, + ImmutableList.of(FILE_3.partition(), FILE_2.partition(), FILE_1.partition()), + RewriteJobOrder.BYTES_DESC, + ImmutableList.of(FILE_3.partition(), FILE_1.partition(), FILE_2.partition()), + RewriteJobOrder.BYTES_ASC, + ImmutableList.of(FILE_2.partition(), FILE_1.partition(), FILE_3.partition())); + + @TempDir private File tableDir = null; + private TestTables.TestTable table = null; + + @BeforeEach + public void setupTable() throws Exception { + this.table = TestTables.create(tableDir, "test", TestBase.SCHEMA, TestBase.SPEC, 2); + } + + @AfterEach + public void cleanupTables() { + TestTables.clearTables(); + } + + @ParameterizedTest + @EnumSource( + value = RewriteJobOrder.class, + names = {"FILES_DESC", "FILES_ASC", "BYTES_DESC", "BYTES_ASC"}) + void testJobOrder(RewriteJobOrder order) { + addFiles(); + RewritePositionDeletesGroupPlanner planner = new RewritePositionDeletesGroupPlanner(table); + planner.init( + ImmutableMap.of( + RewriteFileGroupPlanner.REWRITE_ALL, "true", REWRITE_JOB_ORDER, order.name())); + RewritePositionDeletePlan result = planner.plan(); + List groups = result.groups().collect(Collectors.toList()); + assertThat( + groups.stream() + .map( + 
group -> + new PartitionData(TestBase.SPEC.partitionType()) + .copyFor(group.info().partition())) + .collect(Collectors.toList())) + .isEqualTo(EXPECTED.get(order)); + assertThat(result.totalGroupCount()).isEqualTo(3); + EXPECTED.get(order).forEach(s -> assertThat(result.groupsInPartition(s)).isEqualTo(1)); + } + + @Test + void testUnpartitionedTable() { + table.updateSpec().removeField("data_bucket").commit(); + table.refresh(); + + table + .newRowDelta() + .addRows(newDataFile("")) + .addDeletes(newDeleteFile(10)) + .addDeletes(newDeleteFile(20)) + .addDeletes(newDeleteFile(30)) + .commit(); + + RewritePositionDeletesGroupPlanner planner = new RewritePositionDeletesGroupPlanner(table); + planner.init( + ImmutableMap.of( + RewriteFileGroupPlanner.MIN_INPUT_FILES, + "1", + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, + "30")); + RewritePositionDeletePlan result = planner.plan(); + assertThat(result.totalGroupCount()).isEqualTo(1); + assertThat(result.groups().iterator().next().numInputFiles()).isEqualTo(2); + } + + @Test + void testMaxGroupSize() { + addFiles(); + RewritePositionDeletesGroupPlanner planner = new RewritePositionDeletesGroupPlanner(table); + planner.init( + ImmutableMap.of( + RewritePositionDeletesGroupPlanner.REWRITE_ALL, + "true", + RewritePositionDeletesGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES, + "10")); + RewritePositionDeletePlan result = planner.plan(); + assertThat(result.totalGroupCount()).isEqualTo(6); + assertThat(result.groupsInPartition(FILE_1.partition())).isEqualTo(3); + assertThat(result.groupsInPartition(FILE_2.partition())).isEqualTo(2); + assertThat(result.groupsInPartition(FILE_3.partition())).isEqualTo(1); + } + + @Test + void testEmptyTable() { + RewritePositionDeletesGroupPlanner planner = new RewritePositionDeletesGroupPlanner(table); + + planner.init(REWRITE_ALL); + + RewritePositionDeletePlan result = planner.plan(); + + assertThat(table.currentSnapshot()).as("Table must be empty").isNull(); + assertThat(result.totalGroupCount()).isZero(); + } + + @Test + void testFilter() { + addFiles(); + RewritePositionDeletesGroupPlanner planner = + new RewritePositionDeletesGroupPlanner( + table, + Expressions.or( + Expressions.equal(Expressions.bucket("data", 16), 0), + Expressions.equal(Expressions.bucket("data", 16), 2)), + false); + planner.init(REWRITE_ALL); + RewritePositionDeletePlan plan = planner.plan(); + List groups = plan.groups().collect(Collectors.toList()); + + assertThat(plan.totalGroupCount()).isEqualTo(2); + assertThat(groups).hasSize(2); + assertThat(groups.stream().mapToLong(FileRewriteGroup::numInputFiles).sum()).isEqualTo(4); + } + + @Test + void testWriteMaxFileSize() { + int targetFileSize = 10; + addFiles(); + + RewritePositionDeletesGroupPlanner planner = new RewritePositionDeletesGroupPlanner(table); + planner.init( + ImmutableMap.of( + RewritePositionDeletesGroupPlanner.REWRITE_ALL, + "true", + RewritePositionDeletesGroupPlanner.TARGET_FILE_SIZE_BYTES, + String.valueOf(targetFileSize))); + RewritePositionDeletePlan plan = planner.plan(); + assertThat(plan.writeMaxFileSize()) + .isGreaterThan(targetFileSize) + .isLessThan((long) (targetFileSize * MAX_FILE_SIZE_DEFAULT_RATIO)); + } + + @Test + void testInvalidOption() { + addFiles(); + + assertThatThrownBy( + () -> { + RewritePositionDeletesGroupPlanner planner = + new RewritePositionDeletesGroupPlanner(table); + + planner.init(ImmutableMap.of(RewritePositionDeleteFiles.REWRITE_JOB_ORDER, "foo")); + }) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid rewrite job 
order name: foo"); + } + + private void addFiles() { + table + .newRowDelta() + .addRows(FILE_1) + .addDeletes(newDeleteFile(FILE_1.partition(), 10)) + .addDeletes(newDeleteFile(FILE_1.partition(), 10)) + .addDeletes(newDeleteFile(FILE_1.partition(), 10)) + .addRows(FILE_2) + .addDeletes(newDeleteFile(FILE_2.partition(), 11)) + .addDeletes(newDeleteFile(FILE_2.partition(), 11)) + .addRows(FILE_3) + .addDeletes(newDeleteFile(FILE_3.partition(), 50)) + .commit(); + } + + private static DataFile newDataFile(String partitionPath) { + return DataFiles.builder(TestBase.SPEC) + .withPath("/path/to/data-" + UUID.randomUUID() + ".parquet") + .withFileSizeInBytes(10) + .withPartitionPath(partitionPath) + .withRecordCount(1) + .build(); + } + + private static DeleteFile newDeleteFile(long fileSize) { + return newDeleteFile( + new PartitionData(PartitionSpec.unpartitioned().partitionType()), fileSize); + } + + private static DeleteFile newDeleteFile(StructLike partition, long fileSize) { + return FileMetadata.deleteFileBuilder(TestBase.SPEC) + .ofPositionDeletes() + .withPath("/path/to/delete-" + UUID.randomUUID() + ".parquet") + .withFileSizeInBytes(fileSize) + .withPartition(partition) + .withRecordCount(1) + .build(); + } +} diff --git a/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedRewriter.java b/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedFileRewritePlanner.java similarity index 62% rename from core/src/test/java/org/apache/iceberg/actions/TestSizeBasedRewriter.java rename to core/src/test/java/org/apache/iceberg/actions/TestSizeBasedFileRewritePlanner.java index 82286d250574..013be9cb94b8 100644 --- a/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedRewriter.java +++ b/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedFileRewritePlanner.java @@ -21,35 +21,40 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.Mockito.when; -import java.util.Arrays; +import java.io.File; import java.util.List; import java.util.Map; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.MockFileScanTask; -import org.apache.iceberg.ParameterizedTestExtension; -import org.apache.iceberg.Parameters; import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; import org.apache.iceberg.TestBase; -import org.apache.iceberg.expressions.Expression; -import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.TestTables; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.junit.jupiter.api.TestTemplate; -import org.junit.jupiter.api.extension.ExtendWith; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import org.mockito.Mockito; -@ExtendWith(ParameterizedTestExtension.class) -class TestSizeBasedRewriter extends TestBase { +class TestSizeBasedFileRewritePlanner { + @TempDir private File tableDir = null; + private TestTables.TestTable table = null; - @Parameters(name = "formatVersion = {0}") - protected static List parameters() { - return Arrays.asList(1, 2, 3); + @BeforeEach + public void setupTable() throws Exception { + this.table = TestTables.create(tableDir, "test", TestBase.SCHEMA, TestBase.SPEC, 3); } - @TestTemplate + 
@AfterEach + public void cleanupTables() { + TestTables.clearTables(); + } + + @Test void testSplitSizeLowerBound() { FileScanTask task1 = new MockFileScanTask(mockDataFile()); FileScanTask task2 = new MockFileScanTask(mockDataFile()); @@ -57,7 +62,7 @@ void testSplitSizeLowerBound() { FileScanTask task4 = new MockFileScanTask(mockDataFile()); List tasks = ImmutableList.of(task1, task2, task3, task4); - RewriteFileGroupPlanner planner = new TestingPlanner(table, Expressions.alwaysTrue(), 1, tasks); + TestingPlanner planner = new TestingPlanner(table); long minFileSize = 256L * 1024 * 1024; long targetFileSize = 512L * 1024 * 1024; @@ -72,28 +77,42 @@ void testSplitSizeLowerBound() { // the total task size is 580 MB and the target file size is 512 MB // the remainder must be written into a separate file as it exceeds 10% + List> groups = Lists.newArrayList(planner.planFileGroups(tasks).iterator()); - RewriteFileGroup group = planner.plan().groups().iterator().next(); - - assertThat(group.expectedOutputFiles()).isEqualTo(2); + assertThat(groups).hasSize(1); + List group = groups.get(0); // the split size must be >= targetFileSize and < maxFileSize - long splitSize = group.sizeInBytes(); + long splitSize = group.stream().mapToLong(FileScanTask::sizeBytes).sum(); assertThat(splitSize).isGreaterThanOrEqualTo(targetFileSize).isLessThan(maxFileSize); } - private static class TestingPlanner extends RewriteFileGroupPlanner { - private final List tasks; + private static class TestingPlanner + extends SizeBasedFileRewritePlanner< + RewriteDataFiles.FileGroupInfo, FileScanTask, DataFile, RewriteFileGroup> { + protected TestingPlanner(Table table) { + super(table); + } - private TestingPlanner( - Table table, Expression filter, long snapshotId, List tasks) { - super(table, filter, snapshotId, false); - this.tasks = tasks; + @Override + protected long defaultTargetFileSize() { + return 0; + } + + @Override + protected Iterable filterFiles(Iterable tasks) { + return tasks; + } + + @Override + protected Iterable> filterFileGroups(List> groups) { + return groups; } @Override - CloseableIterable tasks() { - return CloseableIterable.withNoopClose(tasks); + public FileRewritePlan + plan() { + throw new UnsupportedOperationException("Not supported"); } } diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java index 8cf189ee8b79..f28bcd90ea9c 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java @@ -34,13 +34,13 @@ import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; import org.apache.iceberg.actions.FileRewriteExecutor; -import org.apache.iceberg.actions.FileRewritePlan; import org.apache.iceberg.actions.ImmutableRewriteDataFiles; import org.apache.iceberg.actions.ImmutableRewriteDataFiles.Result.Builder; import org.apache.iceberg.actions.RewriteDataFiles; import org.apache.iceberg.actions.RewriteDataFilesCommitManager; import org.apache.iceberg.actions.RewriteFileGroup; import org.apache.iceberg.actions.RewriteFileGroupPlanner; +import org.apache.iceberg.actions.RewriteFilePlan; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.expressions.Expression; @@ -94,8 +94,9 @@ public class 
RewriteDataFilesSparkAction private boolean useStartingSequenceNumber; private boolean caseSensitive; private RewriteFileGroupPlanner planner = null; - private FileRewriteExecutor rewriter = - null; + private FileRewriteExecutor< + FileGroupInfo, FileScanTask, DataFile, RewriteFileGroup, RewriteFilePlan> + rewriter = null; RewriteDataFilesSparkAction(SparkSession spark, Table table) { super(spark.cloneSession()); @@ -158,7 +159,7 @@ public RewriteDataFiles.Result execute() { init(startingSnapshotId); - FileRewritePlan plan = plan(); + RewriteFilePlan plan = plan(); rewriter.initPlan(plan); if (plan.totalGroupCount() == 0) { @@ -182,7 +183,7 @@ public RewriteDataFiles.Result execute() { } @VisibleForTesting - FileRewritePlan plan() { + RewriteFilePlan plan() { return planner.plan(); } @@ -200,9 +201,7 @@ void init(long startingSnapshotId) { } @VisibleForTesting - RewriteFileGroup rewriteFiles( - FileRewritePlan plan, - RewriteFileGroup fileGroup) { + RewriteFileGroup rewriteFiles(RewriteFilePlan plan, RewriteFileGroup fileGroup) { String desc = jobDesc(fileGroup, plan); Set addedFiles = withJobGroupInfo( @@ -227,9 +226,7 @@ RewriteDataFilesCommitManager commitManager(long startingSnapshotId) { table, startingSnapshotId, useStartingSequenceNumber, commitSummary()); } - private Builder doExecute( - FileRewritePlan plan, - RewriteDataFilesCommitManager commitManager) { + private Builder doExecute(RewriteFilePlan plan, RewriteDataFilesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); ConcurrentLinkedQueue rewrittenGroups = Queues.newConcurrentLinkedQueue(); @@ -289,8 +286,7 @@ private Builder doExecute( } private Builder doExecuteWithPartialProgress( - FileRewritePlan plan, - RewriteDataFilesCommitManager commitManager) { + RewriteFilePlan plan, RewriteDataFilesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); // start commit service @@ -407,9 +403,7 @@ void validateAndInitOptions() { PARTIAL_PROGRESS_ENABLED); } - private String jobDesc( - RewriteFileGroup group, - FileRewritePlan plan) { + private String jobDesc(RewriteFileGroup group, RewriteFilePlan plan) { StructLike partition = group.info().partition(); if (partition.size() > 0) { return String.format( diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewritePositionDeleteFilesSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewritePositionDeleteFilesSparkAction.java index e237f46a163f..4fc7934f783b 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewritePositionDeleteFilesSparkAction.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewritePositionDeleteFilesSparkAction.java @@ -27,12 +27,11 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.stream.Collectors; import org.apache.iceberg.DeleteFile; -import org.apache.iceberg.PositionDeletesScanTask; import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; -import org.apache.iceberg.actions.FileRewritePlan; import org.apache.iceberg.actions.ImmutableRewritePositionDeleteFiles; import org.apache.iceberg.actions.RewritePositionDeleteFiles; +import org.apache.iceberg.actions.RewritePositionDeletePlan; import org.apache.iceberg.actions.RewritePositionDeletesCommitManager; import org.apache.iceberg.actions.RewritePositionDeletesCommitManager.CommitService; import org.apache.iceberg.actions.RewritePositionDeletesGroup; @@ -41,7 +40,6 @@ import 
org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Queues; @@ -111,8 +109,7 @@ public RewritePositionDeleteFiles.Result execute() { validateAndInitOptions(); - FileRewritePlan - plan = plan(); + RewritePositionDeletePlan plan = planner.plan(); rewriter.initPlan(plan); if (plan.totalGroupCount() == 0) { @@ -127,17 +124,8 @@ public RewritePositionDeleteFiles.Result execute() { } } - @VisibleForTesting - FileRewritePlan - plan() { - return planner.plan(); - } - private RewritePositionDeletesGroup rewriteDeleteFiles( - FileRewritePlan< - FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> - plan, - RewritePositionDeletesGroup fileGroup) { + RewritePositionDeletePlan plan, RewritePositionDeletesGroup fileGroup) { String desc = jobDesc(fileGroup, plan); Set addedFiles = withJobGroupInfo( @@ -163,10 +151,7 @@ private RewritePositionDeletesCommitManager commitManager() { } private Result doExecute( - FileRewritePlan< - FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> - plan, - RewritePositionDeletesCommitManager commitManager) { + RewritePositionDeletePlan plan, RewritePositionDeletesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); ConcurrentLinkedQueue rewrittenGroups = @@ -231,10 +216,7 @@ private Result doExecute( } private Result doExecuteWithPartialProgress( - FileRewritePlan< - FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> - plan, - RewritePositionDeletesCommitManager commitManager) { + RewritePositionDeletePlan plan, RewritePositionDeletesCommitManager commitManager) { ExecutorService rewriteService = rewriteService(); // start commit service @@ -319,11 +301,7 @@ private void validateAndInitOptions() { PARTIAL_PROGRESS_ENABLED); } - private String jobDesc( - RewritePositionDeletesGroup group, - FileRewritePlan< - FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> - plan) { + private String jobDesc(RewritePositionDeletesGroup group, RewritePositionDeletePlan plan) { StructLike partition = group.info().partition(); if (partition.size() > 0) { return String.format( diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackPositionDeletesRewriteExecutor.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackPositionDeletesRewriteExecutor.java index fb8b73f17463..cfc939cf0347 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackPositionDeletesRewriteExecutor.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkBinPackPositionDeletesRewriteExecutor.java @@ -35,6 +35,7 @@ import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; import org.apache.iceberg.actions.RewritePositionDeleteFiles.FileGroupInfo; +import org.apache.iceberg.actions.RewritePositionDeletePlan; import org.apache.iceberg.actions.RewritePositionDeletesGroup; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.spark.PositionDeletesRewriteCoordinator; @@ -54,7 +55,11 @@ class 
SparkBinPackPositionDeletesRewriteExecutor extends SparkRewriteExecutor< - FileGroupInfo, PositionDeletesScanTask, DeleteFile, RewritePositionDeletesGroup> { + FileGroupInfo, + PositionDeletesScanTask, + DeleteFile, + RewritePositionDeletesGroup, + RewritePositionDeletePlan> { private final SparkSession spark; private final SparkTableCache tableCache = SparkTableCache.get(); diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkRewriteExecutor.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkRewriteExecutor.java index f723be7d633d..99c2f88a9b0a 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkRewriteExecutor.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkRewriteExecutor.java @@ -22,7 +22,6 @@ import java.util.Set; import org.apache.iceberg.ContentFile; import org.apache.iceberg.ContentScanTask; -import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Table; import org.apache.iceberg.actions.FileRewriteExecutor; import org.apache.iceberg.actions.FileRewriteGroup; @@ -36,16 +35,17 @@ * @param the Java type of the tasks to read content files * @param the Java type of the content files * @param the Java type of the planned groups + * @param
<P>
the Java type of the plan to execute */ abstract class SparkRewriteExecutor< I, T extends ContentScanTask, F extends ContentFile, - G extends FileRewriteGroup> - implements FileRewriteExecutor { + G extends FileRewriteGroup, + P extends FileRewritePlan> + implements FileRewriteExecutor { private final Table table; private long writeMaxFileSize; - private int outputSpecId; SparkRewriteExecutor(Table table) { this.table = table; @@ -59,18 +59,9 @@ long writeMaxFileSize() { return writeMaxFileSize; } - int outputSpecId() { - return outputSpecId; - } - - PartitionSpec outputSpec() { - return table.specs().get(outputSpecId); - } - @Override - public void initPlan(FileRewritePlan plan) { + public void initPlan(P plan) { this.writeMaxFileSize = plan.writeMaxFileSize(); - this.outputSpecId = plan.outputSpecId(); } @Override diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriteExecutor.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriteExecutor.java index 068979d8e5db..9979beacd777 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriteExecutor.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriteExecutor.java @@ -23,21 +23,25 @@ import java.util.UUID; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Table; import org.apache.iceberg.actions.RewriteDataFiles.FileGroupInfo; import org.apache.iceberg.actions.RewriteFileGroup; +import org.apache.iceberg.actions.RewriteFilePlan; import org.apache.iceberg.spark.FileRewriteCoordinator; import org.apache.iceberg.spark.ScanTaskSetManager; import org.apache.iceberg.spark.SparkTableCache; import org.apache.spark.sql.SparkSession; abstract class SparkSizeBasedDataRewriteExecutor - extends SparkRewriteExecutor { + extends SparkRewriteExecutor< + FileGroupInfo, FileScanTask, DataFile, RewriteFileGroup, RewriteFilePlan> { private final SparkSession spark; private final SparkTableCache tableCache = SparkTableCache.get(); private final ScanTaskSetManager taskSetManager = ScanTaskSetManager.get(); private final FileRewriteCoordinator coordinator = FileRewriteCoordinator.get(); + private int outputSpecId; SparkSizeBasedDataRewriteExecutor(SparkSession spark, Table table) { super(table); @@ -67,4 +71,18 @@ public Set rewrite(RewriteFileGroup group) { coordinator.clearRewrite(table(), groupId); } } + + @Override + public void initPlan(RewriteFilePlan plan) { + super.initPlan(plan); + this.outputSpecId = plan.outputSpecId(); + } + + int outputSpecId() { + return outputSpecId; + } + + PartitionSpec outputSpec() { + return table().specs().get(outputSpecId); + } } From 8e23a3601fe64bbf09495ba581a9b2d49f3dbcb6 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Tue, 3 Dec 2024 12:39:17 +0100 Subject: [PATCH 10/11] Some more test refactor and moved COMPRESSION_FACTOR to SparkShufflingDataRewritePlanner --- .../iceberg/actions/FileRewriteExecutor.java | 2 +- .../iceberg/actions/FileRewriteGroup.java | 10 +- .../iceberg/actions/FileRewritePlan.java | 2 +- .../actions/RewriteFileGroupPlanner.java | 17 +- .../RewritePositionDeletesGroupPlanner.java | 63 +++-- .../actions/SizeBasedFileRewritePlanner.java | 18 +- .../actions/SizeBasedFileRewriter.java | 2 +- .../actions/TestRewriteFileGroupPlanner.java | 163 +++++++++-- ...estRewritePositionDeletesGroupPlanner.java | 30 +- 
.../TestSizeBasedFileRewritePlanner.java | 62 +++- .../actions/RewriteDataFilesSparkAction.java | 10 +- .../SparkShufflingDataRewriteExecutor.java | 24 -- .../SparkShufflingDataRewritePlanner.java | 85 ++++++ .../SparkSizeBasedDataRewriteExecutor.java | 16 +- .../actions/TestRewriteDataFilesAction.java | 11 +- .../actions/TestSparkFileRewriteExecutor.java | 264 +----------------- .../actions/TestSparkFileRewritePlanner.java | 104 +++++++ 17 files changed, 513 insertions(+), 370 deletions(-) create mode 100644 spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewritePlanner.java create mode 100644 spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewritePlanner.java diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java b/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java index bbe84915fa63..bc4102a25de4 100644 --- a/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java +++ b/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java @@ -69,7 +69,7 @@ default String description() { * *
<p>
The implementation is supposed to be engine-specific (e.g. Spark, Flink, Trino). * - * @param group a group of scan tasks for files to be rewritten together + * @param group of scan tasks for files to be rewritten together * @return a set of newly written files */ Set rewrite(G group); diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewriteGroup.java b/core/src/main/java/org/apache/iceberg/actions/FileRewriteGroup.java index c48a6d6f4a2c..08f6e050b163 100644 --- a/core/src/main/java/org/apache/iceberg/actions/FileRewriteGroup.java +++ b/core/src/main/java/org/apache/iceberg/actions/FileRewriteGroup.java @@ -37,38 +37,44 @@ public abstract class FileRewriteGroup, F extend private final long splitSize; private final int expectedOutputFiles; - protected FileRewriteGroup( - I info, List fileScanTasks, long splitSize, int expectedOutputFiles) { + FileRewriteGroup(I info, List fileScanTasks, long splitSize, int expectedOutputFiles) { this.info = info; this.fileScanTasks = fileScanTasks; this.splitSize = splitSize; this.expectedOutputFiles = expectedOutputFiles; } + /** Identifiers and partition information about the group. */ public I info() { return info; } + /** Input of the group. {@link ContentScanTask}s to read. */ public List fileScans() { return fileScanTasks; } + /** Expected split size for the output files. */ public long splitSize() { return splitSize; } + /** Expected number of the output files. */ public int expectedOutputFiles() { return expectedOutputFiles; } + /** Accumulated size for the input files. */ public long sizeInBytes() { return fileScanTasks.stream().mapToLong(T::length).sum(); } + /** Number of the input files. */ public int numInputFiles() { return fileScanTasks.size(); } + /** Comparator to order the FileRewriteGroups based on a provided {@link RewriteJobOrder}. */ public static , F extends ContentFile> Comparator> taskComparator(RewriteJobOrder rewriteJobOrder) { switch (rewriteJobOrder) { diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java b/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java index 2cab4d7d12b4..dc4cc9a6d57a 100644 --- a/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java +++ b/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java @@ -56,7 +56,7 @@ protected FileRewritePlan( this.writeMaxFileSize = writeMaxFileSize; } - /** The stream of the generated {@link RewriteFileGroup}s. */ + /** The stream of the generated {@link FileRewriteGroup}s. */ public Stream groups() { return groups; } diff --git a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java index ba499f392411..2ba632fcf061 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewriteFileGroupPlanner.java @@ -50,7 +50,8 @@ /** * Groups specified files in the {@link Table} by {@link RewriteFileGroup}s. These will be grouped - * by partitions. + * by partitions. Extends {@link SizeBasedFileRewritePlanner} with delete file threshold and job + * {@link RewriteDataFiles#REWRITE_JOB_ORDER} handling. */ public class RewriteFileGroupPlanner extends SizeBasedFileRewritePlanner { @@ -89,6 +90,15 @@ public RewriteFileGroupPlanner(Table table, Expression filter) { false); } + /** + * Creates the planner for the given table. 
+ * + * @param table to plan for + * @param filter used to remove files from the plan + * @param snapshotId used as a basis for planning - should be used as starting snapshot id at + * commit time when replacing the files + * @param caseSensitive property used for scanning + */ public RewriteFileGroupPlanner( Table table, Expression filter, Long snapshotId, boolean caseSensitive) { super(table); @@ -136,11 +146,6 @@ protected long defaultTargetFileSize() { TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT); } - /** - * Generates the plan for the current table. - * - * @return the generated plan which could be executed during the compaction - */ @Override public RewriteFilePlan plan() { StructLikeMap>> plan = planFileGroups(); diff --git a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java index b1017ffcad14..14bd3cabefcc 100644 --- a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java +++ b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeletesGroupPlanner.java @@ -52,7 +52,8 @@ /** * Groups specified files in the {@link Table} by {@link RewriteFileGroup}s. These will be grouped - * by partitions. + * by partitions. Extends the {@link SizeBasedFileRewritePlanner} with {@link + * RewritePositionDeleteFiles#REWRITE_JOB_ORDER} handling. */ public class RewritePositionDeletesGroupPlanner extends SizeBasedFileRewritePlanner< @@ -68,6 +69,13 @@ public RewritePositionDeletesGroupPlanner(Table table) { this(table, Expressions.alwaysTrue(), false); } + /** + * Creates the planner for the given table. + * + * @param table to plan for + * @param filter used to remove files from the plan + * @param caseSensitive property used for scanning + */ public RewritePositionDeletesGroupPlanner(Table table, Expression filter, boolean caseSensitive) { super(table); this.caseSensitive = caseSensitive; @@ -78,7 +86,7 @@ public RewritePositionDeletesGroupPlanner(Table table, Expression filter, boolea public Set validOptions() { return ImmutableSet.builder() .addAll(super.validOptions()) - .add(RewriteDataFiles.REWRITE_JOB_ORDER) + .add(RewritePositionDeleteFiles.REWRITE_JOB_ORDER) .build(); } @@ -93,11 +101,6 @@ public void init(Map options) { RewritePositionDeleteFiles.REWRITE_JOB_ORDER_DEFAULT)); } - /** - * Generates the plan for the current table. 
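As a usage sketch for the refactored planner: only the constructor, options, and accessors come from this patch; the startingSnapshotId variable and the "bytes-desc" job-order value are illustrative assumptions.

// Plan rewrite groups for a table, largest groups first (illustrative values).
RewriteFileGroupPlanner planner =
    new RewriteFileGroupPlanner(table, Expressions.alwaysTrue(), startingSnapshotId, true);
planner.init(
    ImmutableMap.of(
        SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, String.valueOf(512L * 1024 * 1024),
        RewriteDataFiles.REWRITE_JOB_ORDER, "bytes-desc"));
RewriteFilePlan plan = planner.plan();
plan.groups()
    .forEach(
        group ->
            System.out.printf(
                "group %s: %d input files, %d expected output files%n",
                group.info(), group.numInputFiles(), group.expectedOutputFiles()));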
- * - * @return the generated plan which could be executed during the compaction - */ @Override public RewritePositionDeletePlan plan() { StructLikeMap>> plan = planFileGroups(); @@ -128,6 +131,25 @@ public RewritePositionDeletePlan plan() { groups, totalGroupCount, groupsInPartition, writeMaxFileSize()); } + @Override + protected Iterable filterFiles(Iterable tasks) { + return Iterables.filter(tasks, this::wronglySized); + } + + @Override + protected Iterable> filterFileGroups( + List> groups) { + return Iterables.filter(groups, this::shouldRewrite); + } + + @Override + protected long defaultTargetFileSize() { + return PropertyUtil.propertyAsLong( + table().properties(), + TableProperties.DELETE_TARGET_FILE_SIZE_BYTES, + TableProperties.DELETE_TARGET_FILE_SIZE_BYTES_DEFAULT); + } + private StructLikeMap>> planFileGroups() { Table deletesTable = MetadataTableUtils.createMetadataTableInstance(table(), MetadataTableType.POSITION_DELETES); @@ -148,29 +170,6 @@ private StructLikeMap>> planFileGroups() { } } - @Override - protected Iterable filterFiles(Iterable tasks) { - return Iterables.filter(tasks, this::wronglySized); - } - - @Override - protected Iterable> filterFileGroups( - List> groups) { - return Iterables.filter(groups, this::shouldRewrite); - } - - private boolean shouldRewrite(List group) { - return enoughInputFiles(group) || enoughContent(group) || tooMuchContent(group); - } - - @Override - protected long defaultTargetFileSize() { - return PropertyUtil.propertyAsLong( - table().properties(), - TableProperties.DELETE_TARGET_FILE_SIZE_BYTES, - TableProperties.DELETE_TARGET_FILE_SIZE_BYTES_DEFAULT); - } - private CloseableIterable planFiles(Table deletesTable) { PositionDeletesTable.PositionDeletesBatchScan scan = (PositionDeletesTable.PositionDeletesBatchScan) deletesTable.newBatchScan(); @@ -214,6 +213,10 @@ private RewritePositionDeletesGroup newRewriteGroup( info, Lists.newArrayList(tasks), splitSize, numOutputSize); } + private boolean shouldRewrite(List group) { + return enoughInputFiles(group) || enoughContent(group) || tooMuchContent(group); + } + private static class RewriteExecutionContext { private final Map partitionIndexMap; private final AtomicInteger groupIndex; diff --git a/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewritePlanner.java b/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewritePlanner.java index f743c689da35..edaec5af0f27 100644 --- a/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewritePlanner.java +++ b/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewritePlanner.java @@ -114,23 +114,21 @@ public abstract class SizeBasedFileRewritePlanner< private int minInputFiles; private boolean rewriteAll; private long maxGroupSize; - private int outputSpecId; protected SizeBasedFileRewritePlanner(Table table) { this.table = table; } + /** Expected target file size before configuration. */ protected abstract long defaultTargetFileSize(); + /** Additional filter for tasks before grouping. */ protected abstract Iterable filterFiles(Iterable tasks); + /** Additional filter for groups. 
*/ protected abstract Iterable> filterFileGroups(List> groups); - protected Table table() { - return table; - } - @Override public Set validOptions() { return ImmutableSet.of( @@ -158,11 +156,15 @@ public void init(Map options) { } } + protected Table table() { + return table; + } + protected boolean wronglySized(T task) { return task.length() < minFileSize || task.length() > maxFileSize; } - public Iterable> planFileGroups(Iterable tasks) { + protected Iterable> planFileGroups(Iterable tasks) { Iterable filteredTasks = rewriteAll ? tasks : filterFiles(tasks); BinPacking.ListPacker packer = new BinPacking.ListPacker<>(maxGroupSize, 1, false); List> groups = packer.pack(filteredTasks, ContentScanTask::length); @@ -257,11 +259,11 @@ protected int numOutputFiles(long inputSize) { * * @return the target size plus one half of the distance between max and target */ - public long writeMaxFileSize() { + protected long writeMaxFileSize() { return (long) (targetFileSize + ((maxFileSize - targetFileSize) * 0.5)); } - public int outputSpecId() { + protected int outputSpecId() { return outputSpecId; } diff --git a/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java b/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java index 319e44c4a20c..00ef0b6694de 100644 --- a/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java +++ b/core/src/main/java/org/apache/iceberg/actions/SizeBasedFileRewriter.java @@ -195,7 +195,7 @@ protected long inputSize(List group) { * of output files. The final split size is adjusted to be at least as big as the target file size * but less than the max write file size. */ - public long splitSize(long inputSize) { + protected long splitSize(long inputSize) { long estimatedSplitSize = (inputSize / numOutputFiles(inputSize)) + SPLIT_OVERHEAD; if (estimatedSplitSize < targetFileSize) { return targetFileSize; diff --git a/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java b/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java index e10019a9f547..48991cd601b7 100644 --- a/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java +++ b/core/src/test/java/org/apache/iceberg/actions/TestRewriteFileGroupPlanner.java @@ -30,6 +30,8 @@ import java.util.stream.Collectors; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.MockFileScanTask; import org.apache.iceberg.RewriteJobOrder; import org.apache.iceberg.StructLike; import org.apache.iceberg.TestBase; @@ -37,6 +39,8 @@ import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -212,32 +216,155 @@ void testOutputSpec(boolean specific) { } @Test - void testInvalidOption() { - addFiles(); + void testValidOptions() { + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); - assertThatThrownBy( - () -> { - RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); + assertThat(planner.validOptions()) + .as("Planner must report all supported options") + 
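To make the writeMaxFileSize() formula from the SizeBasedFileRewritePlanner hunk above concrete (illustrative sizes only):

// writeMaxFileSize() = target + (max - target) * 0.5
long targetFileSize = 512L * 1024 * 1024; // 512 MB target
long maxFileSize = 768L * 1024 * 1024;    // 768 MB max (hypothetical setting)
long writeMax = (long) (targetFileSize + ((maxFileSize - targetFileSize) * 0.5)); // 640 MB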
.isEqualTo( + ImmutableSet.of( + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MIN_INPUT_FILES, + RewriteFileGroupPlanner.REWRITE_ALL, + RewriteFileGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES, + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, + RewriteDataFiles.REWRITE_JOB_ORDER)); + } - planner.init(ImmutableMap.of(RewriteDataFiles.REWRITE_JOB_ORDER, "foo")); - }) + @Test + void testInvalidOption() { + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); + + Map invalidRewriteJobOrderOptions = + ImmutableMap.of(RewriteDataFiles.REWRITE_JOB_ORDER, "foo"); + assertThatThrownBy(() -> planner.init(invalidRewriteJobOrderOptions)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid rewrite job order name: foo"); - assertThatThrownBy( - () -> { - RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); - - planner.init( - ImmutableMap.of( - RewriteFileGroupPlanner.REWRITE_ALL, - "true", - RewriteDataFiles.OUTPUT_SPEC_ID, - String.valueOf(1234))); - }) + Map invalidOutputSpecIdOptions = + ImmutableMap.of(RewriteDataFiles.OUTPUT_SPEC_ID, String.valueOf(1234)); + assertThatThrownBy(() -> planner.init(invalidOutputSpecIdOptions)) .isInstanceOf(IllegalArgumentException.class) .hasMessage( "Cannot use output spec id 1234 because the table does not contain a reference to this spec-id."); + + Map invalidDeleteFileThresholdOptions = + ImmutableMap.of(RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "-1"); + assertThatThrownBy(() -> planner.init(invalidDeleteFileThresholdOptions)) + .hasMessageContaining("'delete-file-threshold' is set to -1 but must be >= 0"); + } + + @Test + void testBinPackDataSelectFiles() { + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); + + checkDataFileSizeFiltering(planner); + checkDataFilesDeleteThreshold(planner); + checkDataFileGroupWithEnoughFiles(planner); + checkDataFileGroupWithEnoughData(planner); + checkDataFileGroupWithTooMuchData(planner); + } + + private void checkDataFileSizeFiltering(RewriteFileGroupPlanner planner) { + FileScanTask tooSmallTask = new MockFileScanTask(100L); + FileScanTask optimal = new MockFileScanTask(450); + FileScanTask tooBigTask = new MockFileScanTask(1000L); + List tasks = ImmutableList.of(tooSmallTask, optimal, tooBigTask); + + Map options = + ImmutableMap.of( + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "250", + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "500", + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "750", + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); + planner.init(options); + + Iterable> groups = planner.planFileGroups(tasks); + assertThat(groups).as("Must have 1 group").hasSize(1); + List group = Iterables.getOnlyElement(groups); + assertThat(group).as("Must rewrite 2 files").hasSize(2); + } + + private void checkDataFilesDeleteThreshold(RewriteFileGroupPlanner planner) { + FileScanTask tooManyDeletesTask = MockFileScanTask.mockTaskWithDeletes(1000L, 3); + FileScanTask optimalTask = MockFileScanTask.mockTaskWithDeletes(1000L, 1); + List tasks = ImmutableList.of(tooManyDeletesTask, optimalTask); + + Map options = + ImmutableMap.of( + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "1", + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "2000", + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "5000", + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "2"); + planner.init(options); + + Iterable> groups = 
planner.planFileGroups(tasks); + assertThat(groups).as("Must have 1 group").hasSize(1); + List group = Iterables.getOnlyElement(groups); + assertThat(group).as("Must rewrite 1 file").hasSize(1); + } + + private void checkDataFileGroupWithEnoughFiles(RewriteFileGroupPlanner planner) { + List tasks = + ImmutableList.of( + new MockFileScanTask(100L), + new MockFileScanTask(100L), + new MockFileScanTask(100L), + new MockFileScanTask(100L)); + + Map options = + ImmutableMap.of( + RewriteFileGroupPlanner.MIN_INPUT_FILES, "3", + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "150", + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "1000", + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "5000", + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); + planner.init(options); + + Iterable> groups = planner.planFileGroups(tasks); + assertThat(groups).as("Must have 1 group").hasSize(1); + List group = Iterables.getOnlyElement(groups); + assertThat(group).as("Must rewrite 4 files").hasSize(4); + } + + private void checkDataFileGroupWithEnoughData(RewriteFileGroupPlanner planner) { + List tasks = + ImmutableList.of( + new MockFileScanTask(100L), new MockFileScanTask(100L), new MockFileScanTask(100L)); + + Map options = + ImmutableMap.of( + RewriteFileGroupPlanner.MIN_INPUT_FILES, "5", + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "200", + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "250", + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "500", + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); + planner.init(options); + + Iterable> groups = planner.planFileGroups(tasks); + assertThat(groups).as("Must have 1 group").hasSize(1); + List group = Iterables.getOnlyElement(groups); + assertThat(group).as("Must rewrite 3 files").hasSize(3); + } + + private void checkDataFileGroupWithTooMuchData(RewriteFileGroupPlanner planner) { + List tasks = ImmutableList.of(new MockFileScanTask(2000L)); + + Map options = + ImmutableMap.of( + RewriteFileGroupPlanner.MIN_INPUT_FILES, "5", + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "200", + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "250", + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "500", + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); + planner.init(options); + + Iterable> groups = planner.planFileGroups(tasks); + assertThat(groups).as("Must have 1 group").hasSize(1); + List group = Iterables.getOnlyElement(groups); + assertThat(group).as("Must rewrite big file").hasSize(1); } private void addFiles() { diff --git a/core/src/test/java/org/apache/iceberg/actions/TestRewritePositionDeletesGroupPlanner.java b/core/src/test/java/org/apache/iceberg/actions/TestRewritePositionDeletesGroupPlanner.java index 2c4520d96d99..07858706d9ee 100644 --- a/core/src/test/java/org/apache/iceberg/actions/TestRewritePositionDeletesGroupPlanner.java +++ b/core/src/test/java/org/apache/iceberg/actions/TestRewritePositionDeletesGroupPlanner.java @@ -41,6 +41,7 @@ import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -195,16 +196,29 @@ void testWriteMaxFileSize() { } @Test - void testInvalidOption() { - addFiles(); + void testValidOptions() { + 
RewritePositionDeletesGroupPlanner planner = new RewritePositionDeletesGroupPlanner(table); + + assertThat(planner.validOptions()) + .as("Planner must report all supported options") + .isEqualTo( + ImmutableSet.of( + RewritePositionDeletesGroupPlanner.TARGET_FILE_SIZE_BYTES, + RewritePositionDeletesGroupPlanner.MIN_FILE_SIZE_BYTES, + RewritePositionDeletesGroupPlanner.MAX_FILE_SIZE_BYTES, + RewritePositionDeletesGroupPlanner.MIN_INPUT_FILES, + RewritePositionDeletesGroupPlanner.REWRITE_ALL, + RewritePositionDeletesGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES, + RewriteDataFiles.REWRITE_JOB_ORDER)); + } - assertThatThrownBy( - () -> { - RewritePositionDeletesGroupPlanner planner = - new RewritePositionDeletesGroupPlanner(table); + @Test + void testInvalidOption() { + RewritePositionDeletesGroupPlanner planner = new RewritePositionDeletesGroupPlanner(table); - planner.init(ImmutableMap.of(RewritePositionDeleteFiles.REWRITE_JOB_ORDER, "foo")); - }) + Map invalidRewriteJobOrderOptions = + ImmutableMap.of(RewritePositionDeleteFiles.REWRITE_JOB_ORDER, "foo"); + assertThatThrownBy(() -> planner.init(invalidRewriteJobOrderOptions)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid rewrite job order name: foo"); } diff --git a/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedFileRewritePlanner.java b/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedFileRewritePlanner.java index 013be9cb94b8..43ea307e11ff 100644 --- a/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedFileRewritePlanner.java +++ b/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedFileRewritePlanner.java @@ -19,6 +19,7 @@ package org.apache.iceberg.actions; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.Mockito.when; import java.io.File; @@ -29,10 +30,12 @@ import org.apache.iceberg.MockFileScanTask; import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; import org.apache.iceberg.TestBase; import org.apache.iceberg.TestTables; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -87,6 +90,63 @@ void testSplitSizeLowerBound() { assertThat(splitSize).isGreaterThanOrEqualTo(targetFileSize).isLessThan(maxFileSize); } + @Test + void testValidOptions() { + TestingPlanner planner = new TestingPlanner(table); + + assertThat(planner.validOptions()) + .as("Planner must report all supported options") + .isEqualTo( + ImmutableSet.of( + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MIN_INPUT_FILES, + RewriteFileGroupPlanner.REWRITE_ALL, + RewriteFileGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES)); + } + + @Test + void testInvalidOption() { + TestingPlanner planner = new TestingPlanner(table); + + Map invalidTargetSizeOptions = + ImmutableMap.of(SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, "0"); + assertThatThrownBy(() -> planner.init(invalidTargetSizeOptions)) + .hasMessageContaining("'target-file-size-bytes' is set to 0 but must be > 0"); + + Map invalidMinSizeOptions = + 
ImmutableMap.of(SizeBasedFileRewritePlanner.MIN_FILE_SIZE_BYTES, "-1"); + assertThatThrownBy(() -> planner.init(invalidMinSizeOptions)) + .hasMessageContaining("'min-file-size-bytes' is set to -1 but must be >= 0"); + + Map invalidTargetMinSizeOptions = + ImmutableMap.of( + SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, "3", + SizeBasedFileRewritePlanner.MIN_FILE_SIZE_BYTES, "5"); + assertThatThrownBy(() -> planner.init(invalidTargetMinSizeOptions)) + .hasMessageContaining("'target-file-size-bytes' (3) must be > 'min-file-size-bytes' (5)") + .hasMessageContaining("all new files will be smaller than the min threshold"); + + Map invalidTargetMaxSizeOptions = + ImmutableMap.of( + SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, "5", + SizeBasedFileRewritePlanner.MAX_FILE_SIZE_BYTES, "3"); + assertThatThrownBy(() -> planner.init(invalidTargetMaxSizeOptions)) + .hasMessageContaining("'target-file-size-bytes' (5) must be < 'max-file-size-bytes' (3)") + .hasMessageContaining("all new files will be larger than the max threshold"); + + Map invalidMinInputFilesOptions = + ImmutableMap.of(SizeBasedFileRewritePlanner.MIN_INPUT_FILES, "0"); + assertThatThrownBy(() -> planner.init(invalidMinInputFilesOptions)) + .hasMessageContaining("'min-input-files' is set to 0 but must be > 0"); + + Map invalidMaxFileGroupSizeOptions = + ImmutableMap.of(SizeBasedFileRewritePlanner.MAX_FILE_GROUP_SIZE_BYTES, "0"); + assertThatThrownBy(() -> planner.init(invalidMaxFileGroupSizeOptions)) + .hasMessageContaining("'max-file-group-size-bytes' is set to 0 but must be > 0"); + } + private static class TestingPlanner extends SizeBasedFileRewritePlanner< RewriteDataFiles.FileGroupInfo, FileScanTask, DataFile, RewriteFileGroup> { @@ -96,7 +156,7 @@ protected TestingPlanner(Table table) { @Override protected long defaultTargetFileSize() { - return 0; + return TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT; } @Override diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java index f28bcd90ea9c..29585a85ac59 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java @@ -97,6 +97,7 @@ public class RewriteDataFilesSparkAction private FileRewriteExecutor< FileGroupInfo, FileScanTask, DataFile, RewriteFileGroup, RewriteFilePlan> rewriter = null; + private boolean shufflingPlanner = false; RewriteDataFilesSparkAction(SparkSession spark, Table table) { super(spark.cloneSession()); @@ -116,6 +117,7 @@ public RewriteDataFilesSparkAction binPack() { Preconditions.checkArgument( rewriter == null, "Must use only one rewriter type (bin-pack, sort, zorder)"); this.rewriter = new SparkBinPackDataRewriteExecutor(spark(), table); + this.shufflingPlanner = false; return this; } @@ -124,6 +126,7 @@ public RewriteDataFilesSparkAction sort(SortOrder sortOrder) { Preconditions.checkArgument( rewriter == null, "Must use only one rewriter type (bin-pack, sort, zorder)"); this.rewriter = new SparkSortDataRewriteExecutor(spark(), table, sortOrder); + this.shufflingPlanner = true; return this; } @@ -132,6 +135,7 @@ public RewriteDataFilesSparkAction sort() { Preconditions.checkArgument( rewriter == null, "Must use only one rewriter type (bin-pack, sort, zorder)"); this.rewriter = new SparkSortDataRewriteExecutor(spark(), table); + 
this.shufflingPlanner = true; return this; } @@ -140,6 +144,7 @@ public RewriteDataFilesSparkAction zOrder(String... columnNames) { Preconditions.checkArgument( rewriter == null, "Must use only one rewriter type (bin-pack, sort, zorder)"); this.rewriter = new SparkZOrderDataRewriteExecutor(spark(), table, Arrays.asList(columnNames)); + this.shufflingPlanner = true; return this; } @@ -190,7 +195,10 @@ RewriteFilePlan plan() { @VisibleForTesting void init(long startingSnapshotId) { - this.planner = new RewriteFileGroupPlanner(table, filter, startingSnapshotId, caseSensitive); + this.planner = + shufflingPlanner + ? new SparkShufflingDataRewritePlanner(table, filter, startingSnapshotId, caseSensitive) + : new RewriteFileGroupPlanner(table, filter, startingSnapshotId, caseSensitive); // Default to BinPack if no strategy selected if (this.rewriter == null) { diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewriteExecutor.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewriteExecutor.java index e5090a68bff2..87b9326e3b6c 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewriteExecutor.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewriteExecutor.java @@ -50,19 +50,6 @@ abstract class SparkShufflingDataRewriteExecutor extends SparkSizeBasedDataRewriteExecutor { - /** - * The number of shuffle partitions and consequently the number of output files created by the - * Spark sort is based on the size of the input data files used in this file rewriter. Due to - * compression, the disk file sizes may not accurately represent the size of files in the output. - * This parameter lets the user adjust the file size used for estimating actual output data size. - * A factor greater than 1.0 would generate more files than we would expect based on the on-disk - * file size. A value less than 1.0 would create fewer files than we would expect based on the - * on-disk size. - */ - public static final String COMPRESSION_FACTOR = "compression-factor"; - - public static final double COMPRESSION_FACTOR_DEFAULT = 1.0; - /** * The number of shuffle partitions to use for each output file. By default, this file rewriter * assumes each shuffle partition would become a separate output file. 
Attempting to generate @@ -79,7 +66,6 @@ abstract class SparkShufflingDataRewriteExecutor extends SparkSizeBasedDataRewri public static final int SHUFFLE_PARTITIONS_PER_FILE_DEFAULT = 1; - private double compressionFactor; private int numShufflePartitionsPerFile; protected SparkShufflingDataRewriteExecutor(SparkSession spark, Table table) { @@ -105,7 +91,6 @@ protected abstract Dataset sortedDF( public Set validOptions() { return ImmutableSet.builder() .addAll(super.validOptions()) - .add(COMPRESSION_FACTOR) .add(SHUFFLE_PARTITIONS_PER_FILE) .build(); } @@ -113,7 +98,6 @@ public Set validOptions() { @Override public void init(Map options) { super.init(options); - this.compressionFactor = compressionFactor(options); this.numShufflePartitionsPerFile = numShufflePartitionsPerFile(options); } @@ -178,14 +162,6 @@ private org.apache.iceberg.SortOrder outputSortOrder(List group) { } } - private double compressionFactor(Map options) { - double value = - PropertyUtil.propertyAsDouble(options, COMPRESSION_FACTOR, COMPRESSION_FACTOR_DEFAULT); - Preconditions.checkArgument( - value > 0, "'%s' is set to %s but must be > 0", COMPRESSION_FACTOR, value); - return value; - } - private int numShufflePartitionsPerFile(Map options) { int value = PropertyUtil.propertyAsInt( diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewritePlanner.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewritePlanner.java new file mode 100644 index 000000000000..16410946bd7d --- /dev/null +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkShufflingDataRewritePlanner.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.spark.actions; + +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.Table; +import org.apache.iceberg.actions.RewriteFileGroupPlanner; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.util.PropertyUtil; + +/** + * Extends the {@link RewriteFileGroupPlanner} with the possibility to set the expected compression + * factor. + */ +public class SparkShufflingDataRewritePlanner extends RewriteFileGroupPlanner { + /** + * The number of shuffle partitions and consequently the number of output files created by the + * Spark sort is based on the size of the input data files used in this file rewriter. Due to + * compression, the disk file sizes may not accurately represent the size of files in the output. + * This parameter lets the user adjust the file size used for estimating actual output data size. 
+ * A factor greater than 1.0 would generate more files than we would expect based on the on-disk + * file size. A value less than 1.0 would create fewer files than we would expect based on the + * on-disk size. + */ + public static final String COMPRESSION_FACTOR = "compression-factor"; + + public static final double COMPRESSION_FACTOR_DEFAULT = 1.0; + + private double compressionFactor; + + public SparkShufflingDataRewritePlanner(Table table) { + super(table); + } + + public SparkShufflingDataRewritePlanner( + Table table, Expression filter, Long snapshotId, boolean caseSensitive) { + super(table, filter, snapshotId, caseSensitive); + } + + @Override + public Set validOptions() { + return ImmutableSet.builder() + .addAll(super.validOptions()) + .add(COMPRESSION_FACTOR) + .build(); + } + + @Override + public void init(Map options) { + super.init(options); + this.compressionFactor = compressionFactor(options); + } + + @Override + protected int numOutputFiles(long inputSize) { + return Math.max(1, super.numOutputFiles((long) (inputSize * compressionFactor))); + } + + private double compressionFactor(Map options) { + double value = + PropertyUtil.propertyAsDouble(options, COMPRESSION_FACTOR, COMPRESSION_FACTOR_DEFAULT); + Preconditions.checkArgument( + value > 0, "'%s' is set to %s but must be > 0", COMPRESSION_FACTOR, value); + return value; + } +} diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriteExecutor.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriteExecutor.java index 9979beacd777..800882cc31e5 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriteExecutor.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSizeBasedDataRewriteExecutor.java @@ -55,6 +55,14 @@ protected SparkSession spark() { return spark; } + protected int outputSpecId() { + return outputSpecId; + } + + protected PartitionSpec outputSpec() { + return table().specs().get(outputSpecId); + } + @Override public Set rewrite(RewriteFileGroup group) { String groupId = UUID.randomUUID().toString(); @@ -77,12 +85,4 @@ public void initPlan(RewriteFilePlan plan) { super.initPlan(plan); this.outputSpecId = plan.outputSpecId(); } - - int outputSpecId() { - return outputSpecId; - } - - PartitionSpec outputSpec() { - return table().specs().get(outputSpecId); - } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java index 980a1e71bef9..d25710e7cd13 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java @@ -77,6 +77,7 @@ import org.apache.iceberg.actions.RewriteDataFilesCommitManager; import org.apache.iceberg.actions.RewriteFileGroup; import org.apache.iceberg.actions.RewriteFileGroupPlanner; +import org.apache.iceberg.actions.RewriteFilePlan; import org.apache.iceberg.actions.SizeBasedFileRewritePlanner; import org.apache.iceberg.data.GenericAppenderFactory; import org.apache.iceberg.data.GenericRecord; @@ -887,7 +888,7 @@ public void testSingleCommitWithRewriteFailure() { GroupInfoMatcher failGroup = new GroupInfoMatcher(1, 3, 7); doThrow(new RuntimeException("Rewrite Failed")) .when(spyRewrite) - .rewriteFiles(any(), argThat(failGroup)); + 
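To illustrate the compression-factor scaling introduced in SparkShufflingDataRewritePlanner above; the ceiling division below is a simplified stand-in for super.numOutputFiles(), which applies extra remainder handling:

// With a 2.0 compression factor, a 400 MB on-disk group is sized as 800 MB of
// estimated output data, so two ~512 MB-target files are planned instead of one.
long inputSize = 400L * 1024 * 1024;      // compressed size of the group on disk
double compressionFactor = 2.0;           // shuffled output expected to be ~2x larger
long scaled = (long) (inputSize * compressionFactor);
long targetFileSize = 512L * 1024 * 1024;
long numOutputFiles = Math.max(1, (scaled + targetFileSize - 1) / targetFileSize); // 2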
.rewriteFiles(any(RewriteFilePlan.class), argThat(failGroup)); assertThatThrownBy(spyRewrite::execute) .isInstanceOf(RuntimeException.class) @@ -990,7 +991,7 @@ public void testParallelSingleCommitWithRewriteFailure() { GroupInfoMatcher failGroup = new GroupInfoMatcher(1, 3, 7); doThrow(new CommitFailedException("Rewrite Failed")) .when(spyRewrite) - .rewriteFiles(any(), argThat(failGroup)); + .rewriteFiles(any(RewriteFilePlan.class), argThat(failGroup)); assertThatThrownBy(spyRewrite::execute) .isInstanceOf(CommitFailedException.class) @@ -1027,7 +1028,7 @@ public void testPartialProgressWithRewriteFailure() { GroupInfoMatcher failGroup = new GroupInfoMatcher(1, 3, 7); doThrow(new RuntimeException("Rewrite Failed")) .when(spyRewrite) - .rewriteFiles(any(), argThat(failGroup)); + .rewriteFiles(any(RewriteFilePlan.class), argThat(failGroup)); RewriteDataFiles.Result result = spyRewrite.execute(); @@ -1070,7 +1071,7 @@ public void testParallelPartialProgressWithRewriteFailure() { GroupInfoMatcher failGroup = new GroupInfoMatcher(1, 3, 7); doThrow(new RuntimeException("Rewrite Failed")) .when(spyRewrite) - .rewriteFiles(any(), argThat(failGroup)); + .rewriteFiles(any(RewriteFilePlan.class), argThat(failGroup)); RewriteDataFiles.Result result = spyRewrite.execute(); @@ -1158,7 +1159,7 @@ public void testParallelPartialProgressWithMaxFailedCommits() { GroupInfoMatcher failGroup = new GroupInfoMatcher(1, 3, 7); doThrow(new RuntimeException("Rewrite Failed")) .when(spyRewrite) - .rewriteFiles(any(), argThat(failGroup)); + .rewriteFiles(any(RewriteFilePlan.class), argThat(failGroup)); assertThatThrownBy(() -> spyRewrite.execute()) .isInstanceOf(RuntimeException.class) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriteExecutor.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriteExecutor.java index 444bbf458f17..32d1816e56e4 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriteExecutor.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewriteExecutor.java @@ -21,22 +21,15 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import java.util.List; import java.util.Map; -import org.apache.iceberg.FileScanTask; -import org.apache.iceberg.MockFileScanTask; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; -import org.apache.iceberg.actions.RewriteDataFiles; -import org.apache.iceberg.actions.RewriteFileGroupPlanner; -import org.apache.iceberg.actions.SizeBasedFileRewritePlanner; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.spark.TestBase; import org.apache.iceberg.types.Types.IntegerType; import org.apache.iceberg.types.Types.NestedField; @@ -44,7 +37,7 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; -public class TestSparkFileRewriteExecutor extends TestBase { +class TestSparkFileRewriteExecutor extends TestBase { private static final TableIdentifier TABLE_IDENT = TableIdentifier.of("default", "tbl"); private 
static final Schema SCHEMA = @@ -61,119 +54,7 @@ public void removeTable() { } @Test - public void testBinPackDataSelectFiles() { - Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - RewriteFileGroupPlanner rewriter = new RewriteFileGroupPlanner(table); - - checkDataFileSizeFiltering(rewriter); - checkDataFilesDeleteThreshold(rewriter); - checkDataFileGroupWithEnoughFiles(rewriter); - checkDataFileGroupWithEnoughData(rewriter); - checkDataFileGroupWithTooMuchData(rewriter); - } - - private void checkDataFileSizeFiltering(RewriteFileGroupPlanner rewriter) { - FileScanTask tooSmallTask = new MockFileScanTask(100L); - FileScanTask optimal = new MockFileScanTask(450); - FileScanTask tooBigTask = new MockFileScanTask(1000L); - List tasks = ImmutableList.of(tooSmallTask, optimal, tooBigTask); - - Map options = - ImmutableMap.of( - RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "250", - RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "500", - RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "750", - RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); - rewriter.init(options); - - Iterable> groups = rewriter.planFileGroups(tasks); - assertThat(groups).as("Must have 1 group").hasSize(1); - List group = Iterables.getOnlyElement(groups); - assertThat(group).as("Must rewrite 2 files").hasSize(2); - } - - private void checkDataFilesDeleteThreshold(RewriteFileGroupPlanner rewriter) { - FileScanTask tooManyDeletesTask = MockFileScanTask.mockTaskWithDeletes(1000L, 3); - FileScanTask optimalTask = MockFileScanTask.mockTaskWithDeletes(1000L, 1); - List tasks = ImmutableList.of(tooManyDeletesTask, optimalTask); - - Map options = - ImmutableMap.of( - RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "1", - RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "2000", - RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "5000", - RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "2"); - rewriter.init(options); - - Iterable> groups = rewriter.planFileGroups(tasks); - assertThat(groups).as("Must have 1 group").hasSize(1); - List group = Iterables.getOnlyElement(groups); - assertThat(group).as("Must rewrite 1 file").hasSize(1); - } - - private void checkDataFileGroupWithEnoughFiles(RewriteFileGroupPlanner rewriter) { - List tasks = - ImmutableList.of( - new MockFileScanTask(100L), - new MockFileScanTask(100L), - new MockFileScanTask(100L), - new MockFileScanTask(100L)); - - Map options = - ImmutableMap.of( - RewriteFileGroupPlanner.MIN_INPUT_FILES, "3", - RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "150", - RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "1000", - RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "5000", - RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); - rewriter.init(options); - - Iterable> groups = rewriter.planFileGroups(tasks); - assertThat(groups).as("Must have 1 group").hasSize(1); - List group = Iterables.getOnlyElement(groups); - assertThat(group).as("Must rewrite 4 files").hasSize(4); - } - - private void checkDataFileGroupWithEnoughData(RewriteFileGroupPlanner rewriter) { - List tasks = - ImmutableList.of( - new MockFileScanTask(100L), new MockFileScanTask(100L), new MockFileScanTask(100L)); - - Map options = - ImmutableMap.of( - RewriteFileGroupPlanner.MIN_INPUT_FILES, "5", - RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "200", - RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "250", - RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "500", - RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); - 
rewriter.init(options); - - Iterable> groups = rewriter.planFileGroups(tasks); - assertThat(groups).as("Must have 1 group").hasSize(1); - List group = Iterables.getOnlyElement(groups); - assertThat(group).as("Must rewrite 3 files").hasSize(3); - } - - private void checkDataFileGroupWithTooMuchData(RewriteFileGroupPlanner rewriter) { - List tasks = ImmutableList.of(new MockFileScanTask(2000L)); - - Map options = - ImmutableMap.of( - RewriteFileGroupPlanner.MIN_INPUT_FILES, "5", - RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, "200", - RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, "250", - RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, "500", - RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, String.valueOf(Integer.MAX_VALUE)); - rewriter.init(options); - - Iterable> groups = rewriter.planFileGroups(tasks); - assertThat(groups).as("Must have 1 group").hasSize(1); - List group = Iterables.getOnlyElement(groups); - assertThat(group).as("Must rewrite big file").hasSize(1); - } - - @Test - public void testInvalidConstructorUsagesSortData() { + void testInvalidConstructorUsagesSortData() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); assertThatThrownBy(() -> new SparkSortDataRewriteExecutor(spark, table)) @@ -190,7 +71,7 @@ public void testInvalidConstructorUsagesSortData() { } @Test - public void testInvalidConstructorUsagesZOrderData() { + void testInvalidConstructorUsagesZOrderData() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA, SPEC); assertThatThrownBy(() -> new SparkZOrderDataRewriteExecutor(spark, table, null)) @@ -211,139 +92,48 @@ public void testInvalidConstructorUsagesZOrderData() { } @Test - public void testBinPackDataValidOptions() { + void testBinPackDataValidOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); SparkBinPackDataRewriteExecutor rewriter = new SparkBinPackDataRewriteExecutor(spark, table); - RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); assertThat(rewriter.validOptions()) .as("Rewriter must report all supported options") .isEqualTo(ImmutableSet.of()); - - assertThat(planner.validOptions()) - .as("Planner must report all supported options") - .isEqualTo( - ImmutableSet.of( - RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, - RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, - RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, - RewriteFileGroupPlanner.MIN_INPUT_FILES, - RewriteFileGroupPlanner.REWRITE_ALL, - RewriteFileGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES, - RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, - RewriteDataFiles.REWRITE_JOB_ORDER)); } @Test - public void testSortDataValidOptions() { + void testSortDataValidOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); SparkSortDataRewriteExecutor rewriter = new SparkSortDataRewriteExecutor(spark, table, SORT_ORDER); - RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); assertThat(rewriter.validOptions()) .as("Rewriter must report all supported options") - .isEqualTo( - ImmutableSet.of( - SparkSortDataRewriteExecutor.SHUFFLE_PARTITIONS_PER_FILE, - SparkSortDataRewriteExecutor.COMPRESSION_FACTOR)); - - assertThat(planner.validOptions()) - .as("Planner must report all supported options") - .isEqualTo( - ImmutableSet.of( - RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, - RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, - RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, - RewriteFileGroupPlanner.MIN_INPUT_FILES, - RewriteFileGroupPlanner.REWRITE_ALL, - RewriteFileGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES, - RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, - 
RewriteDataFiles.REWRITE_JOB_ORDER)); + .isEqualTo(ImmutableSet.of(SparkSortDataRewriteExecutor.SHUFFLE_PARTITIONS_PER_FILE)); } @Test - public void testZOrderDataValidOptions() { + void testZOrderDataValidOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); ImmutableList zOrderCols = ImmutableList.of("id"); SparkZOrderDataRewriteExecutor rewriter = new SparkZOrderDataRewriteExecutor(spark, table, zOrderCols); - RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); assertThat(rewriter.validOptions()) .as("Rewriter must report all supported options") .isEqualTo( ImmutableSet.of( SparkZOrderDataRewriteExecutor.SHUFFLE_PARTITIONS_PER_FILE, - SparkZOrderDataRewriteExecutor.COMPRESSION_FACTOR, SparkZOrderDataRewriteExecutor.MAX_OUTPUT_SIZE, SparkZOrderDataRewriteExecutor.VAR_LENGTH_CONTRIBUTION)); - assertThat(planner.validOptions()) - .as("Planner must report all supported options") - .isEqualTo( - ImmutableSet.of( - RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, - RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, - RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, - RewriteFileGroupPlanner.MIN_INPUT_FILES, - RewriteFileGroupPlanner.REWRITE_ALL, - RewriteFileGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES, - RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, - RewriteDataFiles.REWRITE_JOB_ORDER)); - } - - @Test - public void testInvalidValuesForBinPackDataOptions() { - Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); - - validateSizeBasedRewriterOptions(planner); - - Map invalidDeleteThresholdOptions = - ImmutableMap.of(RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "-1"); - assertThatThrownBy(() -> planner.init(invalidDeleteThresholdOptions)) - .hasMessageContaining("'delete-file-threshold' is set to -1 but must be >= 0"); - } - - @Test - public void testInvalidValuesForSortDataOptions() { - Table table = catalog.createTable(TABLE_IDENT, SCHEMA); - SparkSortDataRewriteExecutor rewriter = - new SparkSortDataRewriteExecutor(spark, table, SORT_ORDER); - RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); - - validateSizeBasedRewriterOptions(planner); - - Map invalidDeleteThresholdOptions = - ImmutableMap.of(RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "-1"); - assertThatThrownBy(() -> planner.init(invalidDeleteThresholdOptions)) - .hasMessageContaining("'delete-file-threshold' is set to -1 but must be >= 0"); - - Map invalidCompressionFactorOptions = - ImmutableMap.of(SparkShufflingDataRewriteExecutor.COMPRESSION_FACTOR, "0"); - assertThatThrownBy(() -> rewriter.init(invalidCompressionFactorOptions)) - .hasMessageContaining("'compression-factor' is set to 0.0 but must be > 0"); } @Test - public void testInvalidValuesForZOrderDataOptions() { + void testInvalidValuesForZOrderDataOptions() { Table table = catalog.createTable(TABLE_IDENT, SCHEMA); ImmutableList zOrderCols = ImmutableList.of("id"); SparkZOrderDataRewriteExecutor rewriter = new SparkZOrderDataRewriteExecutor(spark, table, zOrderCols); - RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); - - validateSizeBasedRewriterOptions(planner); - - Map invalidDeleteThresholdOptions = - ImmutableMap.of(RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "-1"); - assertThatThrownBy(() -> planner.init(invalidDeleteThresholdOptions)) - .hasMessageContaining("'delete-file-threshold' is set to -1 but must be >= 0"); - - Map invalidCompressionFactorOptions = - ImmutableMap.of(SparkShufflingDataRewriteExecutor.COMPRESSION_FACTOR, 
"0"); - assertThatThrownBy(() -> rewriter.init(invalidCompressionFactorOptions)) - .hasMessageContaining("'compression-factor' is set to 0.0 but must be > 0"); Map invalidMaxOutputOptions = ImmutableMap.of(SparkZOrderDataRewriteExecutor.MAX_OUTPUT_SIZE, "0"); @@ -357,42 +147,4 @@ public void testInvalidValuesForZOrderDataOptions() { .hasMessageContaining("Cannot use less than 1 byte for variable length types with ZOrder") .hasMessageContaining("'var-length-contribution' was set to 0"); } - - private void validateSizeBasedRewriterOptions(SizeBasedFileRewritePlanner rewriter) { - Map invalidTargetSizeOptions = - ImmutableMap.of(SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, "0"); - assertThatThrownBy(() -> rewriter.init(invalidTargetSizeOptions)) - .hasMessageContaining("'target-file-size-bytes' is set to 0 but must be > 0"); - - Map invalidMinSizeOptions = - ImmutableMap.of(SizeBasedFileRewritePlanner.MIN_FILE_SIZE_BYTES, "-1"); - assertThatThrownBy(() -> rewriter.init(invalidMinSizeOptions)) - .hasMessageContaining("'min-file-size-bytes' is set to -1 but must be >= 0"); - - Map invalidTargetMinSizeOptions = - ImmutableMap.of( - SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, "3", - SizeBasedFileRewritePlanner.MIN_FILE_SIZE_BYTES, "5"); - assertThatThrownBy(() -> rewriter.init(invalidTargetMinSizeOptions)) - .hasMessageContaining("'target-file-size-bytes' (3) must be > 'min-file-size-bytes' (5)") - .hasMessageContaining("all new files will be smaller than the min threshold"); - - Map invalidTargetMaxSizeOptions = - ImmutableMap.of( - SizeBasedFileRewritePlanner.TARGET_FILE_SIZE_BYTES, "5", - SizeBasedFileRewritePlanner.MAX_FILE_SIZE_BYTES, "3"); - assertThatThrownBy(() -> rewriter.init(invalidTargetMaxSizeOptions)) - .hasMessageContaining("'target-file-size-bytes' (5) must be < 'max-file-size-bytes' (3)") - .hasMessageContaining("all new files will be larger than the max threshold"); - - Map invalidMinInputFilesOptions = - ImmutableMap.of(SizeBasedFileRewritePlanner.MIN_INPUT_FILES, "0"); - assertThatThrownBy(() -> rewriter.init(invalidMinInputFilesOptions)) - .hasMessageContaining("'min-input-files' is set to 0 but must be > 0"); - - Map invalidMaxFileGroupSizeOptions = - ImmutableMap.of(SizeBasedFileRewritePlanner.MAX_FILE_GROUP_SIZE_BYTES, "0"); - assertThatThrownBy(() -> rewriter.init(invalidMaxFileGroupSizeOptions)) - .hasMessageContaining("'max-file-group-size-bytes' is set to 0 but must be > 0"); - } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewritePlanner.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewritePlanner.java new file mode 100644 index 000000000000..3426a6a71adb --- /dev/null +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestSparkFileRewritePlanner.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.spark.actions; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.util.Map; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; +import org.apache.iceberg.actions.RewriteDataFiles; +import org.apache.iceberg.actions.RewriteFileGroupPlanner; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.spark.TestBase; +import org.apache.iceberg.types.Types.IntegerType; +import org.apache.iceberg.types.Types.NestedField; +import org.apache.iceberg.types.Types.StringType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; + +class TestSparkFileRewritePlanner extends TestBase { + + private static final TableIdentifier TABLE_IDENT = TableIdentifier.of("default", "tbl"); + private static final Schema SCHEMA = + new Schema( + NestedField.required(1, "id", IntegerType.get()), + NestedField.required(2, "dep", StringType.get())); + private static final PartitionSpec SPEC = + PartitionSpec.builderFor(SCHEMA).identity("dep").build(); + + @AfterEach + public void removeTable() { + catalog.dropTable(TABLE_IDENT); + } + + @Test + void testRewriteFileGroupPlannerValidOptions() { + Table table = catalog.createTable(TABLE_IDENT, SCHEMA); + RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table); + + assertThat(planner.validOptions()) + .as("Planner must report all supported options") + .isEqualTo( + ImmutableSet.of( + RewriteFileGroupPlanner.TARGET_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MIN_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MAX_FILE_SIZE_BYTES, + RewriteFileGroupPlanner.MIN_INPUT_FILES, + RewriteFileGroupPlanner.REWRITE_ALL, + RewriteFileGroupPlanner.MAX_FILE_GROUP_SIZE_BYTES, + RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, + RewriteDataFiles.REWRITE_JOB_ORDER)); + } + + @Test + void testSparkShufflingDataRewritePlannerValidOptions() { + Table table = catalog.createTable(TABLE_IDENT, SCHEMA); + SparkShufflingDataRewritePlanner planner = new SparkShufflingDataRewritePlanner(table); + + assertThat(planner.validOptions()) + .as("Planner must report all supported options") + .isEqualTo( + ImmutableSet.of( + SparkShufflingDataRewritePlanner.COMPRESSION_FACTOR, + SparkShufflingDataRewritePlanner.TARGET_FILE_SIZE_BYTES, + SparkShufflingDataRewritePlanner.MIN_FILE_SIZE_BYTES, + SparkShufflingDataRewritePlanner.MAX_FILE_SIZE_BYTES, + SparkShufflingDataRewritePlanner.MIN_INPUT_FILES, + SparkShufflingDataRewritePlanner.REWRITE_ALL, + SparkShufflingDataRewritePlanner.MAX_FILE_GROUP_SIZE_BYTES, + SparkShufflingDataRewritePlanner.DELETE_FILE_THRESHOLD, + RewriteDataFiles.REWRITE_JOB_ORDER)); + } + + @Test + void testInvalidValuesSparkShufflingDataRewritePlannerOptions() { + Table table = catalog.createTable(TABLE_IDENT, SCHEMA); + SparkShufflingDataRewritePlanner planner = new 
SparkShufflingDataRewritePlanner(table); + + Map invalidCompressionFactorOptions = + ImmutableMap.of(SparkShufflingDataRewritePlanner.COMPRESSION_FACTOR, "0"); + assertThatThrownBy(() -> planner.init(invalidCompressionFactorOptions)) + .hasMessageContaining("'compression-factor' is set to 0.0 but must be > 0"); + } +} From ab5ba4177fb76defda807f50b95b16cef8b245aa Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Mon, 9 Dec 2024 20:35:32 +0100 Subject: [PATCH 11/11] Russell's comments --- .../iceberg/actions/FileRewriteExecutor.java | 14 +++++++++++++- .../apache/iceberg/actions/FileRewritePlan.java | 13 +++++++++---- .../iceberg/actions/FileRewritePlanner.java | 15 +++++++++++---- .../iceberg/actions/SizeBasedDataRewriter.java | 10 +++++++--- 4 files changed, 40 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java b/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java index bc4102a25de4..5d589c6931c5 100644 --- a/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java +++ b/core/src/main/java/org/apache/iceberg/actions/FileRewriteExecutor.java @@ -24,7 +24,19 @@ import org.apache.iceberg.ContentScanTask; /** - * A class for rewriting content file groups ({@link FileRewriteGroup}). + * A class for rewriting content file groups ({@link FileRewriteGroup}). The lifecycle for the + * executor looks like the following: + * + *

+ * <ul>
+ *   <li>{@link #init(Map)} initializes the executor with the configuration parameters
+ *   <li>{@link #initPlan(FileRewritePlan)} initializes the executor with the configuration
+ *       calculated during planning ({@link FileRewritePlan#writeMaxFileSize()}, {@link
+ *       RewriteFilePlan#outputSpecId()})
+ *   <li>{@link #rewrite(FileRewriteGroup)} called for every group in the plan to do the actual
+ *       rewrite of the files, and returns the generated new files
+ * </ul>
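To make the lifecycle concrete, a minimal caller sketch follows. Raw types are used for brevity, and the plan's groups() accessor is an assumption for illustration; only init(Map), initPlan(FileRewritePlan), and rewrite(FileRewriteGroup) come from the contract described above.

    import java.util.Map;

    class ExecutorLifecycleSketch {
      // Drives the three lifecycle steps; a single executor handles every group of the plan.
      @SuppressWarnings({"unchecked", "rawtypes"})
      static void rewriteAll(
          FileRewriteExecutor executor, FileRewritePlan plan, Map<String, String> options) {
        executor.init(options); // (1) static configuration parameters
        executor.initPlan(plan); // (2) values calculated during planning, e.g. writeMaxFileSize()
        // (3) rewrite every planned group; groups() as a stream accessor is assumed here
        plan.groups().forEach(group -> executor.rewrite((FileRewriteGroup) group));
      }
    }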
+ *
+ * <p>A single executor may be used to rewrite multiple groups from the same plan.
 *
 * @param <I> the Java type of the plan info
 * @param <T> the Java type of the tasks to read content files
diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java b/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java
index dc4cc9a6d57a..f313fd1b070d 100644
--- a/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java
+++ b/core/src/main/java/org/apache/iceberg/actions/FileRewritePlan.java
@@ -25,10 +25,15 @@ import org.apache.iceberg.StructLike;
 /**
- * Result of the file rewrite planning.
+ * Result of the file rewrite planning as generated by {@link FileRewritePlanner#plan()}.
  *
- * <p>Contains the planned groups, calculated values required by the {@link FileRewriteExecutor}s
- * and statistics.
+ * <p>The plan contains the stream of the planned groups and statistics about them, such as the
+ * total number of groups and the number of groups in each partition. The plan also contains
+ * values required by the {@link FileRewriteExecutor}s which are calculated from the input data
+ * and the planning parameters.
+ *
+ * <p>Groups in a plan can be processed independently. For example, in Spark this means that each
+ * group would be rewritten in its own Spark job.
 *
 * @param <I> the Java type of the plan info
 * @param <T> the Java type of the tasks to read content files
@@ -71,7 +76,7 @@ public int totalGroupCount() {
     return totalGroupCount;
   }

-  /** Calculated maximum file size for the target files */
+  /** Calculated maximum file size based on the planner's target file size configuration */
   public long writeMaxFileSize() {
     return writeMaxFileSize;
   }
diff --git a/core/src/main/java/org/apache/iceberg/actions/FileRewritePlanner.java b/core/src/main/java/org/apache/iceberg/actions/FileRewritePlanner.java
index ff770874a9a3..0d242bbe6bb0 100644
--- a/core/src/main/java/org/apache/iceberg/actions/FileRewritePlanner.java
+++ b/core/src/main/java/org/apache/iceberg/actions/FileRewritePlanner.java
@@ -26,10 +26,17 @@
 /**
  * A class for planning content file rewrites.
  *

- * <p>The entire rewrite operation is broken down into pieces based on partitioning, and size-based
- * groups within a partition. These subunits of the rewrite are referred to as file groups. A file
- * group will be processed by a {@link FileRewriteExecutor} in a single framework "action". For
- * example, in Spark this means that each group would be rewritten in its own Spark job.
+ * <p>The entire rewrite operation is broken down into pieces. The grouping is based on
+ * partitioning, and the planning could create multiple groups within a partition. The result is a
+ * {@link FileRewritePlan} which contains the data needed by the {@link FileRewriteExecutor}s that
+ * execute the actual file rewrite.
+ *
+ * <p>The lifecycle of the planner is:
+ *
+ * <ul>
+ *   <li>{@link #init(Map)} initializes the planner with the configuration parameters
+ *   <li>{@link #plan()} generates the plan for the given configuration
+ * </ul>
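The planner side can be sketched the same way. The constructor and the option keys below appear in the tests earlier in this patch, and plan(), totalGroupCount(), and writeMaxFileSize() appear in the diffs above; the option values and the raw FileRewritePlan type are illustrative only.

    import java.util.Map;
    import org.apache.iceberg.Table;
    import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;

    class PlannerLifecycleSketch {
      @SuppressWarnings("rawtypes")
      static FileRewritePlan planCompaction(Table table) {
        RewriteFileGroupPlanner planner = new RewriteFileGroupPlanner(table);
        Map<String, String> options =
            ImmutableMap.of(
                RewriteFileGroupPlanner.MIN_INPUT_FILES, "2",
                RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "3");
        planner.init(options); // (1) initialize the planner with the configuration parameters
        FileRewritePlan plan = planner.plan(); // (2) generate the plan for the given configuration
        // statistics and executor inputs computed during planning
        System.out.printf(
            "planned %d groups, max output file size %d bytes%n",
            plan.totalGroupCount(), plan.writeMaxFileSize());
        return plan;
      }
    }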
* * @param the Java type of the plan info * @param the Java type of the tasks to read content files diff --git a/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java b/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java index 66b759321ac8..5c9e2321fd82 100644 --- a/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java +++ b/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java @@ -42,12 +42,16 @@ public abstract class SizeBasedDataRewriter extends SizeBasedFileRewriterDefaults to Integer.MAX_VALUE, which means this feature is not enabled by default. * - * @deprecated since 1.8.0, will be removed in 1.9.0; use {@link RewriteFileGroupPlanner} and - * {@link FileRewriteExecutor}. + * @deprecated since 1.8.0, will be removed in 1.9.0; use {@link + * RewriteFileGroupPlanner#DELETE_FILE_THRESHOLD}. */ @Deprecated public static final String DELETE_FILE_THRESHOLD = "delete-file-threshold"; - public static final int DELETE_FILE_THRESHOLD_DEFAULT = Integer.MAX_VALUE; + /** + * @deprecated since 1.8.0, will be removed in 1.9.0; use {@link + * RewriteFileGroupPlanner#DELETE_FILE_THRESHOLD_DEFAULT}. + */ + @Deprecated public static final int DELETE_FILE_THRESHOLD_DEFAULT = Integer.MAX_VALUE; private int deleteFileThreshold;
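The deprecations above keep the option key string stable; only the constant's owning class moves from the rewriter to the planner. A sketch of the migration, with an arbitrary threshold value:

    import java.util.Map;
    import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;

    class DeleteThresholdMigration {
      // before, deprecated since 1.8.0 and slated for removal in 1.9.0:
      static final Map<String, String> LEGACY =
          ImmutableMap.of(SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, "2");

      // after, for use with RewriteFileGroupPlanner#init(Map); the key is still
      // "delete-file-threshold", so existing option maps keep working:
      static final Map<String, String> REPLACEMENT =
          ImmutableMap.of(RewriteFileGroupPlanner.DELETE_FILE_THRESHOLD, "2");
    }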