Skip to content

Commit 06f9a1d

Browse files
Core: Fix BinPackRewriteFilePlanner producing incorrect output file count with max-files-to-rewrite (#15576)
1 parent b99e12a commit 06f9a1d

2 files changed

Lines changed: 41 additions & 3 deletions

File tree

core/src/main/java/org/apache/iceberg/actions/BinPackRewriteFilePlanner.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -241,13 +241,16 @@ public FileRewritePlan<FileGroupInfo, FileScanTask, DataFile, RewriteFileGroup>
241241
} else if (fileCountRunner.get() < maxFilesToRewrite) {
242242
int remainingSize = maxFilesToRewrite - fileCountRunner.get();
243243
int scanTasksToRewrite = Math.min(fileScanTasks.size(), remainingSize);
244+
List<FileScanTask> tasksToRewrite =
245+
fileScanTasks.subList(0, scanTasksToRewrite);
246+
long rewriteInputSize = inputSize(tasksToRewrite);
244247
selectedFileGroups.add(
245248
newRewriteGroup(
246249
ctx,
247250
partition,
248-
fileScanTasks.subList(0, scanTasksToRewrite),
249-
inputSplitSize(inputSize),
250-
expectedOutputFiles(inputSize)));
251+
tasksToRewrite,
252+
inputSplitSize(rewriteInputSize),
253+
expectedOutputFiles(rewriteInputSize)));
251254
fileCountRunner.getAndAdd(scanTasksToRewrite);
252255
}
253256
});

core/src/test/java/org/apache/iceberg/actions/TestBinPackRewriteFilePlanner.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,41 @@ public void testRewriteMaxFilesOption() {
522522
assertThat(fileScanTasks).isEqualTo(5);
523523
}
524524

525+
@Test
526+
public void testMaxFilesToRewriteUsesCorrectInputSizeForExpectedOutputFiles() {
527+
// Create files with known sizes where truncation matters for expectedOutputFiles
528+
DataFile largeFile1 = newDataFile("data_bucket=0", 200);
529+
DataFile largeFile2 = newDataFile("data_bucket=0", 200);
530+
DataFile largeFile3 = newDataFile("data_bucket=0", 200);
531+
table.newAppend().appendFile(largeFile1).appendFile(largeFile2).appendFile(largeFile3).commit();
532+
533+
BinPackRewriteFilePlanner planner = new BinPackRewriteFilePlanner(table);
534+
// target=250 means:
535+
// Full group (600 bytes): expectedOutputFiles = ceil(600/250) = 3
536+
// Truncated to 1 file (200 bytes): expectedOutputFiles = ceil(200/250) = 1
537+
Map<String, String> options =
538+
ImmutableMap.of(
539+
BinPackRewriteFilePlanner.MAX_FILES_TO_REWRITE, "1",
540+
BinPackRewriteFilePlanner.REWRITE_ALL, "true",
541+
BinPackRewriteFilePlanner.TARGET_FILE_SIZE_BYTES, "250");
542+
planner.init(options);
543+
544+
FileRewritePlan<FileGroupInfo, FileScanTask, DataFile, RewriteFileGroup> plan = planner.plan();
545+
List<RewriteFileGroup> groups = Lists.newArrayList(plan.groups().iterator());
546+
547+
assertThat(groups).hasSize(1);
548+
RewriteFileGroup group = groups.get(0);
549+
// Only 1 file should be in the group due to max-files-to-rewrite=1
550+
assertThat(group.inputFileNum()).isEqualTo(1);
551+
// expectedOutputFiles should be based on the truncated input (200 bytes), not full group (600)
552+
// ceil(200/250) = 1, NOT ceil(600/250) = 3
553+
assertThat(group.expectedOutputFiles())
554+
.as(
555+
"expectedOutputFiles should be computed from actual files being rewritten (200 bytes),"
556+
+ " not the full group (600 bytes)")
557+
.isEqualTo(1);
558+
}
559+
525560
@Test
526561
public void testRewriteMaxFilesRewriteGreaterThanTotalFiles() {
527562
addFiles();

0 commit comments

Comments
 (0)