From 50bccd7b4b692e66752caf88ab2367e40ec1bd2d Mon Sep 17 00:00:00 2001 From: Joshua Magady Date: Sun, 15 Feb 2026 22:37:48 -0600 Subject: [PATCH] fix(transaction): preserve delete-only manifests in fast_append FastAppendOperation::existing_manifest() filters which manifests from the current snapshot are carried forward to the new snapshot. The filter only checked has_added_files() and has_existing_files(), which dropped manifests that contain only Deleted entries. After a rewrite_files operation, a delete-only manifest records which file paths were removed. If a subsequent fast_append drops this manifest, the deleted files reappear as alive in the new snapshot because the old manifests still carry their Added entries with no corresponding Delete entries to exclude them. This causes data duplication that compounds with each subsequent operation. Add has_deleted_files() to the filter so delete-only manifests survive across snapshot boundaries until expire_snapshots cleans them up. --- crates/iceberg/src/transaction/append.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/iceberg/src/transaction/append.rs b/crates/iceberg/src/transaction/append.rs index 08d4032409..6f568b627e 100644 --- a/crates/iceberg/src/transaction/append.rs +++ b/crates/iceberg/src/transaction/append.rs @@ -135,10 +135,16 @@ impl SnapshotProduceOperation for FastAppendOperation { ) .await?; + // Delete-only manifests must be preserved so that subsequent scans + // can see which file paths were removed (e.g. by a rewrite_files + // operation). Dropping them causes deleted files to reappear as + // alive in the next snapshot. Ok(manifest_list .entries() .iter() - .filter(|entry| entry.has_added_files() || entry.has_existing_files()) + .filter(|entry| { + entry.has_added_files() || entry.has_existing_files() || entry.has_deleted_files() + }) .cloned() .collect()) }