16 changes: 16 additions & 0 deletions core/src/main/java/org/apache/iceberg/BaseMetadataTable.java
@@ -21,6 +21,7 @@
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.iceberg.encryption.EncryptionManager;
@@ -29,6 +30,7 @@
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.transforms.Transforms;

/**
@@ -39,6 +41,16 @@
* needed when reading the table data after deserialization.
*/
public abstract class BaseMetadataTable extends BaseReadOnlyTable implements Serializable {
private static final Set<MetadataTableType> TIME_TRAVEL_TABLE_TYPES =
ImmutableSet.of(
MetadataTableType.ENTRIES,
MetadataTableType.FILES,
MetadataTableType.DATA_FILES,
MetadataTableType.DELETE_FILES,
MetadataTableType.MANIFESTS,
MetadataTableType.PARTITIONS,
MetadataTableType.POSITION_DELETES);

private final PartitionSpec spec = PartitionSpec.unpartitioned();
private final SortOrder sortOrder = SortOrder.unsorted();
private final BaseTable table;
@@ -106,6 +118,10 @@ static Map<Integer, PartitionSpec> transformSpecs(

abstract MetadataTableType metadataTableType();

public boolean supportsTimeTravel() {
return TIME_TRAVEL_TABLE_TYPES.contains(metadataTableType());
}

public BaseTable table() {
return table;
}
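For illustration only (not part of this diff): a minimal sketch of how a caller might consult the new supportsTimeTravel() check before pinning a metadata-table scan to a snapshot. The scanAt helper is hypothetical; newScan() and useSnapshot() are the existing Iceberg Table scan APIs.

import org.apache.iceberg.BaseMetadataTable
import org.apache.iceberg.TableScan

// Hypothetical helper: pin the scan to a snapshot only when the metadata table type
// is one of TIME_TRAVEL_TABLE_TYPES (ENTRIES, FILES, MANIFESTS, PARTITIONS, ...).
def scanAt(table: BaseMetadataTable, snapshotId: Long): TableScan =
  if (table.supportsTimeTravel()) table.newScan().useSnapshot(snapshotId)
  else table.newScan()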
@@ -20,6 +20,7 @@ package org.apache.iceberg.spark.extensions

import org.apache.spark.sql.SparkSessionExtensions
import org.apache.spark.sql.catalyst.analysis.CheckViews
import org.apache.spark.sql.catalyst.analysis.ResolveBranch
import org.apache.spark.sql.catalyst.analysis.ResolveViews
import org.apache.spark.sql.catalyst.optimizer.ReplaceStaticInvoke
import org.apache.spark.sql.catalyst.parser.extensions.IcebergSparkSqlExtensionsParser
Expand All @@ -33,6 +34,7 @@ class IcebergSparkSessionExtensions extends (SparkSessionExtensions => Unit) {

// analyzer extensions
extensions.injectResolutionRule { spark => ResolveViews(spark) }
extensions.injectPostHocResolutionRule { spark => ResolveBranch(spark) }
extensions.injectCheckRule(_ => CheckViews)

// optimizer extensions
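For context (not part of this diff): ResolveBranch, like the other injected rules, only runs when the Iceberg extensions are registered on the session. A minimal sketch of enabling them, with an illustrative catalog name and warehouse path:

import org.apache.spark.sql.SparkSession

// The catalog name "local" and the warehouse location are placeholders.
val spark = SparkSession.builder()
  .master("local[*]")
  .config("spark.sql.extensions",
    "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
  .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog")
  .config("spark.sql.catalog.local.type", "hadoop")
  .config("spark.sql.catalog.local.warehouse", "/tmp/iceberg-warehouse")
  .getOrCreate()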
@@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.spark.sql.catalyst.analysis

import org.apache.iceberg.spark.PathIdentifier
import org.apache.iceberg.spark.SparkTableUtil
import org.apache.iceberg.spark.source.SparkTable
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.plans.logical.RowLevelWrite
import org.apache.spark.sql.catalyst.plans.logical.V2WriteCommand
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.connector.catalog.Identifier
import org.apache.spark.sql.connector.write.RowLevelOperation
import org.apache.spark.sql.connector.write.RowLevelOperationInfoImpl
import org.apache.spark.sql.connector.write.RowLevelOperationTable
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
import org.apache.spark.sql.execution.datasources.v2.ExtractV2Table
import org.apache.spark.sql.util.CaseInsensitiveStringMap

/**
* A rule that resolves the target branch for Iceberg reads and writes.
* <p>
* The branch must be determined and pinned during analysis. The current DSv2 framework
* doesn't provide access to all necessary options during the initial table loading,
* forcing us to finalize the branch selection in a custom analyzer rule. Future Spark
* versions will have a built-in mechanism to cleanly determine the target branch.
*/
case class ResolveBranch(spark: SparkSession) extends Rule[LogicalPlan] {

override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
// row-level operations like DELETE, UPDATE, and MERGE
case w @ IcebergRowLevelWrite(table, operation, options) =>
val branch = SparkTableUtil.determineWriteBranch(spark, table, options)
if (table.branch != branch) {
val newTable = table.copyWithBranch(branch)
val info = RowLevelOperationInfoImpl(operation.command, options)
val newOperation = newTable.newRowLevelOperationBuilder(info).build()
val newOperationTable = RowLevelOperationTable(newTable, newOperation)
val newTarget = transformPreservingType(w.table) {
case r @ ExtractV2Table(RowLevelOperationTable(_, _)) => r.copy(table = newOperationTable)
}
val newQuery = transformPreservingType(w.query) {
case r @ ExtractV2Table(RowLevelOperationTable(_, _)) => r.copy(table = newOperationTable)
}
w.withNewTable(newTarget).withNewQuery(newQuery)
} else {
w
}

// batch write operations like append or overwrite
case w: V2WriteCommand =>
val newTarget = transformPreservingType(w.table) {
case r @ DataSourceV2Relation(table: SparkTable, _, _, _, options, _) =>
val branch = SparkTableUtil.determineWriteBranch(spark, table, options)
if (table.branch != branch) r.copy(table = table.copyWithBranch(branch)) else r
}
w.withNewTable(newTarget)

// scan operations
    // the branch selector is added to the identifier so that subsequent refreshes point to the correct branch
case r @ DataSourceV2Relation(table: SparkTable, _, _, Some(ident), options, None) =>
val branch = SparkTableUtil.determineReadBranch(spark, table, options)
if (table.branch != branch) {
val branchSelector = s"branch_$branch"
val newIdent = ident match {
case path: PathIdentifier if path.location.contains("#") =>
new PathIdentifier(path.location + "," + branchSelector)
case path: PathIdentifier =>
new PathIdentifier(path.location + "#" + branchSelector)
case _ =>
Identifier.of(ident.namespace :+ ident.name, branchSelector)
}
r.copy(table = table.copyWithBranch(branch), identifier = Some(newIdent))
} else {
r
}
}

private def transformPreservingType[T <: LogicalPlan](plan: T)(
func: PartialFunction[LogicalPlan, LogicalPlan]): T = {
plan.transform(func).asInstanceOf[T]
}
}

// Iceberg-specific extractor for row-level operations like DELETE, UPDATE, and MERGE
private object IcebergRowLevelWrite {
def unapply(
write: RowLevelWrite): Option[(SparkTable, RowLevelOperation, CaseInsensitiveStringMap)] = {
EliminateSubqueryAliases(write.table) match {
case DataSourceV2Relation(
RowLevelOperationTable(table: SparkTable, operation),
_,
_,
_,
options,
_) =>
Some((table, operation, options))
case _ => None
}
}
}
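For illustration only (not part of this diff): the kind of branch-scoped read and write that ResolveBranch pins during analysis, assuming a session configured as sketched above. The table and branch names are placeholders; the "branch" read option and the branch_<name> identifier suffix are the existing Iceberg Spark ways to select a branch.

// Read from the "audit" branch; the analyzer rule resolves and pins the branch so that
// later refreshes of the relation keep pointing at it.
val audited = spark.read
  .option("branch", "audit")
  .table("local.db.events")

// Append back to the same branch via the branch_<name> suffix on the identifier.
audited.writeTo("local.db.events.branch_audit").append()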
@@ -18,26 +18,12 @@
*/
package org.apache.iceberg.spark.extensions;

import static org.apache.iceberg.TableProperties.DELETE_ISOLATION_LEVEL;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.assertj.core.api.Assumptions.assumeThat;

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.Files;
import org.apache.iceberg.ParameterizedTestExtension;
@@ -52,14 +38,10 @@
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.util.concurrent.MoreExecutors;
import org.apache.iceberg.spark.Spark3Util;
import org.apache.iceberg.spark.SparkSQLProperties;
import org.apache.iceberg.util.SnapshotUtil;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
import org.apache.spark.sql.internal.SQLConf;
import org.awaitility.Awaitility;
import org.junit.jupiter.api.TestTemplate;
import org.junit.jupiter.api.extension.ExtendWith;

@@ -72,96 +54,6 @@ protected Map<String, String> extraTableProperties() {
TableProperties.DELETE_MODE, RowLevelOperationMode.COPY_ON_WRITE.modeName());
}

@TestTemplate
Contributor Author: Same explanation as in MERGE below.

public synchronized void testDeleteWithConcurrentTableRefresh() throws Exception {
// this test can only be run with Hive tables as it requires a reliable lock
// also, the table cache must be enabled so that the same table instance can be reused
assumeThat(catalogName).isEqualToIgnoringCase("testhive");

createAndInitUnpartitionedTable();
createOrReplaceView("deleted_id", Collections.singletonList(1), Encoders.INT());

sql(
"ALTER TABLE %s SET TBLPROPERTIES('%s' '%s')",
tableName, DELETE_ISOLATION_LEVEL, "snapshot");

sql("INSERT INTO TABLE %s VALUES (1, 'hr')", tableName);
createBranchIfNeeded();

Table table = Spark3Util.loadIcebergTable(spark, tableName);

ExecutorService executorService =
MoreExecutors.getExitingExecutorService(
(ThreadPoolExecutor) Executors.newFixedThreadPool(2));

AtomicInteger barrier = new AtomicInteger(0);
AtomicBoolean shouldAppend = new AtomicBoolean(true);

// delete thread
Future<?> deleteFuture =
executorService.submit(
() -> {
for (int numOperations = 0; numOperations < Integer.MAX_VALUE; numOperations++) {
int currentNumOperations = numOperations;
Awaitility.await()
.pollInterval(10, TimeUnit.MILLISECONDS)
.atMost(5, TimeUnit.SECONDS)
.until(() -> barrier.get() >= currentNumOperations * 2);

sql("DELETE FROM %s WHERE id IN (SELECT * FROM deleted_id)", commitTarget());

barrier.incrementAndGet();
}
});

// append thread
Future<?> appendFuture =
executorService.submit(
() -> {
GenericRecord record = GenericRecord.create(SnapshotUtil.schemaFor(table, branch));
record.set(0, 1); // id
record.set(1, "hr"); // dep

for (int numOperations = 0; numOperations < Integer.MAX_VALUE; numOperations++) {
int currentNumOperations = numOperations;
Awaitility.await()
.pollInterval(10, TimeUnit.MILLISECONDS)
.atMost(5, TimeUnit.SECONDS)
.until(() -> !shouldAppend.get() || barrier.get() >= currentNumOperations * 2);

if (!shouldAppend.get()) {
return;
}

for (int numAppends = 0; numAppends < 5; numAppends++) {
DataFile dataFile = writeDataFile(table, ImmutableList.of(record));
AppendFiles appendFiles = table.newFastAppend().appendFile(dataFile);
if (branch != null) {
appendFiles.toBranch(branch);
}

appendFiles.commit();
}

barrier.incrementAndGet();
}
});

try {
assertThatThrownBy(deleteFuture::get)
.isInstanceOf(ExecutionException.class)
.cause()
.isInstanceOf(IllegalStateException.class)
.hasMessageContaining("the table has been concurrently modified");
} finally {
shouldAppend.set(false);
appendFuture.cancel(true);
}

executorService.shutdown();
assertThat(executorService.awaitTermination(2, TimeUnit.MINUTES)).as("Timeout").isTrue();
}

@TestTemplate
public void testRuntimeFilteringWithPreservedDataGrouping() throws NoSuchTableException {
createAndInitPartitionedTable();