16 changes: 16 additions & 0 deletions core/src/main/java/org/apache/iceberg/BaseMetadataTable.java
@@ -21,6 +21,7 @@
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.iceberg.encryption.EncryptionManager;
@@ -29,6 +30,7 @@
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.transforms.Transforms;

/**
@@ -39,6 +41,16 @@
* needed when reading the table data after deserialization.
*/
public abstract class BaseMetadataTable extends BaseReadOnlyTable implements Serializable {
private static final Set<MetadataTableType> TIME_TRAVEL_TABLE_TYPES =
ImmutableSet.of(
MetadataTableType.ENTRIES,
MetadataTableType.FILES,
MetadataTableType.DATA_FILES,
MetadataTableType.DELETE_FILES,
MetadataTableType.MANIFESTS,
MetadataTableType.PARTITIONS,
MetadataTableType.POSITION_DELETES);

private final PartitionSpec spec = PartitionSpec.unpartitioned();
private final SortOrder sortOrder = SortOrder.unsorted();
private final BaseTable table;
@@ -106,6 +118,10 @@ static Map<Integer, PartitionSpec> transformSpecs(

abstract MetadataTableType metadataTableType();

public boolean supportsTimeTravel() {
return TIME_TRAVEL_TABLE_TYPES.contains(metadataTableType());
}

public BaseTable table() {
return table;
}
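For illustration only (not part of this diff): a minimal sketch of how a caller might consult the new supportsTimeTravel() check before pinning a metadata-table scan to a snapshot. The scanAt helper is hypothetical; newScan() and useSnapshot() are the existing Iceberg Table scan APIs.

import org.apache.iceberg.BaseMetadataTable
import org.apache.iceberg.TableScan

// Hypothetical helper: pin the scan to a snapshot only when the metadata table type
// is one of TIME_TRAVEL_TABLE_TYPES (ENTRIES, FILES, MANIFESTS, PARTITIONS, ...).
def scanAt(table: BaseMetadataTable, snapshotId: Long): TableScan =
  if (table.supportsTimeTravel()) table.newScan().useSnapshot(snapshotId)
  else table.newScan()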
@@ -20,6 +20,7 @@ package org.apache.iceberg.spark.extensions

import org.apache.spark.sql.SparkSessionExtensions
import org.apache.spark.sql.catalyst.analysis.CheckViews
import org.apache.spark.sql.catalyst.analysis.ResolveBranch
import org.apache.spark.sql.catalyst.analysis.ResolveViews
import org.apache.spark.sql.catalyst.optimizer.ReplaceStaticInvoke
import org.apache.spark.sql.catalyst.parser.extensions.IcebergSparkSqlExtensionsParser
Expand All @@ -33,6 +34,7 @@ class IcebergSparkSessionExtensions extends (SparkSessionExtensions => Unit) {

// analyzer extensions
extensions.injectResolutionRule { spark => ResolveViews(spark) }
extensions.injectPostHocResolutionRule { spark => ResolveBranch(spark) }
extensions.injectCheckRule(_ => CheckViews)

// optimizer extensions
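For context (not part of this diff): ResolveBranch, like the other injected rules, only runs when the Iceberg extensions are registered on the session. A minimal sketch of enabling them, with an illustrative catalog name and warehouse path:

import org.apache.spark.sql.SparkSession

// The catalog name "local" and the warehouse location are placeholders.
val spark = SparkSession.builder()
  .master("local[*]")
  .config("spark.sql.extensions",
    "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
  .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog")
  .config("spark.sql.catalog.local.type", "hadoop")
  .config("spark.sql.catalog.local.warehouse", "/tmp/iceberg-warehouse")
  .getOrCreate()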
@@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.spark.sql.catalyst.analysis

import org.apache.iceberg.spark.PathIdentifier
import org.apache.iceberg.spark.SparkTableUtil
import org.apache.iceberg.spark.source.SparkTable
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.plans.logical.RowLevelWrite
import org.apache.spark.sql.catalyst.plans.logical.V2WriteCommand
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.connector.catalog.Identifier
import org.apache.spark.sql.connector.write.RowLevelOperation
import org.apache.spark.sql.connector.write.RowLevelOperationInfoImpl
import org.apache.spark.sql.connector.write.RowLevelOperationTable
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
import org.apache.spark.sql.execution.datasources.v2.ExtractV2Table
import org.apache.spark.sql.util.CaseInsensitiveStringMap

/**
* A rule that resolves the target branch for Iceberg reads and writes.
* <p>
* The branch must be determined and pinned during analysis. The current DSv2 framework
* doesn't provide access to all necessary options during the initial table loading,
* forcing us to finalize the branch selection in a custom analyzer rule. Future Spark
* versions will have a built-in mechanism to cleanly determine the target branch.
*/
case class ResolveBranch(spark: SparkSession) extends Rule[LogicalPlan] {

override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
// row-level operations like DELETE, UPDATE, and MERGE
case w @ IcebergRowLevelWrite(table, operation, options) =>
val branch = SparkTableUtil.determineWriteBranch(spark, table, options)
if (table.branch != branch) {
val newTable = table.copyWithBranch(branch)
val info = RowLevelOperationInfoImpl(operation.command, options)
val newOperation = newTable.newRowLevelOperationBuilder(info).build()
val newOperationTable = RowLevelOperationTable(newTable, newOperation)
val newTarget = transformPreservingType(w.table) {
case r @ ExtractV2Table(RowLevelOperationTable(_, _)) => r.copy(table = newOperationTable)
}
val newQuery = transformPreservingType(w.query) {
case r @ ExtractV2Table(RowLevelOperationTable(_, _)) => r.copy(table = newOperationTable)
}
w.withNewTable(newTarget).withNewQuery(newQuery)
} else {
w
}

// batch write operations like append or overwrite
case w: V2WriteCommand =>
val newTarget = transformPreservingType(w.table) {
case r @ DataSourceV2Relation(table: SparkTable, _, _, _, options, _) =>
val branch = SparkTableUtil.determineWriteBranch(spark, table, options)
if (table.branch != branch) r.copy(table = table.copyWithBranch(branch)) else r
}
w.withNewTable(newTarget)

// scan operations
    // the branch selector is added to the identifier so that subsequent refreshes point to the correct branch
case r @ DataSourceV2Relation(table: SparkTable, _, _, Some(ident), options, None) =>
val branch = SparkTableUtil.determineReadBranch(spark, table, options)
if (table.branch != branch) {
val branchSelector = s"branch_$branch"
val newIdent = ident match {
case path: PathIdentifier if path.location.contains("#") =>
new PathIdentifier(path.location + "," + branchSelector)
case path: PathIdentifier =>
new PathIdentifier(path.location + "#" + branchSelector)
case _ =>
Identifier.of(ident.namespace :+ ident.name, branchSelector)
}
r.copy(table = table.copyWithBranch(branch), identifier = Some(newIdent))
} else {
r
}
}

private def transformPreservingType[T <: LogicalPlan](plan: T)(
func: PartialFunction[LogicalPlan, LogicalPlan]): T = {
plan.transform(func).asInstanceOf[T]
}
}

// Iceberg-specific extractor for row-level operations like DELETE, UPDATE, and MERGE
private object IcebergRowLevelWrite {
def unapply(
write: RowLevelWrite): Option[(SparkTable, RowLevelOperation, CaseInsensitiveStringMap)] = {
EliminateSubqueryAliases(write.table) match {
case DataSourceV2Relation(
RowLevelOperationTable(table: SparkTable, operation),
_,
_,
_,
options,
_) =>
Some((table, operation, options))
case _ => None
}
}
}
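For illustration only (not part of this diff): the kind of branch-scoped read and write that ResolveBranch pins during analysis, assuming a session configured as sketched above. The table and branch names are placeholders; the "branch" read option and the branch_<name> identifier suffix are the existing Iceberg Spark ways to select a branch.

// Read from the "audit" branch; the analyzer rule resolves and pins the branch so that
// later refreshes of the relation keep pointing at it.
val audited = spark.read
  .option("branch", "audit")
  .table("local.db.events")

// Append back to the same branch via the branch_<name> suffix on the identifier.
audited.writeTo("local.db.events.branch_audit").append()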
@@ -18,26 +18,12 @@
*/
package org.apache.iceberg.spark.extensions;

import static org.apache.iceberg.TableProperties.DELETE_ISOLATION_LEVEL;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.assertj.core.api.Assumptions.assumeThat;

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.Files;
import org.apache.iceberg.ParameterizedTestExtension;
@@ -52,14 +38,10 @@
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.util.concurrent.MoreExecutors;
import org.apache.iceberg.spark.Spark3Util;
import org.apache.iceberg.spark.SparkSQLProperties;
import org.apache.iceberg.util.SnapshotUtil;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
import org.apache.spark.sql.internal.SQLConf;
import org.awaitility.Awaitility;
import org.junit.jupiter.api.TestTemplate;
import org.junit.jupiter.api.extension.ExtendWith;

@@ -72,96 +54,6 @@ protected Map<String, String> extraTableProperties() {
TableProperties.DELETE_MODE, RowLevelOperationMode.COPY_ON_WRITE.modeName());
}

@TestTemplate
Contributor Author: Same explanation as in MERGE below.

public synchronized void testDeleteWithConcurrentTableRefresh() throws Exception {
// this test can only be run with Hive tables as it requires a reliable lock
// also, the table cache must be enabled so that the same table instance can be reused
assumeThat(catalogName).isEqualToIgnoringCase("testhive");

createAndInitUnpartitionedTable();
createOrReplaceView("deleted_id", Collections.singletonList(1), Encoders.INT());

sql(
"ALTER TABLE %s SET TBLPROPERTIES('%s' '%s')",
tableName, DELETE_ISOLATION_LEVEL, "snapshot");

sql("INSERT INTO TABLE %s VALUES (1, 'hr')", tableName);
createBranchIfNeeded();

Table table = Spark3Util.loadIcebergTable(spark, tableName);

ExecutorService executorService =
MoreExecutors.getExitingExecutorService(
(ThreadPoolExecutor) Executors.newFixedThreadPool(2));

AtomicInteger barrier = new AtomicInteger(0);
AtomicBoolean shouldAppend = new AtomicBoolean(true);

// delete thread
Future<?> deleteFuture =
executorService.submit(
() -> {
for (int numOperations = 0; numOperations < Integer.MAX_VALUE; numOperations++) {
int currentNumOperations = numOperations;
Awaitility.await()
.pollInterval(10, TimeUnit.MILLISECONDS)
.atMost(5, TimeUnit.SECONDS)
.until(() -> barrier.get() >= currentNumOperations * 2);

sql("DELETE FROM %s WHERE id IN (SELECT * FROM deleted_id)", commitTarget());

barrier.incrementAndGet();
}
});

// append thread
Future<?> appendFuture =
executorService.submit(
() -> {
GenericRecord record = GenericRecord.create(SnapshotUtil.schemaFor(table, branch));
record.set(0, 1); // id
record.set(1, "hr"); // dep

for (int numOperations = 0; numOperations < Integer.MAX_VALUE; numOperations++) {
int currentNumOperations = numOperations;
Awaitility.await()
.pollInterval(10, TimeUnit.MILLISECONDS)
.atMost(5, TimeUnit.SECONDS)
.until(() -> !shouldAppend.get() || barrier.get() >= currentNumOperations * 2);

if (!shouldAppend.get()) {
return;
}

for (int numAppends = 0; numAppends < 5; numAppends++) {
DataFile dataFile = writeDataFile(table, ImmutableList.of(record));
AppendFiles appendFiles = table.newFastAppend().appendFile(dataFile);
if (branch != null) {
appendFiles.toBranch(branch);
}

appendFiles.commit();
}

barrier.incrementAndGet();
}
});

try {
assertThatThrownBy(deleteFuture::get)
.isInstanceOf(ExecutionException.class)
.cause()
.isInstanceOf(IllegalStateException.class)
.hasMessageContaining("the table has been concurrently modified");
} finally {
shouldAppend.set(false);
appendFuture.cancel(true);
}

executorService.shutdown();
assertThat(executorService.awaitTermination(2, TimeUnit.MINUTES)).as("Timeout").isTrue();
}

@TestTemplate
public void testRuntimeFilteringWithPreservedDataGrouping() throws NoSuchTableException {
createAndInitPartitionedTable();