From c6f54a6bd0eda3cae1f6d7c6ce02dc9f0342bb0d Mon Sep 17 00:00:00 2001
From: Anton Okolnychyi
Date: Wed, 18 Feb 2026 15:40:50 -0800
Subject: [PATCH 1/2] Spark 4.1: Fix IcebergSource doc

---
 .../iceberg/spark/source/IcebergSource.java | 24 ++++++++++---------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java
index a0462e8f8982..8ca80f2f0476 100644
--- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java
+++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java
@@ -48,19 +48,21 @@
 import org.apache.spark.sql.util.CaseInsensitiveStringMap;
 
 /**
- * The IcebergSource loads/writes tables with format "iceberg". It can load paths and tables.
+ * Data source for reading and writing Iceberg tables using the "iceberg" format.
  *
- * <p>How paths/tables are loaded when using spark.read().format("iceberg").load(table)
+ * <p>The `path` parameter provided by Spark is resolved in the following priority order:
  *
- * <p>table = "file:///path/to/table" -> loads a HadoopTable at given path table = "tablename"
- * -> loads currentCatalog.currentNamespace.tablename table = "catalog.tablename" -> load
- * "tablename" from the specified catalog. table = "namespace.tablename" -> load
- * "namespace.tablename" from current catalog table = "catalog.namespace.tablename" ->
- * "namespace.tablename" from the specified catalog. table = "namespace1.namespace2.tablename" ->
- * load "namespace1.namespace2.tablename" from current catalog
- *
- * <p>The above list is in order of priority. For example: a matching catalog will take priority
- * over any namespace resolution.
+ * <ol>
+ *   <li>Rewrite key - If `path` is a key in the table cache, load from the rewrite catalog
+ *   <li>Table location - If `path` contains "/", load a table at the specified location
+ *   <li>Catalog identifier - Otherwise treat `path` as an identifier and resolve as:
+ *       <ul>
+ *         <li>{@code "tbl"} - {@code currentCatalog.currentNamespace.tbl}
+ *         <li>{@code "cat.tbl"} - {@code tbl} from the specified catalog
+ *         <li>{@code "ns.tbl"} - {@code ns.tbl} from the current catalog
+ *         <li>{@code "cat.ns.tbl"} - {@code ns.tbl} from the specified catalog
+ *       </ul>
+ * </ol>
  */
 public class IcebergSource
     implements DataSourceRegister, SupportsCatalogOptions, SessionConfigSupport {

From 23bd1c3218aafc7c03a81a6f25b721af1997914f Mon Sep 17 00:00:00 2001
From: Anton Okolnychyi
Date: Wed, 18 Feb 2026 20:42:36 -0800
Subject: [PATCH 2/2] Update wording

---
 .../org/apache/iceberg/spark/source/IcebergSource.java | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java
index 8ca80f2f0476..28282eb1cc60 100644
--- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java
+++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java
@@ -53,15 +53,9 @@
  * <p>The `path` parameter provided by Spark is resolved in the following priority order:
  *
  * <ol>
- *   <li>Rewrite key - If `path` is a key in the table cache, load from the rewrite catalog
+ *   <li>Rewrite key - If `path` is a rewrite key, load a table from the rewrite catalog
  *   <li>Table location - If `path` contains "/", load a table at the specified location
- *   <li>Catalog identifier - Otherwise treat `path` as an identifier and resolve as:
- *       <ul>
- *         <li>{@code "tbl"} - {@code currentCatalog.currentNamespace.tbl}
- *         <li>{@code "cat.tbl"} - {@code tbl} from the specified catalog
- *         <li>{@code "ns.tbl"} - {@code ns.tbl} from the current catalog
- *         <li>{@code "cat.ns.tbl"} - {@code ns.tbl} from the specified catalog
- *       </ul>
+ *   <li>Catalog identifier - Otherwise resolve `path` as an identifier per Spark rules
  * </ol>
  */
 public class IcebergSource