From b67971665bb2ab916dd5e0f2e0a9256b4dcc643a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fredh=C3=B8i?=
Date: Tue, 17 Jun 2025 00:45:12 +0200
Subject: [PATCH 01/70] feat: Add support for Microsoft Fabric Warehouse

---
 sqlmesh/core/config/connection.py           |  22 ++
 sqlmesh/core/engine_adapter/__init__.py     |   4 +
 .../core/engine_adapter/fabric_warehouse.py | 233 ++++++++++++++++++
 3 files changed, 259 insertions(+)
 create mode 100644 sqlmesh/core/engine_adapter/fabric_warehouse.py

diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py
index b3ed3bc34f..3452ee5ba8 100644
--- a/sqlmesh/core/config/connection.py
+++ b/sqlmesh/core/config/connection.py
@@ -1587,6 +1587,28 @@ def _extra_engine_config(self) -> t.Dict[str, t.Any]:
         return {"catalog_support": CatalogSupport.SINGLE_CATALOG_ONLY}
 
 
+class FabricWarehouseConnectionConfig(MSSQLConnectionConfig):
+    """
+    Fabric Warehouse Connection Configuration. Inherits most settings from MSSQLConnectionConfig.
+    """
+
+    type_: t.Literal["fabric_warehouse"] = Field(alias="type", default="fabric_warehouse")  # type: ignore
+    autocommit: t.Optional[bool] = True
+
+    @property
+    def _engine_adapter(self) -> t.Type[EngineAdapter]:
+        from sqlmesh.core.engine_adapter.fabric_warehouse import FabricWarehouseAdapter
+
+        return FabricWarehouseAdapter
+
+    @property
+    def _extra_engine_config(self) -> t.Dict[str, t.Any]:
+        return {
+            "database": self.database,
+            "catalog_support": CatalogSupport.REQUIRES_SET_CATALOG,
+        }
+
+
 class SparkConnectionConfig(ConnectionConfig):
     """
     Vanilla Spark Connection Configuration. Use `DatabricksConnectionConfig` for Databricks.
diff --git a/sqlmesh/core/engine_adapter/__init__.py b/sqlmesh/core/engine_adapter/__init__.py
index 19332dc005..b876c3b924 100644
--- a/sqlmesh/core/engine_adapter/__init__.py
+++ b/sqlmesh/core/engine_adapter/__init__.py
@@ -19,6 +19,7 @@
 from sqlmesh.core.engine_adapter.trino import TrinoEngineAdapter
 from sqlmesh.core.engine_adapter.athena import AthenaEngineAdapter
 from sqlmesh.core.engine_adapter.risingwave import RisingwaveEngineAdapter
+from sqlmesh.core.engine_adapter.fabric_warehouse import FabricWarehouseAdapter
 
 DIALECT_TO_ENGINE_ADAPTER = {
     "hive": SparkEngineAdapter,
@@ -35,6 +36,7 @@
     "trino": TrinoEngineAdapter,
     "athena": AthenaEngineAdapter,
     "risingwave": RisingwaveEngineAdapter,
+    "fabric_warehouse": FabricWarehouseAdapter,
 }
 
 DIALECT_ALIASES = {
@@ -45,9 +47,11 @@ def create_engine_adapter(
     connection_factory: t.Callable[[], t.Any], dialect: str, **kwargs: t.Any
 ) -> EngineAdapter:
+    print(kwargs)
     dialect = dialect.lower()
     dialect = DIALECT_ALIASES.get(dialect, dialect)
     engine_adapter = DIALECT_TO_ENGINE_ADAPTER.get(dialect)
+    print(engine_adapter)
     if engine_adapter is None:
         return EngineAdapter(connection_factory, dialect, **kwargs)
     if engine_adapter is EngineAdapterWithIndexSupport:
diff --git a/sqlmesh/core/engine_adapter/fabric_warehouse.py b/sqlmesh/core/engine_adapter/fabric_warehouse.py
new file mode 100644
index 0000000000..037f827366
--- /dev/null
+++ b/sqlmesh/core/engine_adapter/fabric_warehouse.py
@@ -0,0 +1,233 @@
+from __future__ import annotations
+
+import typing as t
+from sqlglot import exp
+from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter
+from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, SourceQuery
+
+if t.TYPE_CHECKING:
+    from sqlmesh.core._typing import SchemaName, TableName
+    from sqlmesh.core.engine_adapter._typing import QueryOrDF
+
+
+class FabricWarehouseAdapter(MSSQLEngineAdapter):
+    """
+    Adapter for Microsoft Fabric Warehouses.
+    """
+
+    DIALECT = "tsql"
+    SUPPORTS_INDEXES = False
+    SUPPORTS_TRANSACTIONS = False
+
+    INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT
+
+    def __init__(self, *args: t.Any, **kwargs: t.Any):
+        self.database = kwargs.get("database")
+
+        super().__init__(*args, **kwargs)
+
+        if not self.database:
+            raise ValueError(
+                "The 'database' parameter is required in the connection config for the FabricWarehouseAdapter."
+            )
+        try:
+            self.execute(f"USE [{self.database}]")
+        except Exception as e:
+            raise RuntimeError(f"Failed to set database context to '{self.database}'. Reason: {e}")
+
+    def _get_schema_name(self, name: t.Union[TableName, SchemaName]) -> str:
+        """Extracts the schema name from a sqlglot object or string."""
+        table = exp.to_table(name)
+        schema_part = table.db
+
+        if isinstance(schema_part, exp.Identifier):
+            return schema_part.name
+        if isinstance(schema_part, str):
+            return schema_part
+
+        if schema_part is None and table.this and table.this.is_identifier:
+            return table.this.name
+
+        raise ValueError(f"Could not determine schema name from '{name}'")
+
+    def create_schema(self, schema: SchemaName) -> None:
+        """
+        Creates a schema in a Microsoft Fabric Warehouse.
+
+        Overridden to handle Fabric's specific T-SQL requirements.
+        T-SQL's `CREATE SCHEMA` command does not support `IF NOT EXISTS`, so this
+        implementation first checks for the schema's existence in the
+        `INFORMATION_SCHEMA.SCHEMATA` view.
+        """
+        sql = (
+            exp.select("1")
+            .from_(f"{self.database}.INFORMATION_SCHEMA.SCHEMATA")
+            .where(f"SCHEMA_NAME = '{schema}'")
+        )
+        if self.fetchone(sql):
+            return
+        self.execute(f"USE [{self.database}]")
+        self.execute(f"CREATE SCHEMA [{schema}]")
+
+    def _create_table_from_columns(
+        self,
+        table_name: TableName,
+        columns_to_types: t.Dict[str, exp.DataType],
+        primary_key: t.Optional[t.Tuple[str, ...]] = None,
+        exists: bool = True,
+        table_description: t.Optional[str] = None,
+        column_descriptions: t.Optional[t.Dict[str, str]] = None,
+        **kwargs: t.Any,
+    ) -> None:
+        """
+        Creates a table, ensuring the schema exists first and that all
+        object names are fully qualified with the database.
+        """
+        table_exp = exp.to_table(table_name)
+        schema_name = self._get_schema_name(table_name)
+
+        self.create_schema(schema_name)
+
+        fully_qualified_table_name = f"[{self.database}].[{schema_name}].[{table_exp.name}]"
+
+        column_defs = ", ".join(
+            f"[{col}] {kind.sql(dialect=self.dialect)}" for col, kind in columns_to_types.items()
+        )
+
+        create_table_sql = f"CREATE TABLE {fully_qualified_table_name} ({column_defs})"
+
+        if not exists:
+            self.execute(create_table_sql)
+            return
+
+        if not self.table_exists(table_name):
+            self.execute(create_table_sql)
+
+        if table_description and self.comments_enabled:
+            qualified_table_for_comment = self._fully_qualify(table_name)
+            self._create_table_comment(qualified_table_for_comment, table_description)
+        if column_descriptions and self.comments_enabled:
+            self._create_column_comments(qualified_table_for_comment, column_descriptions)
+
+    def table_exists(self, table_name: TableName) -> bool:
+        """
+        Checks if a table exists.
+
+        Overridden to query the uppercase `INFORMATION_SCHEMA` required
+        by case-sensitive Fabric environments.
+ """ + table = exp.to_table(table_name) + schema = self._get_schema_name(table_name) + + sql = ( + exp.select("1") + .from_("INFORMATION_SCHEMA.TABLES") + .where(f"TABLE_NAME = '{table.alias_or_name}'") + .where(f"TABLE_SCHEMA = '{schema}'") + ) + + result = self.fetchone(sql, quote_identifiers=True) + + return result[0] == 1 if result else False + + def _fully_qualify(self, name: t.Union[TableName, SchemaName]) -> exp.Table: + """Ensures an object name is prefixed with the configured database.""" + table = exp.to_table(name) + return exp.Table(this=table.this, db=table.db, catalog=exp.to_identifier(self.database)) + + def create_view( + self, + view_name: TableName, + query_or_df: QueryOrDF, + columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, + replace: bool = True, + materialized: bool = False, + materialized_properties: t.Optional[t.Dict[str, t.Any]] = None, + table_description: t.Optional[str] = None, + column_descriptions: t.Optional[t.Dict[str, str]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + **create_kwargs: t.Any, + ) -> None: + """ + Creates a view from a query or DataFrame. + + Overridden to ensure that the view name and all tables referenced + in the source query are fully qualified with the database name, + as required by Fabric. + """ + view_schema = self._get_schema_name(view_name) + self.create_schema(view_schema) + + qualified_view_name = self._fully_qualify(view_name) + + if isinstance(query_or_df, exp.Expression): + for table in query_or_df.find_all(exp.Table): + if not table.catalog: + qualified_table = self._fully_qualify(table) + table.replace(qualified_table) + + return super().create_view( + qualified_view_name, + query_or_df, + columns_to_types, + replace, + materialized, + table_description=table_description, + column_descriptions=column_descriptions, + view_properties=view_properties, + **create_kwargs, + ) + + def columns( + self, table_name: TableName, include_pseudo_columns: bool = False + ) -> t.Dict[str, exp.DataType]: + """ + Fetches column names and types for the target table. + + Overridden to query the uppercase `INFORMATION_SCHEMA.COLUMNS` view + required by case-sensitive Fabric environments. + """ + table = exp.to_table(table_name) + schema = self._get_schema_name(table_name) + sql = ( + exp.select("COLUMN_NAME", "DATA_TYPE") + .from_(f"{self.database}.INFORMATION_SCHEMA.COLUMNS") + .where(f"TABLE_NAME = '{table.name}'") + .where(f"TABLE_SCHEMA = '{schema}'") + .order_by("ORDINAL_POSITION") + ) + df = self.fetchdf(sql) + return { + str(row.COLUMN_NAME): exp.DataType.build(str(row.DATA_TYPE), dialect=self.dialect) + for row in df.itertuples() + } + + def _insert_overwrite_by_condition( + self, + table_name: TableName, + source_queries: t.List[SourceQuery], + columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, + where: t.Optional[exp.Condition] = None, + insert_overwrite_strategy_override: t.Optional[InsertOverwriteStrategy] = None, + **kwargs: t.Any, + ) -> None: + """ + Implements the insert overwrite strategy for Fabric. + + Overridden to enforce a `DELETE`/`INSERT` strategy, as Fabric's + `MERGE` statement has limitations. 
+ """ + + columns_to_types = columns_to_types or self.columns(table_name) + + self.delete_from(table_name, where=where or exp.true()) + + for source_query in source_queries: + with source_query as query: + query = self._order_projections_and_filter(query, columns_to_types) + self._insert_append_query( + table_name, + query, + columns_to_types=columns_to_types, + order_projections=False, + ) From 9a6c5755086afdf634f63ff3b0969cdace7a9ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Tue, 17 Jun 2025 00:51:12 +0200 Subject: [PATCH 02/70] removing some print statements --- sqlmesh/core/engine_adapter/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sqlmesh/core/engine_adapter/__init__.py b/sqlmesh/core/engine_adapter/__init__.py index b876c3b924..27a2be1e32 100644 --- a/sqlmesh/core/engine_adapter/__init__.py +++ b/sqlmesh/core/engine_adapter/__init__.py @@ -47,11 +47,9 @@ def create_engine_adapter( connection_factory: t.Callable[[], t.Any], dialect: str, **kwargs: t.Any ) -> EngineAdapter: - print(kwargs) dialect = dialect.lower() dialect = DIALECT_ALIASES.get(dialect, dialect) engine_adapter = DIALECT_TO_ENGINE_ADAPTER.get(dialect) - print(engine_adapter) if engine_adapter is None: return EngineAdapter(connection_factory, dialect, **kwargs) if engine_adapter is EngineAdapterWithIndexSupport: From 347d3ed69bf96eaeb736b3569c068963f2fa3b24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Wed, 18 Jun 2025 00:10:54 +0200 Subject: [PATCH 03/70] adding dialect & handling temp views --- sqlmesh/core/config/connection.py | 16 +- sqlmesh/core/engine_adapter/__init__.py | 6 +- sqlmesh/core/engine_adapter/fabric.py | 482 ++++++++++++++++++ .../core/engine_adapter/fabric_warehouse.py | 233 --------- 4 files changed, 497 insertions(+), 240 deletions(-) create mode 100644 sqlmesh/core/engine_adapter/fabric.py delete mode 100644 sqlmesh/core/engine_adapter/fabric_warehouse.py diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 3452ee5ba8..5cbd35487c 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1587,22 +1587,28 @@ def _extra_engine_config(self) -> t.Dict[str, t.Any]: return {"catalog_support": CatalogSupport.SINGLE_CATALOG_ONLY} -class FabricWarehouseConnectionConfig(MSSQLConnectionConfig): +class FabricConnectionConfig(MSSQLConnectionConfig): """ - Fabric Warehouse Connection Configuration. Inherits most settings from MSSQLConnectionConfig. + Fabric Connection Configuration. + + Inherits most settings from MSSQLConnectionConfig and sets the type to 'fabric'. + It is recommended to use the 'pyodbc' driver for Fabric. """ - type_: t.Literal["fabric_warehouse"] = Field(alias="type", default="fabric_warehouse") # type: ignore + type_: t.Literal["fabric"] = Field(alias="type", default="fabric") autocommit: t.Optional[bool] = True @property def _engine_adapter(self) -> t.Type[EngineAdapter]: - from sqlmesh.core.engine_adapter.fabric_warehouse import FabricWarehouseAdapter + # This is the crucial link to the adapter you already created. + from sqlmesh.core.engine_adapter.fabric import FabricAdapter - return FabricWarehouseAdapter + return FabricAdapter @property def _extra_engine_config(self) -> t.Dict[str, t.Any]: + # This ensures the 'database' name from the config is passed + # to the FabricAdapter's constructor. 
return { "database": self.database, "catalog_support": CatalogSupport.REQUIRES_SET_CATALOG, diff --git a/sqlmesh/core/engine_adapter/__init__.py b/sqlmesh/core/engine_adapter/__init__.py index 27a2be1e32..c8b8299bd1 100644 --- a/sqlmesh/core/engine_adapter/__init__.py +++ b/sqlmesh/core/engine_adapter/__init__.py @@ -19,7 +19,7 @@ from sqlmesh.core.engine_adapter.trino import TrinoEngineAdapter from sqlmesh.core.engine_adapter.athena import AthenaEngineAdapter from sqlmesh.core.engine_adapter.risingwave import RisingwaveEngineAdapter -from sqlmesh.core.engine_adapter.fabric_warehouse import FabricWarehouseAdapter +from sqlmesh.core.engine_adapter.fabric import FabricAdapter DIALECT_TO_ENGINE_ADAPTER = { "hive": SparkEngineAdapter, @@ -36,7 +36,7 @@ "trino": TrinoEngineAdapter, "athena": AthenaEngineAdapter, "risingwave": RisingwaveEngineAdapter, - "fabric_warehouse": FabricWarehouseAdapter, + "fabric": FabricAdapter, } DIALECT_ALIASES = { @@ -47,9 +47,11 @@ def create_engine_adapter( connection_factory: t.Callable[[], t.Any], dialect: str, **kwargs: t.Any ) -> EngineAdapter: + print(kwargs) dialect = dialect.lower() dialect = DIALECT_ALIASES.get(dialect, dialect) engine_adapter = DIALECT_TO_ENGINE_ADAPTER.get(dialect) + print(engine_adapter) if engine_adapter is None: return EngineAdapter(connection_factory, dialect, **kwargs) if engine_adapter is EngineAdapterWithIndexSupport: diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py new file mode 100644 index 0000000000..4865c3c8f5 --- /dev/null +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -0,0 +1,482 @@ +from __future__ import annotations + +import typing as t +from sqlglot import exp +from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter +from sqlmesh.core.engine_adapter.shared import ( + InsertOverwriteStrategy, + SourceQuery, + DataObject, + DataObjectType, +) +import logging +from sqlmesh.core.dialect import to_schema + +logger = logging.getLogger(__name__) +if t.TYPE_CHECKING: + from sqlmesh.core._typing import SchemaName, TableName + from sqlmesh.core.engine_adapter._typing import QueryOrDF + + +class FabricAdapter(MSSQLEngineAdapter): + """ + Adapter for Microsoft Fabric. + """ + + DIALECT = "fabric" + SUPPORTS_INDEXES = False + SUPPORTS_TRANSACTIONS = False + + INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT + + def __init__(self, *args: t.Any, **kwargs: t.Any): + self.database = kwargs.get("database") + + super().__init__(*args, **kwargs) + + if not self.database: + raise ValueError( + "The 'database' parameter is required in the connection config for the FabricWarehouseAdapter." + ) + try: + self.execute(f"USE [{self.database}]") + except Exception as e: + raise RuntimeError(f"Failed to set database context to '{self.database}'. Reason: {e}") + + def _get_schema_name(self, name: t.Union[str, exp.Table, exp.Identifier]) -> t.Optional[str]: + """ + Safely extracts the schema name from a table or schema name, which can be + a string or a sqlglot expression. + + Fabric requires database names to be explicitly specified in many contexts, + including referencing schemas in INFORMATION_SCHEMA. This function helps + in extracting the schema part correctly from potentially qualified names. 
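+        Temporary tables (names starting with '#') carry no schema, so None is returned for them.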
+ """ + table = exp.to_table(name) + + if table.this and table.this.name.startswith("#"): + return None + + schema_part = table.db + + if not schema_part: + return None + + if isinstance(schema_part, exp.Identifier): + return schema_part.name + if isinstance(schema_part, str): + return schema_part + + raise TypeError(f"Unexpected type for schema part: {type(schema_part)}") + + def _get_data_objects( + self, schema_name: SchemaName, object_names: t.Optional[t.Set[str]] = None + ) -> t.List[DataObject]: + """ + Returns all the data objects that exist in the given schema and database. + + Overridden to query `INFORMATION_SCHEMA.TABLES` with explicit database qualification + and preserved casing using `quoted=True`. + """ + import pandas as pd + + catalog = self.get_current_catalog() + + from_table = exp.Table( + this=exp.to_identifier("TABLES", quoted=True), + db=exp.to_identifier("INFORMATION_SCHEMA", quoted=True), + catalog=exp.to_identifier(self.database), + ) + + query = ( + exp.select( + exp.column("TABLE_NAME").as_("name"), + exp.column("TABLE_SCHEMA").as_("schema_name"), + exp.case() + .when(exp.column("TABLE_TYPE").eq("BASE TABLE"), exp.Literal.string("TABLE")) + .else_(exp.column("TABLE_TYPE")) + .as_("type"), + ) + .from_(from_table) + .where(exp.column("TABLE_SCHEMA").eq(str(to_schema(schema_name).db).strip("[]"))) + ) + if object_names: + query = query.where( + exp.column("TABLE_NAME").isin(*(name.strip("[]") for name in object_names)) + ) + + dataframe: pd.DataFrame = self.fetchdf(query) + + return [ + DataObject( + catalog=catalog, + schema=row.schema_name, + name=row.name, + type=DataObjectType.from_str(row.type), + ) + for row in dataframe.itertuples() + ] + + def create_schema( + self, + schema_name: SchemaName, + ignore_if_exists: bool = True, + warn_on_error: bool = True, + **kwargs: t.Any, + ) -> None: + """ + Creates a schema in a Microsoft Fabric Warehouse. + + Overridden to handle Fabric's specific T-SQL requirements. + T-SQL's `CREATE SCHEMA` command does not support `IF NOT EXISTS` directly + as part of the statement in all contexts, and error messages suggest + issues with batching or preceding statements like USE. + """ + if schema_name is None: + return + + schema_name_str = ( + schema_name.name if isinstance(schema_name, exp.Identifier) else str(schema_name) + ) + + if not schema_name_str: + logger.warning("Attempted to create a schema with an empty name. Skipping.") + return + + schema_name_str = schema_name_str.strip('[]"').rstrip(".") + + if not schema_name_str: + logger.warning( + "Attempted to create a schema with an empty name after sanitization. Skipping." + ) + return + + try: + if self.schema_exists(schema_name_str): + if ignore_if_exists: + return + raise RuntimeError(f"Schema '{schema_name_str}' already exists.") + except Exception as e: + if warn_on_error: + logger.warning(f"Failed to check for existence of schema '{schema_name_str}': {e}") + else: + raise + + try: + create_sql = f"CREATE SCHEMA [{schema_name_str}]" + self.execute(create_sql) + except Exception as e: + if "already exists" in str(e).lower() or "There is already an object named" in str(e): + if ignore_if_exists: + return + raise RuntimeError(f"Schema '{schema_name_str}' already exists.") from e + else: + if warn_on_error: + logger.warning(f"Failed to create schema {schema_name_str}. 
Reason: {e}") + else: + raise RuntimeError(f"Failed to create schema {schema_name_str}.") from e + + def _create_table_from_columns( + self, + table_name: TableName, + columns_to_types: t.Dict[str, exp.DataType], + primary_key: t.Optional[t.Tuple[str, ...]] = None, + exists: bool = True, + table_description: t.Optional[str] = None, + column_descriptions: t.Optional[t.Dict[str, str]] = None, + **kwargs: t.Any, + ) -> None: + """ + Creates a table, ensuring the schema exists first and that all + object names are fully qualified with the database. + """ + table_exp = exp.to_table(table_name) + schema_name = self._get_schema_name(table_name) + + self.create_schema(schema_name) + + fully_qualified_table_name = f"[{self.database}].[{schema_name}].[{table_exp.name}]" + + column_defs = ", ".join( + f"[{col}] {kind.sql(dialect=self.dialect)}" for col, kind in columns_to_types.items() + ) + + create_table_sql = f"CREATE TABLE {fully_qualified_table_name} ({column_defs})" + + if not exists: + self.execute(create_table_sql) + return + + if not self.table_exists(table_name): + self.execute(create_table_sql) + + if table_description and self.comments_enabled: + qualified_table_for_comment = self._fully_qualify(table_name) + self._create_table_comment(qualified_table_for_comment, table_description) + if column_descriptions and self.comments_enabled: + self._create_column_comments(qualified_table_for_comment, column_descriptions) + + def table_exists(self, table_name: TableName) -> bool: + """ + Checks if a table exists. + + Overridden to query the uppercase `INFORMATION_SCHEMA` required + by case-sensitive Fabric environments. + """ + table = exp.to_table(table_name) + schema = self._get_schema_name(table_name) + + sql = ( + exp.select("1") + .from_("INFORMATION_SCHEMA.TABLES") + .where(f"TABLE_NAME = '{table.alias_or_name}'") + .where(f"TABLE_SCHEMA = '{schema}'") + ) + + result = self.fetchone(sql, quote_identifiers=True) + + return result[0] == 1 if result else False + + def _fully_qualify(self, name: t.Union[TableName, SchemaName]) -> exp.Table: + """ + Ensures an object name is prefixed with the configured database and schema. + + Overridden to prevent qualification for temporary objects (starting with # or ##). + Temporary objects should not be qualified with database or schema in T-SQL. + """ + table = exp.to_table(name) + + if ( + table.this + and isinstance(table.this, exp.Identifier) + and (table.this.name.startswith("#")) + ): + temp_identifier = exp.Identifier(this=table.this.this, quoted=True) + return exp.Table(this=temp_identifier) + + schema = self._get_schema_name(name) + + return exp.Table( + this=table.this, + db=exp.to_identifier(schema) if schema else None, + catalog=exp.to_identifier(self.database), + ) + + def create_view( + self, + view_name: TableName, + query_or_df: QueryOrDF, + columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, + replace: bool = True, + materialized: bool = False, + materialized_properties: t.Optional[t.Dict[str, t.Any]] = None, + table_description: t.Optional[str] = None, + column_descriptions: t.Optional[t.Dict[str, str]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + **create_kwargs: t.Any, + ) -> None: + """ + Creates a view from a query or DataFrame. + + Overridden to ensure that the view name and all tables referenced + in the source query are fully qualified with the database name, + as required by Fabric. 
+ """ + view_schema = self._get_schema_name(view_name) + self.create_schema(view_schema) + + qualified_view_name = self._fully_qualify(view_name) + + if isinstance(query_or_df, exp.Expression): + for table in query_or_df.find_all(exp.Table): + if not table.catalog: + qualified_table = self._fully_qualify(table) + table.replace(qualified_table) + + return super().create_view( + qualified_view_name, + query_or_df, + columns_to_types, + replace, + materialized, + table_description=table_description, + column_descriptions=column_descriptions, + view_properties=view_properties, + **create_kwargs, + ) + + def columns( + self, table_name: TableName, include_pseudo_columns: bool = False + ) -> t.Dict[str, exp.DataType]: + table = exp.to_table(table_name) + schema = self._get_schema_name(table_name) + + if ( + not schema + and table.this + and isinstance(table.this, exp.Identifier) + and table.this.name.startswith("__temp_") + ): + schema = "dbo" + + if not schema: + logger.warning( + f"Cannot fetch columns for table '{table_name}' without a schema name in Fabric." + ) + return {} + + from_table = exp.Table( + this=exp.to_identifier("COLUMNS", quoted=True), + db=exp.to_identifier("INFORMATION_SCHEMA", quoted=True), + catalog=exp.to_identifier(self.database), + ) + + sql = ( + exp.select( + "COLUMN_NAME", + "DATA_TYPE", + "CHARACTER_MAXIMUM_LENGTH", + "NUMERIC_PRECISION", + "NUMERIC_SCALE", + ) + .from_(from_table) + .where(f"TABLE_NAME = '{table.name.strip('[]')}'") + .where(f"TABLE_SCHEMA = '{schema.strip('[]')}'") + .order_by("ORDINAL_POSITION") + ) + + df = self.fetchdf(sql) + + def build_var_length_col( + column_name: str, + data_type: str, + character_maximum_length: t.Optional[int] = None, + numeric_precision: t.Optional[int] = None, + numeric_scale: t.Optional[int] = None, + ) -> t.Tuple[str, str]: + data_type = data_type.lower() + + char_len_int = ( + int(character_maximum_length) if character_maximum_length is not None else None + ) + prec_int = int(numeric_precision) if numeric_precision is not None else None + scale_int = int(numeric_scale) if numeric_scale is not None else None + + if data_type in self.VARIABLE_LENGTH_DATA_TYPES and char_len_int is not None: + if char_len_int > 0: + return (column_name, f"{data_type}({char_len_int})") + if char_len_int == -1: + return (column_name, f"{data_type}(max)") + if ( + data_type in ("decimal", "numeric") + and prec_int is not None + and scale_int is not None + ): + return (column_name, f"{data_type}({prec_int}, {scale_int})") + if data_type == "float" and prec_int is not None: + return (column_name, f"{data_type}({prec_int})") + + return (column_name, data_type) + + columns_raw = [ + ( + row.COLUMN_NAME, + row.DATA_TYPE, + getattr(row, "CHARACTER_MAXIMUM_LENGTH", None), + getattr(row, "NUMERIC_PRECISION", None), + getattr(row, "NUMERIC_SCALE", None), + ) + for row in df.itertuples() + ] + + columns_processed = [build_var_length_col(*row) for row in columns_raw] + + return { + column_name: exp.DataType.build(data_type, dialect=self.dialect) + for column_name, data_type in columns_processed + } + + def create_schema( + self, + schema_name: SchemaName, + ignore_if_exists: bool = True, + warn_on_error: bool = True, + **kwargs: t.Any, + ) -> None: + if schema_name is None: + return + + schema_exp = to_schema(schema_name) + simple_schema_name_str = None + if schema_exp.db: + simple_schema_name_str = exp.to_identifier(schema_exp.db).name + + if not simple_schema_name_str: + logger.warning( + f"Could not determine simple schema name from 
'{schema_name}'. Skipping schema creation." + ) + return + + if ignore_if_exists: + try: + if self.schema_exists(simple_schema_name_str): + return + except Exception as e: + if warn_on_error: + logger.warning( + f"Failed to check for existence of schema '{simple_schema_name_str}': {e}" + ) + else: + raise + elif self.schema_exists(simple_schema_name_str): + raise RuntimeError(f"Schema '{simple_schema_name_str}' already exists.") + + try: + create_sql = f"CREATE SCHEMA [{simple_schema_name_str}]" + self.execute(create_sql) + except Exception as e: + error_message = str(e).lower() + if ( + "already exists" in error_message + or "there is already an object named" in error_message + ): + if ignore_if_exists: + return + raise RuntimeError( + f"Schema '{simple_schema_name_str}' already exists due to race condition." + ) from e + else: + if warn_on_error: + logger.warning(f"Failed to create schema {simple_schema_name_str}. Reason: {e}") + else: + raise RuntimeError(f"Failed to create schema {simple_schema_name_str}.") from e + + def _insert_overwrite_by_condition( + self, + table_name: TableName, + source_queries: t.List[SourceQuery], + columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, + where: t.Optional[exp.Condition] = None, + insert_overwrite_strategy_override: t.Optional[InsertOverwriteStrategy] = None, + **kwargs: t.Any, + ) -> None: + """ + Implements the insert overwrite strategy for Fabric. + + Overridden to enforce a `DELETE`/`INSERT` strategy, as Fabric's + `MERGE` statement has limitations. + """ + + columns_to_types = columns_to_types or self.columns(table_name) + + self.delete_from(table_name, where=where or exp.true()) + + for source_query in source_queries: + with source_query as query: + query = self._order_projections_and_filter(query, columns_to_types) + self._insert_append_query( + table_name, + query, + columns_to_types=columns_to_types, + order_projections=False, + ) diff --git a/sqlmesh/core/engine_adapter/fabric_warehouse.py b/sqlmesh/core/engine_adapter/fabric_warehouse.py deleted file mode 100644 index 037f827366..0000000000 --- a/sqlmesh/core/engine_adapter/fabric_warehouse.py +++ /dev/null @@ -1,233 +0,0 @@ -from __future__ import annotations - -import typing as t -from sqlglot import exp -from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter -from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, SourceQuery - -if t.TYPE_CHECKING: - from sqlmesh.core._typing import SchemaName, TableName - from sqlmesh.core.engine_adapter._typing import QueryOrDF - - -class FabricWarehouseAdapter(MSSQLEngineAdapter): - """ - Adapter for Microsoft Fabric Warehouses. - """ - - DIALECT = "tsql" - SUPPORTS_INDEXES = False - SUPPORTS_TRANSACTIONS = False - - INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT - - def __init__(self, *args: t.Any, **kwargs: t.Any): - self.database = kwargs.get("database") - - super().__init__(*args, **kwargs) - - if not self.database: - raise ValueError( - "The 'database' parameter is required in the connection config for the FabricWarehouseAdapter." - ) - try: - self.execute(f"USE [{self.database}]") - except Exception as e: - raise RuntimeError(f"Failed to set database context to '{self.database}'. 
Reason: {e}") - - def _get_schema_name(self, name: t.Union[TableName, SchemaName]) -> str: - """Extracts the schema name from a sqlglot object or string.""" - table = exp.to_table(name) - schema_part = table.db - - if isinstance(schema_part, exp.Identifier): - return schema_part.name - if isinstance(schema_part, str): - return schema_part - - if schema_part is None and table.this and table.this.is_identifier: - return table.this.name - - raise ValueError(f"Could not determine schema name from '{name}'") - - def create_schema(self, schema: SchemaName) -> None: - """ - Creates a schema in a Microsoft Fabric Warehouse. - - Overridden to handle Fabric's specific T-SQL requirements. - T-SQL's `CREATE SCHEMA` command does not support `IF NOT EXISTS`, so this - implementation first checks for the schema's existence in the - `INFORMATION_SCHEMA.SCHEMATA` view. - """ - sql = ( - exp.select("1") - .from_(f"{self.database}.INFORMATION_SCHEMA.SCHEMATA") - .where(f"SCHEMA_NAME = '{schema}'") - ) - if self.fetchone(sql): - return - self.execute(f"USE [{self.database}]") - self.execute(f"CREATE SCHEMA [{schema}]") - - def _create_table_from_columns( - self, - table_name: TableName, - columns_to_types: t.Dict[str, exp.DataType], - primary_key: t.Optional[t.Tuple[str, ...]] = None, - exists: bool = True, - table_description: t.Optional[str] = None, - column_descriptions: t.Optional[t.Dict[str, str]] = None, - **kwargs: t.Any, - ) -> None: - """ - Creates a table, ensuring the schema exists first and that all - object names are fully qualified with the database. - """ - table_exp = exp.to_table(table_name) - schema_name = self._get_schema_name(table_name) - - self.create_schema(schema_name) - - fully_qualified_table_name = f"[{self.database}].[{schema_name}].[{table_exp.name}]" - - column_defs = ", ".join( - f"[{col}] {kind.sql(dialect=self.dialect)}" for col, kind in columns_to_types.items() - ) - - create_table_sql = f"CREATE TABLE {fully_qualified_table_name} ({column_defs})" - - if not exists: - self.execute(create_table_sql) - return - - if not self.table_exists(table_name): - self.execute(create_table_sql) - - if table_description and self.comments_enabled: - qualified_table_for_comment = self._fully_qualify(table_name) - self._create_table_comment(qualified_table_for_comment, table_description) - if column_descriptions and self.comments_enabled: - self._create_column_comments(qualified_table_for_comment, column_descriptions) - - def table_exists(self, table_name: TableName) -> bool: - """ - Checks if a table exists. - - Overridden to query the uppercase `INFORMATION_SCHEMA` required - by case-sensitive Fabric environments. 
- """ - table = exp.to_table(table_name) - schema = self._get_schema_name(table_name) - - sql = ( - exp.select("1") - .from_("INFORMATION_SCHEMA.TABLES") - .where(f"TABLE_NAME = '{table.alias_or_name}'") - .where(f"TABLE_SCHEMA = '{schema}'") - ) - - result = self.fetchone(sql, quote_identifiers=True) - - return result[0] == 1 if result else False - - def _fully_qualify(self, name: t.Union[TableName, SchemaName]) -> exp.Table: - """Ensures an object name is prefixed with the configured database.""" - table = exp.to_table(name) - return exp.Table(this=table.this, db=table.db, catalog=exp.to_identifier(self.database)) - - def create_view( - self, - view_name: TableName, - query_or_df: QueryOrDF, - columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, - replace: bool = True, - materialized: bool = False, - materialized_properties: t.Optional[t.Dict[str, t.Any]] = None, - table_description: t.Optional[str] = None, - column_descriptions: t.Optional[t.Dict[str, str]] = None, - view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, - **create_kwargs: t.Any, - ) -> None: - """ - Creates a view from a query or DataFrame. - - Overridden to ensure that the view name and all tables referenced - in the source query are fully qualified with the database name, - as required by Fabric. - """ - view_schema = self._get_schema_name(view_name) - self.create_schema(view_schema) - - qualified_view_name = self._fully_qualify(view_name) - - if isinstance(query_or_df, exp.Expression): - for table in query_or_df.find_all(exp.Table): - if not table.catalog: - qualified_table = self._fully_qualify(table) - table.replace(qualified_table) - - return super().create_view( - qualified_view_name, - query_or_df, - columns_to_types, - replace, - materialized, - table_description=table_description, - column_descriptions=column_descriptions, - view_properties=view_properties, - **create_kwargs, - ) - - def columns( - self, table_name: TableName, include_pseudo_columns: bool = False - ) -> t.Dict[str, exp.DataType]: - """ - Fetches column names and types for the target table. - - Overridden to query the uppercase `INFORMATION_SCHEMA.COLUMNS` view - required by case-sensitive Fabric environments. - """ - table = exp.to_table(table_name) - schema = self._get_schema_name(table_name) - sql = ( - exp.select("COLUMN_NAME", "DATA_TYPE") - .from_(f"{self.database}.INFORMATION_SCHEMA.COLUMNS") - .where(f"TABLE_NAME = '{table.name}'") - .where(f"TABLE_SCHEMA = '{schema}'") - .order_by("ORDINAL_POSITION") - ) - df = self.fetchdf(sql) - return { - str(row.COLUMN_NAME): exp.DataType.build(str(row.DATA_TYPE), dialect=self.dialect) - for row in df.itertuples() - } - - def _insert_overwrite_by_condition( - self, - table_name: TableName, - source_queries: t.List[SourceQuery], - columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, - where: t.Optional[exp.Condition] = None, - insert_overwrite_strategy_override: t.Optional[InsertOverwriteStrategy] = None, - **kwargs: t.Any, - ) -> None: - """ - Implements the insert overwrite strategy for Fabric. - - Overridden to enforce a `DELETE`/`INSERT` strategy, as Fabric's - `MERGE` statement has limitations. 
- """ - - columns_to_types = columns_to_types or self.columns(table_name) - - self.delete_from(table_name, where=where or exp.true()) - - for source_query in source_queries: - with source_query as query: - query = self._order_projections_and_filter(query, columns_to_types) - self._insert_append_query( - table_name, - query, - columns_to_types=columns_to_types, - order_projections=False, - ) From 0ff075ce2b3f3ebac0de9f2603101108db99924e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Wed, 18 Jun 2025 11:21:47 +0200 Subject: [PATCH 04/70] isnan error --- sqlmesh/core/config/connection.py | 5 +- sqlmesh/core/engine_adapter/__init__.py | 2 - sqlmesh/core/engine_adapter/fabric.py | 160 ++++++++++-------------- 3 files changed, 65 insertions(+), 102 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 5cbd35487c..cc26e63242 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1595,20 +1595,17 @@ class FabricConnectionConfig(MSSQLConnectionConfig): It is recommended to use the 'pyodbc' driver for Fabric. """ - type_: t.Literal["fabric"] = Field(alias="type", default="fabric") + type_: t.Literal["fabric"] = Field(alias="type", default="fabric") # type: ignore autocommit: t.Optional[bool] = True @property def _engine_adapter(self) -> t.Type[EngineAdapter]: - # This is the crucial link to the adapter you already created. from sqlmesh.core.engine_adapter.fabric import FabricAdapter return FabricAdapter @property def _extra_engine_config(self) -> t.Dict[str, t.Any]: - # This ensures the 'database' name from the config is passed - # to the FabricAdapter's constructor. return { "database": self.database, "catalog_support": CatalogSupport.REQUIRES_SET_CATALOG, diff --git a/sqlmesh/core/engine_adapter/__init__.py b/sqlmesh/core/engine_adapter/__init__.py index c8b8299bd1..337de39905 100644 --- a/sqlmesh/core/engine_adapter/__init__.py +++ b/sqlmesh/core/engine_adapter/__init__.py @@ -47,11 +47,9 @@ def create_engine_adapter( connection_factory: t.Callable[[], t.Any], dialect: str, **kwargs: t.Any ) -> EngineAdapter: - print(kwargs) dialect = dialect.lower() dialect = DIALECT_ALIASES.get(dialect, dialect) engine_adapter = DIALECT_TO_ENGINE_ADAPTER.get(dialect) - print(engine_adapter) if engine_adapter is None: return EngineAdapter(connection_factory, dialect, **kwargs) if engine_adapter is EngineAdapterWithIndexSupport: diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 4865c3c8f5..1f21ffbf26 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -43,7 +43,7 @@ def __init__(self, *args: t.Any, **kwargs: t.Any): except Exception as e: raise RuntimeError(f"Failed to set database context to '{self.database}'. Reason: {e}") - def _get_schema_name(self, name: t.Union[str, exp.Table, exp.Identifier]) -> t.Optional[str]: + def _get_schema_name(self, name: t.Union[str, exp.Table]) -> t.Optional[str]: """ Safely extracts the schema name from a table or schema name, which can be a string or a sqlglot expression. @@ -112,14 +112,31 @@ def _get_data_objects( catalog=catalog, schema=row.schema_name, name=row.name, - type=DataObjectType.from_str(row.type), + type=DataObjectType.from_str(str(row.type)), ) for row in dataframe.itertuples() ] + def schema_exists(self, schema_name: SchemaName) -> bool: + """ + Checks if a schema exists. 
+ """ + schema = exp.to_table(schema_name).db + if not schema: + return False + + sql = ( + exp.select("1") + .from_("INFORMATION_SCHEMA.SCHEMATA") + .where(f"SCHEMA_NAME = '{schema}'") + .where(f"CATALOG_NAME = '{self.database}'") + ) + result = self.fetchone(sql, quote_identifiers=True) + return result[0] == 1 if result else False + def create_schema( self, - schema_name: SchemaName, + schema_name: t.Optional[SchemaName], ignore_if_exists: bool = True, warn_on_error: bool = True, **kwargs: t.Any, @@ -128,53 +145,51 @@ def create_schema( Creates a schema in a Microsoft Fabric Warehouse. Overridden to handle Fabric's specific T-SQL requirements. - T-SQL's `CREATE SCHEMA` command does not support `IF NOT EXISTS` directly - as part of the statement in all contexts, and error messages suggest - issues with batching or preceding statements like USE. """ - if schema_name is None: + if not schema_name: return - schema_name_str = ( - schema_name.name if isinstance(schema_name, exp.Identifier) else str(schema_name) - ) - - if not schema_name_str: - logger.warning("Attempted to create a schema with an empty name. Skipping.") - return - - schema_name_str = schema_name_str.strip('[]"').rstrip(".") + schema_exp = to_schema(schema_name) + simple_schema_name_str = exp.to_identifier(schema_exp.db).name if schema_exp.db else None - if not schema_name_str: + if not simple_schema_name_str: logger.warning( - "Attempted to create a schema with an empty name after sanitization. Skipping." + f"Could not determine simple schema name from '{schema_name}'. Skipping schema creation." ) return try: - if self.schema_exists(schema_name_str): + if self.schema_exists(simple_schema_name_str): if ignore_if_exists: return - raise RuntimeError(f"Schema '{schema_name_str}' already exists.") + raise RuntimeError(f"Schema '{simple_schema_name_str}' already exists.") except Exception as e: if warn_on_error: - logger.warning(f"Failed to check for existence of schema '{schema_name_str}': {e}") + logger.warning( + f"Failed to check for existence of schema '{simple_schema_name_str}': {e}" + ) else: raise try: - create_sql = f"CREATE SCHEMA [{schema_name_str}]" + create_sql = f"CREATE SCHEMA [{simple_schema_name_str}]" self.execute(create_sql) except Exception as e: - if "already exists" in str(e).lower() or "There is already an object named" in str(e): + error_message = str(e).lower() + if ( + "already exists" in error_message + or "there is already an object named" in error_message + ): if ignore_if_exists: return - raise RuntimeError(f"Schema '{schema_name_str}' already exists.") from e + raise RuntimeError( + f"Schema '{simple_schema_name_str}' already exists due to race condition." + ) from e else: if warn_on_error: - logger.warning(f"Failed to create schema {schema_name_str}. Reason: {e}") + logger.warning(f"Failed to create schema {simple_schema_name_str}. 
Reason: {e}") else: - raise RuntimeError(f"Failed to create schema {schema_name_str}.") from e + raise RuntimeError(f"Failed to create schema {simple_schema_name_str}.") from e def _create_table_from_columns( self, @@ -251,7 +266,7 @@ def _fully_qualify(self, name: t.Union[TableName, SchemaName]) -> exp.Table: and isinstance(table.this, exp.Identifier) and (table.this.name.startswith("#")) ): - temp_identifier = exp.Identifier(this=table.this.this, quoted=True) + temp_identifier = exp.Identifier(this=table.this.name, quoted=True) return exp.Table(this=temp_identifier) schema = self._get_schema_name(name) @@ -308,6 +323,8 @@ def create_view( def columns( self, table_name: TableName, include_pseudo_columns: bool = False ) -> t.Dict[str, exp.DataType]: + import numpy as np + table = exp.to_table(table_name) schema = self._get_schema_name(table_name) @@ -346,6 +363,7 @@ def columns( ) df = self.fetchdf(sql) + df = df.replace({np.nan: None}) def build_var_length_col( column_name: str, @@ -356,11 +374,9 @@ def build_var_length_col( ) -> t.Tuple[str, str]: data_type = data_type.lower() - char_len_int = ( - int(character_maximum_length) if character_maximum_length is not None else None - ) - prec_int = int(numeric_precision) if numeric_precision is not None else None - scale_int = int(numeric_scale) if numeric_scale is not None else None + char_len_int = character_maximum_length + prec_int = numeric_precision + scale_int = numeric_scale if data_type in self.VARIABLE_LENGTH_DATA_TYPES and char_len_int is not None: if char_len_int > 0: @@ -378,79 +394,31 @@ def build_var_length_col( return (column_name, data_type) - columns_raw = [ - ( - row.COLUMN_NAME, - row.DATA_TYPE, - getattr(row, "CHARACTER_MAXIMUM_LENGTH", None), - getattr(row, "NUMERIC_PRECISION", None), - getattr(row, "NUMERIC_SCALE", None), + def _to_optional_int(val: t.Any) -> t.Optional[int]: + """Safely convert DataFrame values to Optional[int] for mypy.""" + if val is None: + return None + try: + return int(val) + except (ValueError, TypeError): + return None + + columns_processed = [ + build_var_length_col( + str(row.COLUMN_NAME), + str(row.DATA_TYPE), + _to_optional_int(row.CHARACTER_MAXIMUM_LENGTH), + _to_optional_int(row.NUMERIC_PRECISION), + _to_optional_int(row.NUMERIC_SCALE), ) for row in df.itertuples() ] - columns_processed = [build_var_length_col(*row) for row in columns_raw] - return { column_name: exp.DataType.build(data_type, dialect=self.dialect) for column_name, data_type in columns_processed } - def create_schema( - self, - schema_name: SchemaName, - ignore_if_exists: bool = True, - warn_on_error: bool = True, - **kwargs: t.Any, - ) -> None: - if schema_name is None: - return - - schema_exp = to_schema(schema_name) - simple_schema_name_str = None - if schema_exp.db: - simple_schema_name_str = exp.to_identifier(schema_exp.db).name - - if not simple_schema_name_str: - logger.warning( - f"Could not determine simple schema name from '{schema_name}'. Skipping schema creation." 
- ) - return - - if ignore_if_exists: - try: - if self.schema_exists(simple_schema_name_str): - return - except Exception as e: - if warn_on_error: - logger.warning( - f"Failed to check for existence of schema '{simple_schema_name_str}': {e}" - ) - else: - raise - elif self.schema_exists(simple_schema_name_str): - raise RuntimeError(f"Schema '{simple_schema_name_str}' already exists.") - - try: - create_sql = f"CREATE SCHEMA [{simple_schema_name_str}]" - self.execute(create_sql) - except Exception as e: - error_message = str(e).lower() - if ( - "already exists" in error_message - or "there is already an object named" in error_message - ): - if ignore_if_exists: - return - raise RuntimeError( - f"Schema '{simple_schema_name_str}' already exists due to race condition." - ) from e - else: - if warn_on_error: - logger.warning(f"Failed to create schema {simple_schema_name_str}. Reason: {e}") - else: - raise RuntimeError(f"Failed to create schema {simple_schema_name_str}.") from e - def _insert_overwrite_by_condition( self, table_name: TableName, From 332ea32caa898c8f4e98b28d4ecfb381a553ef73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Thu, 19 Jun 2025 13:04:54 +0200 Subject: [PATCH 05/70] CTEs no qualify --- sqlmesh/core/engine_adapter/fabric.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 1f21ffbf26..9f37e8b14f 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -303,7 +303,14 @@ def create_view( qualified_view_name = self._fully_qualify(view_name) if isinstance(query_or_df, exp.Expression): + # CTEs should not be qualified with the database name. + cte_names = {cte.alias_or_name for cte in query_or_df.find_all(exp.CTE)} + for table in query_or_df.find_all(exp.Table): + if table.this.name in cte_names: + continue + + # Qualify all other tables that don't already have a catalog. 
if not table.catalog: qualified_table = self._fully_qualify(table) table.replace(qualified_table) From 585fb7e403b950034ac3cd97c7ec516e5fe54095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Mon, 23 Jun 2025 20:44:43 +0200 Subject: [PATCH 06/70] simplifying --- sqlmesh/core/config/connection.py | 9 +- sqlmesh/core/engine_adapter/fabric.py | 392 +++----------------------- 2 files changed, 40 insertions(+), 361 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index cc26e63242..9e95e9ae78 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -43,7 +43,14 @@ logger = logging.getLogger(__name__) -RECOMMENDED_STATE_SYNC_ENGINES = {"postgres", "gcp_postgres", "mysql", "mssql", "azuresql"} +RECOMMENDED_STATE_SYNC_ENGINES = { + "postgres", + "gcp_postgres", + "mysql", + "mssql", + "azuresql", + "fabric", +} FORBIDDEN_STATE_SYNC_ENGINES = { # Do not support row-level operations "spark", diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 9f37e8b14f..a4eb30a91d 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -3,19 +3,10 @@ import typing as t from sqlglot import exp from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter -from sqlmesh.core.engine_adapter.shared import ( - InsertOverwriteStrategy, - SourceQuery, - DataObject, - DataObjectType, -) -import logging -from sqlmesh.core.dialect import to_schema +from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, SourceQuery -logger = logging.getLogger(__name__) if t.TYPE_CHECKING: - from sqlmesh.core._typing import SchemaName, TableName - from sqlmesh.core.engine_adapter._typing import QueryOrDF + from sqlmesh.core._typing import TableName class FabricAdapter(MSSQLEngineAdapter): @@ -26,334 +17,35 @@ class FabricAdapter(MSSQLEngineAdapter): DIALECT = "fabric" SUPPORTS_INDEXES = False SUPPORTS_TRANSACTIONS = False - INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT - def __init__(self, *args: t.Any, **kwargs: t.Any): - self.database = kwargs.get("database") - - super().__init__(*args, **kwargs) - - if not self.database: - raise ValueError( - "The 'database' parameter is required in the connection config for the FabricWarehouseAdapter." - ) - try: - self.execute(f"USE [{self.database}]") - except Exception as e: - raise RuntimeError(f"Failed to set database context to '{self.database}'. Reason: {e}") - - def _get_schema_name(self, name: t.Union[str, exp.Table]) -> t.Optional[str]: - """ - Safely extracts the schema name from a table or schema name, which can be - a string or a sqlglot expression. - - Fabric requires database names to be explicitly specified in many contexts, - including referencing schemas in INFORMATION_SCHEMA. This function helps - in extracting the schema part correctly from potentially qualified names. - """ - table = exp.to_table(name) - - if table.this and table.this.name.startswith("#"): - return None - - schema_part = table.db - - if not schema_part: - return None - - if isinstance(schema_part, exp.Identifier): - return schema_part.name - if isinstance(schema_part, str): - return schema_part - - raise TypeError(f"Unexpected type for schema part: {type(schema_part)}") - - def _get_data_objects( - self, schema_name: SchemaName, object_names: t.Optional[t.Set[str]] = None - ) -> t.List[DataObject]: - """ - Returns all the data objects that exist in the given schema and database. 
- - Overridden to query `INFORMATION_SCHEMA.TABLES` with explicit database qualification - and preserved casing using `quoted=True`. - """ - import pandas as pd - - catalog = self.get_current_catalog() - - from_table = exp.Table( - this=exp.to_identifier("TABLES", quoted=True), - db=exp.to_identifier("INFORMATION_SCHEMA", quoted=True), - catalog=exp.to_identifier(self.database), - ) - - query = ( - exp.select( - exp.column("TABLE_NAME").as_("name"), - exp.column("TABLE_SCHEMA").as_("schema_name"), - exp.case() - .when(exp.column("TABLE_TYPE").eq("BASE TABLE"), exp.Literal.string("TABLE")) - .else_(exp.column("TABLE_TYPE")) - .as_("type"), - ) - .from_(from_table) - .where(exp.column("TABLE_SCHEMA").eq(str(to_schema(schema_name).db).strip("[]"))) - ) - if object_names: - query = query.where( - exp.column("TABLE_NAME").isin(*(name.strip("[]") for name in object_names)) - ) - - dataframe: pd.DataFrame = self.fetchdf(query) - - return [ - DataObject( - catalog=catalog, - schema=row.schema_name, - name=row.name, - type=DataObjectType.from_str(str(row.type)), - ) - for row in dataframe.itertuples() - ] - - def schema_exists(self, schema_name: SchemaName) -> bool: - """ - Checks if a schema exists. - """ - schema = exp.to_table(schema_name).db - if not schema: - return False - - sql = ( - exp.select("1") - .from_("INFORMATION_SCHEMA.SCHEMATA") - .where(f"SCHEMA_NAME = '{schema}'") - .where(f"CATALOG_NAME = '{self.database}'") - ) - result = self.fetchone(sql, quote_identifiers=True) - return result[0] == 1 if result else False - - def create_schema( - self, - schema_name: t.Optional[SchemaName], - ignore_if_exists: bool = True, - warn_on_error: bool = True, - **kwargs: t.Any, - ) -> None: - """ - Creates a schema in a Microsoft Fabric Warehouse. - - Overridden to handle Fabric's specific T-SQL requirements. - """ - if not schema_name: - return - - schema_exp = to_schema(schema_name) - simple_schema_name_str = exp.to_identifier(schema_exp.db).name if schema_exp.db else None - - if not simple_schema_name_str: - logger.warning( - f"Could not determine simple schema name from '{schema_name}'. Skipping schema creation." - ) - return - - try: - if self.schema_exists(simple_schema_name_str): - if ignore_if_exists: - return - raise RuntimeError(f"Schema '{simple_schema_name_str}' already exists.") - except Exception as e: - if warn_on_error: - logger.warning( - f"Failed to check for existence of schema '{simple_schema_name_str}': {e}" - ) - else: - raise - - try: - create_sql = f"CREATE SCHEMA [{simple_schema_name_str}]" - self.execute(create_sql) - except Exception as e: - error_message = str(e).lower() - if ( - "already exists" in error_message - or "there is already an object named" in error_message - ): - if ignore_if_exists: - return - raise RuntimeError( - f"Schema '{simple_schema_name_str}' already exists due to race condition." - ) from e - else: - if warn_on_error: - logger.warning(f"Failed to create schema {simple_schema_name_str}. Reason: {e}") - else: - raise RuntimeError(f"Failed to create schema {simple_schema_name_str}.") from e - - def _create_table_from_columns( - self, - table_name: TableName, - columns_to_types: t.Dict[str, exp.DataType], - primary_key: t.Optional[t.Tuple[str, ...]] = None, - exists: bool = True, - table_description: t.Optional[str] = None, - column_descriptions: t.Optional[t.Dict[str, str]] = None, - **kwargs: t.Any, - ) -> None: - """ - Creates a table, ensuring the schema exists first and that all - object names are fully qualified with the database. 
- """ - table_exp = exp.to_table(table_name) - schema_name = self._get_schema_name(table_name) - - self.create_schema(schema_name) - - fully_qualified_table_name = f"[{self.database}].[{schema_name}].[{table_exp.name}]" - - column_defs = ", ".join( - f"[{col}] {kind.sql(dialect=self.dialect)}" for col, kind in columns_to_types.items() - ) - - create_table_sql = f"CREATE TABLE {fully_qualified_table_name} ({column_defs})" - - if not exists: - self.execute(create_table_sql) - return - - if not self.table_exists(table_name): - self.execute(create_table_sql) - - if table_description and self.comments_enabled: - qualified_table_for_comment = self._fully_qualify(table_name) - self._create_table_comment(qualified_table_for_comment, table_description) - if column_descriptions and self.comments_enabled: - self._create_column_comments(qualified_table_for_comment, column_descriptions) - def table_exists(self, table_name: TableName) -> bool: """ Checks if a table exists. - Overridden to query the uppercase `INFORMATION_SCHEMA` required + Querying the uppercase `INFORMATION_SCHEMA` required by case-sensitive Fabric environments. """ table = exp.to_table(table_name) - schema = self._get_schema_name(table_name) - sql = ( exp.select("1") .from_("INFORMATION_SCHEMA.TABLES") .where(f"TABLE_NAME = '{table.alias_or_name}'") - .where(f"TABLE_SCHEMA = '{schema}'") + .where(f"TABLE_SCHEMA = '{table.db}'") ) result = self.fetchone(sql, quote_identifiers=True) return result[0] == 1 if result else False - def _fully_qualify(self, name: t.Union[TableName, SchemaName]) -> exp.Table: - """ - Ensures an object name is prefixed with the configured database and schema. - - Overridden to prevent qualification for temporary objects (starting with # or ##). - Temporary objects should not be qualified with database or schema in T-SQL. - """ - table = exp.to_table(name) - - if ( - table.this - and isinstance(table.this, exp.Identifier) - and (table.this.name.startswith("#")) - ): - temp_identifier = exp.Identifier(this=table.this.name, quoted=True) - return exp.Table(this=temp_identifier) - - schema = self._get_schema_name(name) - - return exp.Table( - this=table.this, - db=exp.to_identifier(schema) if schema else None, - catalog=exp.to_identifier(self.database), - ) - - def create_view( - self, - view_name: TableName, - query_or_df: QueryOrDF, - columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, - replace: bool = True, - materialized: bool = False, - materialized_properties: t.Optional[t.Dict[str, t.Any]] = None, - table_description: t.Optional[str] = None, - column_descriptions: t.Optional[t.Dict[str, str]] = None, - view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, - **create_kwargs: t.Any, - ) -> None: - """ - Creates a view from a query or DataFrame. - - Overridden to ensure that the view name and all tables referenced - in the source query are fully qualified with the database name, - as required by Fabric. - """ - view_schema = self._get_schema_name(view_name) - self.create_schema(view_schema) - - qualified_view_name = self._fully_qualify(view_name) - - if isinstance(query_or_df, exp.Expression): - # CTEs should not be qualified with the database name. - cte_names = {cte.alias_or_name for cte in query_or_df.find_all(exp.CTE)} - - for table in query_or_df.find_all(exp.Table): - if table.this.name in cte_names: - continue - - # Qualify all other tables that don't already have a catalog. 
- if not table.catalog: - qualified_table = self._fully_qualify(table) - table.replace(qualified_table) - - return super().create_view( - qualified_view_name, - query_or_df, - columns_to_types, - replace, - materialized, - table_description=table_description, - column_descriptions=column_descriptions, - view_properties=view_properties, - **create_kwargs, - ) - def columns( - self, table_name: TableName, include_pseudo_columns: bool = False + self, + table_name: TableName, + include_pseudo_columns: bool = True, ) -> t.Dict[str, exp.DataType]: - import numpy as np + """Fabric doesn't support describe so we query INFORMATION_SCHEMA.""" table = exp.to_table(table_name) - schema = self._get_schema_name(table_name) - - if ( - not schema - and table.this - and isinstance(table.this, exp.Identifier) - and table.this.name.startswith("__temp_") - ): - schema = "dbo" - - if not schema: - logger.warning( - f"Cannot fetch columns for table '{table_name}' without a schema name in Fabric." - ) - return {} - - from_table = exp.Table( - this=exp.to_identifier("COLUMNS", quoted=True), - db=exp.to_identifier("INFORMATION_SCHEMA", quoted=True), - catalog=exp.to_identifier(self.database), - ) sql = ( exp.select( @@ -363,14 +55,14 @@ def columns( "NUMERIC_PRECISION", "NUMERIC_SCALE", ) - .from_(from_table) - .where(f"TABLE_NAME = '{table.name.strip('[]')}'") - .where(f"TABLE_SCHEMA = '{schema.strip('[]')}'") - .order_by("ORDINAL_POSITION") + .from_("INFORMATION_SCHEMA.COLUMNS") + .where(f"TABLE_NAME = '{table.name}'") ) + database_name = table.db + if database_name: + sql = sql.where(f"TABLE_SCHEMA = '{database_name}'") - df = self.fetchdf(sql) - df = df.replace({np.nan: None}) + columns_raw = self.fetchall(sql, quote_identifiers=True) def build_var_length_col( column_name: str, @@ -378,52 +70,32 @@ def build_var_length_col( character_maximum_length: t.Optional[int] = None, numeric_precision: t.Optional[int] = None, numeric_scale: t.Optional[int] = None, - ) -> t.Tuple[str, str]: + ) -> tuple: data_type = data_type.lower() - - char_len_int = character_maximum_length - prec_int = numeric_precision - scale_int = numeric_scale - - if data_type in self.VARIABLE_LENGTH_DATA_TYPES and char_len_int is not None: - if char_len_int > 0: - return (column_name, f"{data_type}({char_len_int})") - if char_len_int == -1: - return (column_name, f"{data_type}(max)") if ( - data_type in ("decimal", "numeric") - and prec_int is not None - and scale_int is not None + data_type in self.VARIABLE_LENGTH_DATA_TYPES + and character_maximum_length is not None + and character_maximum_length > 0 + ): + return (column_name, f"{data_type}({character_maximum_length})") + if ( + data_type in ("varbinary", "varchar", "nvarchar") + and character_maximum_length is not None + and character_maximum_length == -1 ): - return (column_name, f"{data_type}({prec_int}, {scale_int})") - if data_type == "float" and prec_int is not None: - return (column_name, f"{data_type}({prec_int})") + return (column_name, f"{data_type}(max)") + if data_type in ("decimal", "numeric"): + return (column_name, f"{data_type}({numeric_precision}, {numeric_scale})") + if data_type == "float": + return (column_name, f"{data_type}({numeric_precision})") return (column_name, data_type) - def _to_optional_int(val: t.Any) -> t.Optional[int]: - """Safely convert DataFrame values to Optional[int] for mypy.""" - if val is None: - return None - try: - return int(val) - except (ValueError, TypeError): - return None - - columns_processed = [ - build_var_length_col( - 
str(row.COLUMN_NAME), - str(row.DATA_TYPE), - _to_optional_int(row.CHARACTER_MAXIMUM_LENGTH), - _to_optional_int(row.NUMERIC_PRECISION), - _to_optional_int(row.NUMERIC_SCALE), - ) - for row in df.itertuples() - ] + columns = [build_var_length_col(*row) for row in columns_raw] return { column_name: exp.DataType.build(data_type, dialect=self.dialect) - for column_name, data_type in columns_processed + for column_name, data_type in columns } def _insert_overwrite_by_condition( @@ -448,7 +120,7 @@ def _insert_overwrite_by_condition( for source_query in source_queries: with source_query as query: - query = self._order_projections_and_filter(query, columns_to_types) + query = self._order_projections_and_filter(query, columns_to_types, where=where) self._insert_append_query( table_name, query, From 1bbe90e633b90d0e0fd3b7683f3094858f29f6d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Mon, 23 Jun 2025 22:27:59 +0200 Subject: [PATCH 07/70] docs & tests --- docs/integrations/engines/fabric.md | 30 +++++++++ docs/integrations/overview.md | 1 + mkdocs.yml | 1 + pyproject.toml | 1 + sqlmesh/core/config/connection.py | 2 +- sqlmesh/core/engine_adapter/fabric.py | 4 +- tests/core/engine_adapter/test_fabric.py | 83 ++++++++++++++++++++++++ 7 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 docs/integrations/engines/fabric.md create mode 100644 tests/core/engine_adapter/test_fabric.py diff --git a/docs/integrations/engines/fabric.md b/docs/integrations/engines/fabric.md new file mode 100644 index 0000000000..aca9c32eed --- /dev/null +++ b/docs/integrations/engines/fabric.md @@ -0,0 +1,30 @@ +# Fabric + +## Local/Built-in Scheduler +**Engine Adapter Type**: `fabric` + +### Installation +#### Microsoft Entra ID / Azure Active Directory Authentication: +``` +pip install "sqlmesh[mssql-odbc]" +``` + +### Connection options + +| Option | Description | Type | Required | +| ----------------- | ------------------------------------------------------------ | :----------: | :------: | +| `type` | Engine type name - must be `fabric` | string | Y | +| `host` | The hostname of the Fabric Warehouse server | string | Y | +| `user` | The client id to use for authentication with the Fabric Warehouse server | string | N | +| `password` | The client secret to use for authentication with the Fabric Warehouse server | string | N | +| `port` | The port number of the Fabric Warehouse server | int | N | +| `database` | The target database | string | N | +| `charset` | The character set used for the connection | string | N | +| `timeout` | The query timeout in seconds. Default: no timeout | int | N | +| `login_timeout` | The timeout for connection and login in seconds. Default: 60 | int | N | +| `appname` | The application name to use for the connection | string | N | +| `conn_properties` | The list of connection properties | list[string] | N | +| `autocommit` | Is autocommit mode enabled. Default: false | bool | N | +| `driver` | The driver to use for the connection. Default: pyodbc | string | N | +| `driver_name` | The driver name to use for the connection. E.g., *ODBC Driver 18 for SQL Server* | string | N | +| `odbc_properties` | The dict of ODBC connection properties. E.g., authentication: ActiveDirectoryServicePrincipal. See more [here](https://learn.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver16). 
| dict | N | \ No newline at end of file diff --git a/docs/integrations/overview.md b/docs/integrations/overview.md index 9f829ceab7..c23fe0fc47 100644 --- a/docs/integrations/overview.md +++ b/docs/integrations/overview.md @@ -17,6 +17,7 @@ SQLMesh supports the following execution engines for running SQLMesh projects: * [ClickHouse](./engines/clickhouse.md) * [Databricks](./engines/databricks.md) * [DuckDB](./engines/duckdb.md) +* [Fabric](./engines/fabric.md) * [MotherDuck](./engines/motherduck.md) * [MSSQL](./engines/mssql.md) * [MySQL](./engines/mysql.md) diff --git a/mkdocs.yml b/mkdocs.yml index 56ec348a04..b7ab52e858 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -84,6 +84,7 @@ nav: - integrations/engines/clickhouse.md - integrations/engines/databricks.md - integrations/engines/duckdb.md + - integrations/engines/fabric.md - integrations/engines/motherduck.md - integrations/engines/mssql.md - integrations/engines/mysql.md diff --git a/pyproject.toml b/pyproject.toml index ea20c21e74..c8eeaec3e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -249,6 +249,7 @@ markers = [ "clickhouse_cloud: test for Clickhouse (cloud mode)", "databricks: test for Databricks", "duckdb: test for DuckDB", + "fabric: test for Fabric", "motherduck: test for MotherDuck", "mssql: test for MSSQL", "mysql: test for MySQL", diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 9e95e9ae78..a6aaa96b4a 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -49,7 +49,6 @@ "mysql", "mssql", "azuresql", - "fabric", } FORBIDDEN_STATE_SYNC_ENGINES = { # Do not support row-level operations @@ -1603,6 +1602,7 @@ class FabricConnectionConfig(MSSQLConnectionConfig): """ type_: t.Literal["fabric"] = Field(alias="type", default="fabric") # type: ignore + driver: t.Literal["pyodbc"] = "pyodbc" autocommit: t.Optional[bool] = True @property diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index a4eb30a91d..44cc8bcfb3 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -31,8 +31,10 @@ def table_exists(self, table_name: TableName) -> bool: exp.select("1") .from_("INFORMATION_SCHEMA.TABLES") .where(f"TABLE_NAME = '{table.alias_or_name}'") - .where(f"TABLE_SCHEMA = '{table.db}'") ) + database_name = table.db + if database_name: + sql = sql.where(f"TABLE_SCHEMA = '{database_name}'") result = self.fetchone(sql, quote_identifiers=True) diff --git a/tests/core/engine_adapter/test_fabric.py b/tests/core/engine_adapter/test_fabric.py new file mode 100644 index 0000000000..623bbe6653 --- /dev/null +++ b/tests/core/engine_adapter/test_fabric.py @@ -0,0 +1,83 @@ +# type: ignore + +import typing as t + +import pytest +from sqlglot import exp, parse_one + +from sqlmesh.core.engine_adapter import FabricAdapter +from tests.core.engine_adapter import to_sql_calls + +pytestmark = [pytest.mark.engine, pytest.mark.fabric] + + +@pytest.fixture +def adapter(make_mocked_engine_adapter: t.Callable) -> FabricAdapter: + return make_mocked_engine_adapter(FabricAdapter) + + +def test_columns(adapter: FabricAdapter): + adapter.cursor.fetchall.return_value = [ + ("decimal_ps", "decimal", None, 5, 4), + ("decimal", "decimal", None, 18, 0), + ("float", "float", None, 53, None), + ("char_n", "char", 10, None, None), + ("varchar_n", "varchar", 10, None, None), + ("nvarchar_max", "nvarchar", -1, None, None), + ] + + assert adapter.columns("db.table") == { + "decimal_ps": exp.DataType.build("decimal(5, 4)", 
dialect=adapter.dialect), + "decimal": exp.DataType.build("decimal(18, 0)", dialect=adapter.dialect), + "float": exp.DataType.build("float(53)", dialect=adapter.dialect), + "char_n": exp.DataType.build("char(10)", dialect=adapter.dialect), + "varchar_n": exp.DataType.build("varchar(10)", dialect=adapter.dialect), + "nvarchar_max": exp.DataType.build("nvarchar(max)", dialect=adapter.dialect), + } + + # Verify that the adapter queries the uppercase INFORMATION_SCHEMA + adapter.cursor.execute.assert_called_once_with( + """SELECT [COLUMN_NAME], [DATA_TYPE], [CHARACTER_MAXIMUM_LENGTH], [NUMERIC_PRECISION], [NUMERIC_SCALE] FROM [INFORMATION_SCHEMA].[COLUMNS] WHERE [TABLE_NAME] = 'table' AND [TABLE_SCHEMA] = 'db';""" + ) + + +def test_table_exists(adapter: FabricAdapter): + adapter.cursor.fetchone.return_value = (1,) + assert adapter.table_exists("db.table") + # Verify that the adapter queries the uppercase INFORMATION_SCHEMA + adapter.cursor.execute.assert_called_once_with( + """SELECT 1 FROM [INFORMATION_SCHEMA].[TABLES] WHERE [TABLE_NAME] = 'table' AND [TABLE_SCHEMA] = 'db';""" + ) + + adapter.cursor.fetchone.return_value = None + assert not adapter.table_exists("db.table") + + +def test_insert_overwrite_by_time_partition(adapter: FabricAdapter): + adapter.insert_overwrite_by_time_partition( + "test_table", + parse_one("SELECT a, b FROM tbl"), + start="2022-01-01", + end="2022-01-02", + time_column="b", + time_formatter=lambda x, _: exp.Literal.string(x.strftime("%Y-%m-%d")), + columns_to_types={"a": exp.DataType.build("INT"), "b": exp.DataType.build("STRING")}, + ) + + # Fabric adapter should use DELETE/INSERT strategy, not MERGE. + assert to_sql_calls(adapter) == [ + """DELETE FROM [test_table] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""", + """INSERT INTO [test_table] ([a], [b]) SELECT [a], [b] FROM (SELECT [a], [b] FROM [tbl]) AS [_subquery] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""", + ] + + +def test_replace_query(adapter: FabricAdapter): + adapter.cursor.fetchone.return_value = (1,) + adapter.replace_query("test_table", parse_one("SELECT a FROM tbl"), {"a": "int"}) + + # This behavior is inherited from MSSQLEngineAdapter and should be TRUNCATE + INSERT + assert to_sql_calls(adapter) == [ + """SELECT 1 FROM [INFORMATION_SCHEMA].[TABLES] WHERE [TABLE_NAME] = 'test_table';""", + "TRUNCATE TABLE [test_table];", + "INSERT INTO [test_table] ([a]) SELECT [a] FROM [tbl];", + ] From 689557028b08f2130eb44fcb53b838a7bd4a9779 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Mon, 23 Jun 2025 23:29:03 +0200 Subject: [PATCH 08/70] connection tests --- docs/guides/configuration.md | 1 + sqlmesh/core/config/__init__.py | 1 + sqlmesh/core/engine_adapter/fabric.py | 30 +++++----- tests/core/test_connection_config.py | 83 +++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 17 deletions(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 361171d937..06aa3298ce 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -598,6 +598,7 @@ These pages describe the connection configuration options for each execution eng * [BigQuery](../integrations/engines/bigquery.md) * [Databricks](../integrations/engines/databricks.md) * [DuckDB](../integrations/engines/duckdb.md) +* [Fabric](../integrations/engines/fabric.md) * [MotherDuck](../integrations/engines/motherduck.md) * [MySQL](../integrations/engines/mysql.md) * [MSSQL](../integrations/engines/mssql.md) diff --git a/sqlmesh/core/config/__init__.py 
b/sqlmesh/core/config/__init__.py index af84818858..65435376a0 100644 --- a/sqlmesh/core/config/__init__.py +++ b/sqlmesh/core/config/__init__.py @@ -10,6 +10,7 @@ ConnectionConfig as ConnectionConfig, DatabricksConnectionConfig as DatabricksConnectionConfig, DuckDBConnectionConfig as DuckDBConnectionConfig, + FabricConnectionConfig as FabricConnectionConfig, GCPPostgresConnectionConfig as GCPPostgresConnectionConfig, MotherDuckConnectionConfig as MotherDuckConnectionConfig, MSSQLConnectionConfig as MSSQLConnectionConfig, diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 44cc8bcfb3..f0a025607a 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -4,6 +4,7 @@ from sqlglot import exp from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, SourceQuery +from sqlmesh.core.engine_adapter.base import EngineAdapter if t.TYPE_CHECKING: from sqlmesh.core._typing import TableName @@ -110,22 +111,17 @@ def _insert_overwrite_by_condition( **kwargs: t.Any, ) -> None: """ - Implements the insert overwrite strategy for Fabric. + Implements the insert overwrite strategy for Fabric using DELETE and INSERT. - Overridden to enforce a `DELETE`/`INSERT` strategy, as Fabric's - `MERGE` statement has limitations. + This method is overridden to avoid the MERGE statement from the parent + MSSQLEngineAdapter, which is not fully supported in Fabric. """ - - columns_to_types = columns_to_types or self.columns(table_name) - - self.delete_from(table_name, where=where or exp.true()) - - for source_query in source_queries: - with source_query as query: - query = self._order_projections_and_filter(query, columns_to_types, where=where) - self._insert_append_query( - table_name, - query, - columns_to_types=columns_to_types, - order_projections=False, - ) + return EngineAdapter._insert_overwrite_by_condition( + self, + table_name=table_name, + source_queries=source_queries, + columns_to_types=columns_to_types, + where=where, + insert_overwrite_strategy_override=InsertOverwriteStrategy.DELETE_INSERT, + **kwargs, + ) diff --git a/tests/core/test_connection_config.py b/tests/core/test_connection_config.py index ba33cb010b..daa2fc77d3 100644 --- a/tests/core/test_connection_config.py +++ b/tests/core/test_connection_config.py @@ -12,6 +12,7 @@ ConnectionConfig, DatabricksConnectionConfig, DuckDBAttachOptions, + FabricConnectionConfig, DuckDBConnectionConfig, GCPPostgresConnectionConfig, MotherDuckConnectionConfig, @@ -1392,3 +1393,85 @@ def test_mssql_pymssql_connection_factory(): # Clean up the mock module if "pymssql" in sys.modules: del sys.modules["pymssql"] + + +def test_fabric_connection_config_defaults(make_config): + """Test Fabric connection config defaults to pyodbc and autocommit=True.""" + config = make_config(type="fabric", host="localhost", check_import=False) + assert isinstance(config, FabricConnectionConfig) + assert config.driver == "pyodbc" + assert config.autocommit is True + + # Ensure it creates the FabricAdapter + from sqlmesh.core.engine_adapter.fabric import FabricAdapter + + assert isinstance(config.create_engine_adapter(), FabricAdapter) + + +def test_fabric_connection_config_parameter_validation(make_config): + """Test Fabric connection config parameter validation.""" + # Test that FabricConnectionConfig correctly handles pyodbc-specific parameters. 
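+    # (driver_name, trust_server_certificate, encrypt, and odbc_properties all feed
+    # the pyodbc connection string rather than pymssql keyword arguments; see the
+    # connection-string assertions in test_fabric_pyodbc_connection_string_generation.)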
+ config = make_config( + type="fabric", + host="localhost", + driver_name="ODBC Driver 18 for SQL Server", + trust_server_certificate=True, + encrypt=False, + odbc_properties={"Authentication": "ActiveDirectoryServicePrincipal"}, + check_import=False, + ) + assert isinstance(config, FabricConnectionConfig) + assert config.driver == "pyodbc" # Driver is fixed to pyodbc + assert config.driver_name == "ODBC Driver 18 for SQL Server" + assert config.trust_server_certificate is True + assert config.encrypt is False + assert config.odbc_properties == {"Authentication": "ActiveDirectoryServicePrincipal"} + + # Test that specifying a different driver for Fabric raises an error + with pytest.raises(ConfigError, match=r"Input should be 'pyodbc'"): + make_config(type="fabric", host="localhost", driver="pymssql", check_import=False) + + +def test_fabric_pyodbc_connection_string_generation(): + """Test that the Fabric pyodbc connection gets invoked with the correct ODBC connection string.""" + with patch("pyodbc.connect") as mock_pyodbc_connect: + # Create a Fabric config + config = FabricConnectionConfig( + host="testserver.datawarehouse.fabric.microsoft.com", + port=1433, + database="testdb", + user="testuser", + password="testpass", + driver_name="ODBC Driver 18 for SQL Server", + trust_server_certificate=True, + encrypt=True, + login_timeout=30, + check_import=False, + ) + + # Get the connection factory with kwargs and call it + factory_with_kwargs = config._connection_factory_with_kwargs + connection = factory_with_kwargs() + + # Verify pyodbc.connect was called with the correct connection string + mock_pyodbc_connect.assert_called_once() + call_args = mock_pyodbc_connect.call_args + + # Check the connection string (first argument) + conn_str = call_args[0][0] + expected_parts = [ + "DRIVER={ODBC Driver 18 for SQL Server}", + "SERVER=testserver.datawarehouse.fabric.microsoft.com,1433", + "DATABASE=testdb", + "Encrypt=YES", + "TrustServerCertificate=YES", + "Connection Timeout=30", + "UID=testuser", + "PWD=testpass", + ] + + for part in expected_parts: + assert part in conn_str + + # Check autocommit parameter, should default to True for Fabric + assert call_args[1]["autocommit"] is True From 9c0a2dd36de66e993f2bd6845b4e8d9046efce82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Tue, 24 Jun 2025 15:08:59 +0200 Subject: [PATCH 09/70] remove table_exist and columns --- sqlmesh/core/engine_adapter/fabric.py | 81 ------------------------ tests/core/engine_adapter/test_fabric.py | 30 +++++++-- 2 files changed, 24 insertions(+), 87 deletions(-) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index f0a025607a..5725d3060a 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -20,87 +20,6 @@ class FabricAdapter(MSSQLEngineAdapter): SUPPORTS_TRANSACTIONS = False INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT - def table_exists(self, table_name: TableName) -> bool: - """ - Checks if a table exists. - - Querying the uppercase `INFORMATION_SCHEMA` required - by case-sensitive Fabric environments. 
- """ - table = exp.to_table(table_name) - sql = ( - exp.select("1") - .from_("INFORMATION_SCHEMA.TABLES") - .where(f"TABLE_NAME = '{table.alias_or_name}'") - ) - database_name = table.db - if database_name: - sql = sql.where(f"TABLE_SCHEMA = '{database_name}'") - - result = self.fetchone(sql, quote_identifiers=True) - - return result[0] == 1 if result else False - - def columns( - self, - table_name: TableName, - include_pseudo_columns: bool = True, - ) -> t.Dict[str, exp.DataType]: - """Fabric doesn't support describe so we query INFORMATION_SCHEMA.""" - - table = exp.to_table(table_name) - - sql = ( - exp.select( - "COLUMN_NAME", - "DATA_TYPE", - "CHARACTER_MAXIMUM_LENGTH", - "NUMERIC_PRECISION", - "NUMERIC_SCALE", - ) - .from_("INFORMATION_SCHEMA.COLUMNS") - .where(f"TABLE_NAME = '{table.name}'") - ) - database_name = table.db - if database_name: - sql = sql.where(f"TABLE_SCHEMA = '{database_name}'") - - columns_raw = self.fetchall(sql, quote_identifiers=True) - - def build_var_length_col( - column_name: str, - data_type: str, - character_maximum_length: t.Optional[int] = None, - numeric_precision: t.Optional[int] = None, - numeric_scale: t.Optional[int] = None, - ) -> tuple: - data_type = data_type.lower() - if ( - data_type in self.VARIABLE_LENGTH_DATA_TYPES - and character_maximum_length is not None - and character_maximum_length > 0 - ): - return (column_name, f"{data_type}({character_maximum_length})") - if ( - data_type in ("varbinary", "varchar", "nvarchar") - and character_maximum_length is not None - and character_maximum_length == -1 - ): - return (column_name, f"{data_type}(max)") - if data_type in ("decimal", "numeric"): - return (column_name, f"{data_type}({numeric_precision}, {numeric_scale})") - if data_type == "float": - return (column_name, f"{data_type}({numeric_precision})") - - return (column_name, data_type) - - columns = [build_var_length_col(*row) for row in columns_raw] - - return { - column_name: exp.DataType.build(data_type, dialect=self.dialect) - for column_name, data_type in columns - } - def _insert_overwrite_by_condition( self, table_name: TableName, diff --git a/tests/core/engine_adapter/test_fabric.py b/tests/core/engine_adapter/test_fabric.py index 623bbe6653..80aea0c989 100644 --- a/tests/core/engine_adapter/test_fabric.py +++ b/tests/core/engine_adapter/test_fabric.py @@ -53,7 +53,9 @@ def test_table_exists(adapter: FabricAdapter): assert not adapter.table_exists("db.table") -def test_insert_overwrite_by_time_partition(adapter: FabricAdapter): +def test_insert_overwrite_by_time_partition( + adapter: FabricAdapter, assert_exp_eq +): # Add assert_exp_eq fixture adapter.insert_overwrite_by_time_partition( "test_table", parse_one("SELECT a, b FROM tbl"), @@ -64,11 +66,27 @@ def test_insert_overwrite_by_time_partition(adapter: FabricAdapter): columns_to_types={"a": exp.DataType.build("INT"), "b": exp.DataType.build("STRING")}, ) - # Fabric adapter should use DELETE/INSERT strategy, not MERGE. 
- assert to_sql_calls(adapter) == [ - """DELETE FROM [test_table] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""", - """INSERT INTO [test_table] ([a], [b]) SELECT [a], [b] FROM (SELECT [a], [b] FROM [tbl]) AS [_subquery] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""", - ] + # Get the list of generated SQL strings + actual_sql_calls = to_sql_calls(adapter) + + # There should be two calls: DELETE and INSERT + assert len(actual_sql_calls) == 2 + + # Assert the DELETE statement is correct (string comparison is fine for this simple one) + assert ( + actual_sql_calls[0] + == "DELETE FROM [test_table] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';" + ) + + # Assert the INSERT statement is semantically correct + expected_insert_sql = """ + INSERT INTO [test_table] ([a], [b]) + SELECT [a], [b] FROM (SELECT [a], [b] FROM [tbl]) AS [_subquery] + WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02'; + """ + + # Use assert_exp_eq to compare the parsed SQL expressions + assert_exp_eq(actual_sql_calls[1], expected_insert_sql) def test_replace_query(adapter: FabricAdapter): From f40fc4d0d44e6835da7d9ede4aee96e51506e1d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Wed, 25 Jun 2025 08:52:33 +0200 Subject: [PATCH 10/70] updated tests --- sqlmesh/core/config/connection.py | 4 +++- tests/core/engine_adapter/test_fabric.py | 30 +++++------------------- 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index a6aaa96b4a..16ae80424b 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1596,12 +1596,14 @@ def _extra_engine_config(self) -> t.Dict[str, t.Any]: class FabricConnectionConfig(MSSQLConnectionConfig): """ Fabric Connection Configuration. - Inherits most settings from MSSQLConnectionConfig and sets the type to 'fabric'. It is recommended to use the 'pyodbc' driver for Fabric. 
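+
+    An illustrative gateway entry (host, credentials, and warehouse name are
+    placeholders rather than defaults):
+
+        connection:
+          type: fabric
+          host: <workspace>.datawarehouse.fabric.microsoft.com
+          user: <entra_client_id>
+          password: <entra_client_secret>
+          database: <warehouse_name>
+          driver_name: ODBC Driver 18 for SQL Server
+          odbc_properties:
+            Authentication: ActiveDirectoryServicePrincipal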
""" type_: t.Literal["fabric"] = Field(alias="type", default="fabric") # type: ignore + DIALECT: t.ClassVar[t.Literal["fabric"]] = "fabric" + DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric" + DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17 driver: t.Literal["pyodbc"] = "pyodbc" autocommit: t.Optional[bool] = True diff --git a/tests/core/engine_adapter/test_fabric.py b/tests/core/engine_adapter/test_fabric.py index 80aea0c989..709df816d2 100644 --- a/tests/core/engine_adapter/test_fabric.py +++ b/tests/core/engine_adapter/test_fabric.py @@ -53,9 +53,7 @@ def test_table_exists(adapter: FabricAdapter): assert not adapter.table_exists("db.table") -def test_insert_overwrite_by_time_partition( - adapter: FabricAdapter, assert_exp_eq -): # Add assert_exp_eq fixture +def test_insert_overwrite_by_time_partition(adapter: FabricAdapter): adapter.insert_overwrite_by_time_partition( "test_table", parse_one("SELECT a, b FROM tbl"), @@ -66,27 +64,11 @@ def test_insert_overwrite_by_time_partition( columns_to_types={"a": exp.DataType.build("INT"), "b": exp.DataType.build("STRING")}, ) - # Get the list of generated SQL strings - actual_sql_calls = to_sql_calls(adapter) - - # There should be two calls: DELETE and INSERT - assert len(actual_sql_calls) == 2 - - # Assert the DELETE statement is correct (string comparison is fine for this simple one) - assert ( - actual_sql_calls[0] - == "DELETE FROM [test_table] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';" - ) - - # Assert the INSERT statement is semantically correct - expected_insert_sql = """ - INSERT INTO [test_table] ([a], [b]) - SELECT [a], [b] FROM (SELECT [a], [b] FROM [tbl]) AS [_subquery] - WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02'; - """ - - # Use assert_exp_eq to compare the parsed SQL expressions - assert_exp_eq(actual_sql_calls[1], expected_insert_sql) + # Fabric adapter should use DELETE/INSERT strategy, not MERGE. 
+ assert to_sql_calls(adapter) == [ + """DELETE FROM [test_table] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""", + """INSERT INTO [test_table] ([a], [b]) SELECT [a], [b] FROM (SELECT [a] AS [a], [b] AS [b] FROM [tbl]) AS [_subquery] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""", + ] def test_replace_query(adapter: FabricAdapter): From 5cc30ab63aa95fa0fa48f47b0a4b576807fcb2a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Wed, 25 Jun 2025 10:54:41 +0200 Subject: [PATCH 11/70] mypy --- sqlmesh/core/config/connection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 16ae80424b..1505e26080 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1601,9 +1601,9 @@ class FabricConnectionConfig(MSSQLConnectionConfig): """ type_: t.Literal["fabric"] = Field(alias="type", default="fabric") # type: ignore - DIALECT: t.ClassVar[t.Literal["fabric"]] = "fabric" - DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric" - DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17 + DIALECT: t.ClassVar[t.Literal["fabric"]] = "fabric" # type: ignore + DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric" # type: ignore + DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17 # type: ignore driver: t.Literal["pyodbc"] = "pyodbc" autocommit: t.Optional[bool] = True From d5f7aa77ee15525e1c0247cb58947c37c0dddef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Wed, 25 Jun 2025 11:10:04 +0200 Subject: [PATCH 12/70] ruff --- sqlmesh/core/config/connection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 1505e26080..e9bab2185b 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1601,9 +1601,9 @@ class FabricConnectionConfig(MSSQLConnectionConfig): """ type_: t.Literal["fabric"] = Field(alias="type", default="fabric") # type: ignore - DIALECT: t.ClassVar[t.Literal["fabric"]] = "fabric" # type: ignore - DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric" # type: ignore - DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17 # type: ignore + DIALECT: t.ClassVar[t.Literal["fabric"]] = "fabric" # type: ignore + DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric" # type: ignore + DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17 # type: ignore driver: t.Literal["pyodbc"] = "pyodbc" autocommit: t.Optional[bool] = True From 50fe5e4f881ed949bbb0879c767b0c3202ebb168 Mon Sep 17 00:00:00 2001 From: Andreas <65893109+fresioAS@users.noreply.github.com> Date: Wed, 25 Jun 2025 16:11:25 +0200 Subject: [PATCH 13/70] Update fabric.md --- docs/integrations/engines/fabric.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/integrations/engines/fabric.md b/docs/integrations/engines/fabric.md index aca9c32eed..1dd47fbe11 100644 --- a/docs/integrations/engines/fabric.md +++ b/docs/integrations/engines/fabric.md @@ -3,6 +3,8 @@ ## Local/Built-in Scheduler **Engine Adapter Type**: `fabric` +NOTE: Fabric Warehouse is not recommended to be used for the SQLMesh [state connection](../../reference/configuration.md#connections). + ### Installation #### Microsoft Entra ID / Azure Active Directory Authentication: ``` @@ -27,4 +29,4 @@ pip install "sqlmesh[mssql-odbc]" | `autocommit` | Is autocommit mode enabled. Default: false | bool | N | | `driver` | The driver to use for the connection. 
Default: pyodbc | string | N | | `driver_name` | The driver name to use for the connection. E.g., *ODBC Driver 18 for SQL Server* | string | N | -| `odbc_properties` | The dict of ODBC connection properties. E.g., authentication: ActiveDirectoryServicePrincipal. See more [here](https://learn.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver16). | dict | N | \ No newline at end of file +| `odbc_properties` | The dict of ODBC connection properties. E.g., authentication: ActiveDirectoryServicePrincipal. See more [here](https://learn.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver16). | dict | N | From 3a06c909d59d8c19d856d82897739e1368dc650c Mon Sep 17 00:00:00 2001 From: Andreas <65893109+fresioAS@users.noreply.github.com> Date: Wed, 2 Jul 2025 13:28:52 +0200 Subject: [PATCH 14/70] Update sqlmesh/core/engine_adapter/fabric.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mattias Thalén --- sqlmesh/core/engine_adapter/fabric.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 5725d3060a..97322641bd 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -10,7 +10,9 @@ from sqlmesh.core._typing import TableName -class FabricAdapter(MSSQLEngineAdapter): +from sqlmesh.core.engine_adapter.mixins import LogicalMergeMixin + +class FabricAdapter(LogicalMergeMixin, MSSQLEngineAdapter): """ Adapter for Microsoft Fabric. """ From 145b69b62a4dae082b19ff7006bc5f3cd0376ba3 Mon Sep 17 00:00:00 2001 From: Andreas <65893109+fresioAS@users.noreply.github.com> Date: Wed, 2 Jul 2025 14:39:03 +0200 Subject: [PATCH 15/70] Update fabric.py --- sqlmesh/core/engine_adapter/fabric.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 97322641bd..d7b862d50a 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -12,6 +12,7 @@ from sqlmesh.core.engine_adapter.mixins import LogicalMergeMixin + class FabricAdapter(LogicalMergeMixin, MSSQLEngineAdapter): """ Adapter for Microsoft Fabric. 
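+
+    With LogicalMergeMixin, merges are emulated rather than issued as T-SQL MERGE.
+    Roughly (an illustrative shape, assuming the mixin's delete-then-insert
+    contract rather than quoting its exact SQL):
+
+        DELETE FROM target WHERE key IN (SELECT key FROM source);
+        INSERT INTO target SELECT ... FROM source;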
From ecf3e7bdc41652cffa2af0e5270a51873d09858c Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Thu, 3 Jul 2025 22:46:25 +0000 Subject: [PATCH 16/70] Add Fabric to integration tests --- Makefile | 3 +++ pyproject.toml | 1 + tests/core/engine_adapter/integration/__init__.py | 1 + tests/core/engine_adapter/integration/config.yaml | 13 +++++++++++++ 4 files changed, 18 insertions(+) diff --git a/Makefile b/Makefile index 0a89bba437..e643ae7ad2 100644 --- a/Makefile +++ b/Makefile @@ -173,6 +173,9 @@ clickhouse-cloud-test: guard-CLICKHOUSE_CLOUD_HOST guard-CLICKHOUSE_CLOUD_USERNA athena-test: guard-AWS_ACCESS_KEY_ID guard-AWS_SECRET_ACCESS_KEY guard-ATHENA_S3_WAREHOUSE_LOCATION engine-athena-install pytest -n auto -m "athena" --retries 3 --junitxml=test-results/junit-athena.xml +fabric-test: guard-FABRIC_HOST guard-FABRIC_CLIENT_ID guard-FABRIC_CLIENT_SECRET guard-FABRIC_DATABASE engine-fabric-install + pytest -n auto -m "fabric" --retries 3 --junitxml=test-results/junit-fabric.xml + vscode_settings: mkdir -p .vscode cp -r ./tooling/vscode/*.json .vscode/ diff --git a/pyproject.toml b/pyproject.toml index c02c5e1565..fee2618e3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,6 +103,7 @@ dev = [ dbt = ["dbt-core<2"] dlt = ["dlt"] duckdb = [] +fabric = ["pyodbc"] gcppostgres = ["cloud-sql-python-connector[pg8000]>=1.8.0"] github = ["PyGithub~=2.5.0"] llm = ["langchain", "openai"] diff --git a/tests/core/engine_adapter/integration/__init__.py b/tests/core/engine_adapter/integration/__init__.py index 7e35b832be..99402df6ae 100644 --- a/tests/core/engine_adapter/integration/__init__.py +++ b/tests/core/engine_adapter/integration/__init__.py @@ -82,6 +82,7 @@ def pytest_marks(self) -> t.List[MarkDecorator]: IntegrationTestEngine("bigquery", native_dataframe_type="bigframe", cloud=True), IntegrationTestEngine("databricks", native_dataframe_type="pyspark", cloud=True), IntegrationTestEngine("snowflake", native_dataframe_type="snowpark", cloud=True), + IntegrationTestEngine("fabric", cloud=True) ] ENGINES_BY_NAME = {e.engine: e for e in ENGINES} diff --git a/tests/core/engine_adapter/integration/config.yaml b/tests/core/engine_adapter/integration/config.yaml index d18ea5366f..4b9c881208 100644 --- a/tests/core/engine_adapter/integration/config.yaml +++ b/tests/core/engine_adapter/integration/config.yaml @@ -186,5 +186,18 @@ gateways: state_connection: type: duckdb + inttest_fabric: + connection: + type: fabric + driver: pyodbc + host: {{ env_var("FABRIC_HOST") }} + user: {{ env_var("FABRIC_CLIENT_ID") }} + password: {{ env_var("FABRIC_CLIENT_SECRET") }} + database: {{ env_var("FABRIC_DATABASE") }} + odbc_properties: + Authentication: ActiveDirectoryServicePrincipal + state_connection: + type: duckdb + model_defaults: dialect: duckdb From 9127bda187545fb68ac7e2af9794e6056277d9fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 10 Jul 2025 08:26:37 +0000 Subject: [PATCH 17/70] feat(tests): add fabric timestamp handling in dialects test --- tests/core/engine_adapter/integration/test_integration.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index ee839d7593..ae93b7c827 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -1756,6 +1756,7 @@ def test_dialects(ctx: TestContext): { "default": pd.Timestamp("2020-01-01 00:00:00+00:00"), "clickhouse": 
pd.Timestamp("2020-01-01 00:00:00"), + "fabric": pd.Timestamp("2020-01-01 00:00:00"), "mysql": pd.Timestamp("2020-01-01 00:00:00"), "spark": pd.Timestamp("2020-01-01 00:00:00"), "databricks": pd.Timestamp("2020-01-01 00:00:00"), From deb9321f9588b2b72816eed0e170de8d9e390320 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 10 Jul 2025 11:17:49 +0000 Subject: [PATCH 18/70] fix: update catalog support configuration in FabricConnectionConfig --- sqlmesh/core/config/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index dc96c9bea5..11028dcdc4 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1701,7 +1701,7 @@ def _engine_adapter(self) -> t.Type[EngineAdapter]: def _extra_engine_config(self) -> t.Dict[str, t.Any]: return { "database": self.database, - "catalog_support": CatalogSupport.REQUIRES_SET_CATALOG, + "catalog_support": CatalogSupport.SINGLE_CATALOG_ONLY, } From 4412fc9a6c194dc49ffb92c746d4db301bad1463 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 10 Jul 2025 12:44:32 +0000 Subject: [PATCH 19/70] fix(mssql): update driver selection logic to allow enforcing pyodbc in Fabric --- sqlmesh/core/config/connection.py | 9 +++++- tests/core/test_connection_config.py | 47 ++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 11028dcdc4..0643750374 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1510,7 +1510,14 @@ def _mssql_engine_import_validator(cls, data: t.Any) -> t.Any: if not isinstance(data, dict): return data - driver = data.get("driver", "pymssql") + # Get the default driver for this specific class + default_driver = "pymssql" + if hasattr(cls, "model_fields") and "driver" in cls.model_fields: + field_info = cls.model_fields["driver"] + if hasattr(field_info, "default") and field_info.default is not None: + default_driver = field_info.default + + driver = data.get("driver", default_driver) # Define the mapping of driver to import module and extra name driver_configs = {"pymssql": ("pymssql", "mssql"), "pyodbc": ("pyodbc", "mssql-odbc")} diff --git a/tests/core/test_connection_config.py b/tests/core/test_connection_config.py index 8021609990..1464b8b00f 100644 --- a/tests/core/test_connection_config.py +++ b/tests/core/test_connection_config.py @@ -1766,3 +1766,50 @@ def test_fabric_pyodbc_connection_string_generation(): # Check autocommit parameter, should default to True for Fabric assert call_args[1]["autocommit"] is True + + +def test_mssql_driver_defaults(make_config): + """Test driver defaults for MSSQL connection config. + + Ensures MSSQL defaults to 'pymssql' but can be overridden to 'pyodbc'. 
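+
+    For example (mirroring the assertions below):
+        make_config(type="mssql", host="localhost")                   -> driver "pymssql"
+        make_config(type="mssql", host="localhost", driver="pyodbc")  -> driver "pyodbc"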
+ """ + + # Test 1: MSSQL with no driver specified - should default to pymssql + config_no_driver = make_config(type="mssql", host="localhost", check_import=False) + assert isinstance(config_no_driver, MSSQLConnectionConfig) + assert config_no_driver.driver == "pymssql" + + # Test 2: MSSQL with explicit pymssql driver + config_pymssql = make_config( + type="mssql", host="localhost", driver="pymssql", check_import=False + ) + assert isinstance(config_pymssql, MSSQLConnectionConfig) + assert config_pymssql.driver == "pymssql" + + # Test 3: MSSQL with explicit pyodbc driver + config_pyodbc = make_config(type="mssql", host="localhost", driver="pyodbc", check_import=False) + assert isinstance(config_pyodbc, MSSQLConnectionConfig) + assert config_pyodbc.driver == "pyodbc" + + +def test_fabric_driver_defaults(make_config): + """Test driver defaults for Fabric connection config. + + Ensures Fabric defaults to 'pyodbc' and cannot be changed to 'pymssql'. + """ + + # Test 1: Fabric with no driver specified - should default to pyodbc + config_no_driver = make_config(type="fabric", host="localhost", check_import=False) + assert isinstance(config_no_driver, FabricConnectionConfig) + assert config_no_driver.driver == "pyodbc" + + # Test 2: Fabric with explicit pyodbc driver + config_pyodbc = make_config( + type="fabric", host="localhost", driver="pyodbc", check_import=False + ) + assert isinstance(config_pyodbc, FabricConnectionConfig) + assert config_pyodbc.driver == "pyodbc" + + # Test 3: Fabric with pymssql driver should fail (not allowed) + with pytest.raises(ConfigError, match=r"Input should be 'pyodbc'"): + make_config(type="fabric", host="localhost", driver="pymssql", check_import=False) From 6ac197eb5fb9a5846ace0b506015e08d763b28c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Fri, 11 Jul 2025 08:55:57 +0000 Subject: [PATCH 20/70] fix(fabric): Skip test_value_normalization for TIMESTAMPTZ --- tests/core/engine_adapter/integration/__init__.py | 2 +- .../engine_adapter/integration/test_integration.py | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/core/engine_adapter/integration/__init__.py b/tests/core/engine_adapter/integration/__init__.py index 99402df6ae..275d8be669 100644 --- a/tests/core/engine_adapter/integration/__init__.py +++ b/tests/core/engine_adapter/integration/__init__.py @@ -82,7 +82,7 @@ def pytest_marks(self) -> t.List[MarkDecorator]: IntegrationTestEngine("bigquery", native_dataframe_type="bigframe", cloud=True), IntegrationTestEngine("databricks", native_dataframe_type="pyspark", cloud=True), IntegrationTestEngine("snowflake", native_dataframe_type="snowpark", cloud=True), - IntegrationTestEngine("fabric", cloud=True) + IntegrationTestEngine("fabric", cloud=True), ] ENGINES_BY_NAME = {e.engine: e for e in ENGINES} diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index ae93b7c827..0844cce3c4 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -2158,14 +2158,12 @@ def test_value_normalization( input_data: t.Tuple[t.Any, ...], expected_results: t.Tuple[str, ...], ) -> None: - if ( - ctx.dialect == "trino" - and ctx.engine_adapter.current_catalog_type == "hive" - and column_type == exp.DataType.Type.TIMESTAMPTZ - ): - pytest.skip( - "Trino on Hive doesnt support creating tables with TIMESTAMP WITH TIME ZONE fields" - ) + # Skip TIMESTAMPTZ tests for 
engines that don't support it + if column_type == exp.DataType.Type.TIMESTAMPTZ: + if ctx.dialect == "trino" and ctx.engine_adapter.current_catalog_type == "hive": + pytest.skip("Trino on Hive doesn't support TIMESTAMP WITH TIME ZONE fields") + if ctx.dialect == "fabric": + pytest.skip("Fabric doesn't support TIMESTAMP WITH TIME ZONE fields") if not isinstance(ctx.engine_adapter, RowDiffMixin): pytest.skip( From 173e0ac8fda685ecfaca6ef3acd8dcc01e1e4cf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Fri, 11 Jul 2025 09:04:00 +0000 Subject: [PATCH 21/70] Manually set sqlglot to dev branch. --- pyproject.toml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9b5b072d8a..dfb1ee511c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,9 @@ dependencies = [ "requests", "rich[jupyter]", "ruamel.yaml", - "sqlglot[rs]~=27.0.0", + + # TODO: Change this to the real release before merge! + "sqlglot[rs] @ git+https://github.com/mattiasthalen/sqlglot@fix/fabric-ensure-varchar-max", #~=27.0.0", "tenacity", "time-machine", "json-stream" @@ -103,7 +105,7 @@ dev = [ dbt = ["dbt-core<2"] dlt = ["dlt"] duckdb = [] -fabric = ["pyodbc"] +fabric = ["pyodbc>=5.0.0"] gcppostgres = ["cloud-sql-python-connector[pg8000]>=1.8.0"] github = ["PyGithub~=2.5.0"] llm = ["langchain", "openai"] From e1542414a6e0344ec005fc694ac786bf30b762a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Tue, 17 Jun 2025 00:45:12 +0200 Subject: [PATCH 22/70] feat: Add support for Microsoft Fabric Waerhouse --- sqlmesh/core/config/connection.py | 22 ++ sqlmesh/core/engine_adapter/__init__.py | 4 + .../core/engine_adapter/fabric_warehouse.py | 233 ++++++++++++++++++ 3 files changed, 259 insertions(+) create mode 100644 sqlmesh/core/engine_adapter/fabric_warehouse.py diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 49d49e40e7..9ee15def93 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1673,6 +1673,28 @@ def _extra_engine_config(self) -> t.Dict[str, t.Any]: return {"catalog_support": CatalogSupport.SINGLE_CATALOG_ONLY} +class FabricWarehouseConnectionConfig(MSSQLConnectionConfig): + """ + Fabric Warehouse Connection Configuration. Inherits most settings from MSSQLConnectionConfig. + """ + + type_: t.Literal["fabric_warehouse"] = Field(alias="type", default="fabric_warehouse") # type: ignore + autocommit: t.Optional[bool] = True + + @property + def _engine_adapter(self) -> t.Type[EngineAdapter]: + from sqlmesh.core.engine_adapter.fabric_warehouse import FabricWarehouseAdapter + + return FabricWarehouseAdapter + + @property + def _extra_engine_config(self) -> t.Dict[str, t.Any]: + return { + "database": self.database, + "catalog_support": CatalogSupport.REQUIRES_SET_CATALOG, + } + + class SparkConnectionConfig(ConnectionConfig): """ Vanilla Spark Connection Configuration. Use `DatabricksConnectionConfig` for Databricks. 
diff --git a/sqlmesh/core/engine_adapter/__init__.py b/sqlmesh/core/engine_adapter/__init__.py index 19332dc005..b876c3b924 100644 --- a/sqlmesh/core/engine_adapter/__init__.py +++ b/sqlmesh/core/engine_adapter/__init__.py @@ -19,6 +19,7 @@ from sqlmesh.core.engine_adapter.trino import TrinoEngineAdapter from sqlmesh.core.engine_adapter.athena import AthenaEngineAdapter from sqlmesh.core.engine_adapter.risingwave import RisingwaveEngineAdapter +from sqlmesh.core.engine_adapter.fabric_warehouse import FabricWarehouseAdapter DIALECT_TO_ENGINE_ADAPTER = { "hive": SparkEngineAdapter, @@ -35,6 +36,7 @@ "trino": TrinoEngineAdapter, "athena": AthenaEngineAdapter, "risingwave": RisingwaveEngineAdapter, + "fabric_warehouse": FabricWarehouseAdapter, } DIALECT_ALIASES = { @@ -45,9 +47,11 @@ def create_engine_adapter( connection_factory: t.Callable[[], t.Any], dialect: str, **kwargs: t.Any ) -> EngineAdapter: + print(kwargs) dialect = dialect.lower() dialect = DIALECT_ALIASES.get(dialect, dialect) engine_adapter = DIALECT_TO_ENGINE_ADAPTER.get(dialect) + print(engine_adapter) if engine_adapter is None: return EngineAdapter(connection_factory, dialect, **kwargs) if engine_adapter is EngineAdapterWithIndexSupport: diff --git a/sqlmesh/core/engine_adapter/fabric_warehouse.py b/sqlmesh/core/engine_adapter/fabric_warehouse.py new file mode 100644 index 0000000000..037f827366 --- /dev/null +++ b/sqlmesh/core/engine_adapter/fabric_warehouse.py @@ -0,0 +1,233 @@ +from __future__ import annotations + +import typing as t +from sqlglot import exp +from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter +from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, SourceQuery + +if t.TYPE_CHECKING: + from sqlmesh.core._typing import SchemaName, TableName + from sqlmesh.core.engine_adapter._typing import QueryOrDF + + +class FabricWarehouseAdapter(MSSQLEngineAdapter): + """ + Adapter for Microsoft Fabric Warehouses. + """ + + DIALECT = "tsql" + SUPPORTS_INDEXES = False + SUPPORTS_TRANSACTIONS = False + + INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT + + def __init__(self, *args: t.Any, **kwargs: t.Any): + self.database = kwargs.get("database") + + super().__init__(*args, **kwargs) + + if not self.database: + raise ValueError( + "The 'database' parameter is required in the connection config for the FabricWarehouseAdapter." + ) + try: + self.execute(f"USE [{self.database}]") + except Exception as e: + raise RuntimeError(f"Failed to set database context to '{self.database}'. Reason: {e}") + + def _get_schema_name(self, name: t.Union[TableName, SchemaName]) -> str: + """Extracts the schema name from a sqlglot object or string.""" + table = exp.to_table(name) + schema_part = table.db + + if isinstance(schema_part, exp.Identifier): + return schema_part.name + if isinstance(schema_part, str): + return schema_part + + if schema_part is None and table.this and table.this.is_identifier: + return table.this.name + + raise ValueError(f"Could not determine schema name from '{name}'") + + def create_schema(self, schema: SchemaName) -> None: + """ + Creates a schema in a Microsoft Fabric Warehouse. + + Overridden to handle Fabric's specific T-SQL requirements. + T-SQL's `CREATE SCHEMA` command does not support `IF NOT EXISTS`, so this + implementation first checks for the schema's existence in the + `INFORMATION_SCHEMA.SCHEMATA` view. 
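+
+        The existence probe issued first has this shape (illustrative values):
+
+            SELECT 1 FROM <database>.INFORMATION_SCHEMA.SCHEMATA
+            WHERE SCHEMA_NAME = '<schema>'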
+ """ + sql = ( + exp.select("1") + .from_(f"{self.database}.INFORMATION_SCHEMA.SCHEMATA") + .where(f"SCHEMA_NAME = '{schema}'") + ) + if self.fetchone(sql): + return + self.execute(f"USE [{self.database}]") + self.execute(f"CREATE SCHEMA [{schema}]") + + def _create_table_from_columns( + self, + table_name: TableName, + columns_to_types: t.Dict[str, exp.DataType], + primary_key: t.Optional[t.Tuple[str, ...]] = None, + exists: bool = True, + table_description: t.Optional[str] = None, + column_descriptions: t.Optional[t.Dict[str, str]] = None, + **kwargs: t.Any, + ) -> None: + """ + Creates a table, ensuring the schema exists first and that all + object names are fully qualified with the database. + """ + table_exp = exp.to_table(table_name) + schema_name = self._get_schema_name(table_name) + + self.create_schema(schema_name) + + fully_qualified_table_name = f"[{self.database}].[{schema_name}].[{table_exp.name}]" + + column_defs = ", ".join( + f"[{col}] {kind.sql(dialect=self.dialect)}" for col, kind in columns_to_types.items() + ) + + create_table_sql = f"CREATE TABLE {fully_qualified_table_name} ({column_defs})" + + if not exists: + self.execute(create_table_sql) + return + + if not self.table_exists(table_name): + self.execute(create_table_sql) + + if table_description and self.comments_enabled: + qualified_table_for_comment = self._fully_qualify(table_name) + self._create_table_comment(qualified_table_for_comment, table_description) + if column_descriptions and self.comments_enabled: + self._create_column_comments(qualified_table_for_comment, column_descriptions) + + def table_exists(self, table_name: TableName) -> bool: + """ + Checks if a table exists. + + Overridden to query the uppercase `INFORMATION_SCHEMA` required + by case-sensitive Fabric environments. + """ + table = exp.to_table(table_name) + schema = self._get_schema_name(table_name) + + sql = ( + exp.select("1") + .from_("INFORMATION_SCHEMA.TABLES") + .where(f"TABLE_NAME = '{table.alias_or_name}'") + .where(f"TABLE_SCHEMA = '{schema}'") + ) + + result = self.fetchone(sql, quote_identifiers=True) + + return result[0] == 1 if result else False + + def _fully_qualify(self, name: t.Union[TableName, SchemaName]) -> exp.Table: + """Ensures an object name is prefixed with the configured database.""" + table = exp.to_table(name) + return exp.Table(this=table.this, db=table.db, catalog=exp.to_identifier(self.database)) + + def create_view( + self, + view_name: TableName, + query_or_df: QueryOrDF, + columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, + replace: bool = True, + materialized: bool = False, + materialized_properties: t.Optional[t.Dict[str, t.Any]] = None, + table_description: t.Optional[str] = None, + column_descriptions: t.Optional[t.Dict[str, str]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + **create_kwargs: t.Any, + ) -> None: + """ + Creates a view from a query or DataFrame. + + Overridden to ensure that the view name and all tables referenced + in the source query are fully qualified with the database name, + as required by Fabric. 
+ """ + view_schema = self._get_schema_name(view_name) + self.create_schema(view_schema) + + qualified_view_name = self._fully_qualify(view_name) + + if isinstance(query_or_df, exp.Expression): + for table in query_or_df.find_all(exp.Table): + if not table.catalog: + qualified_table = self._fully_qualify(table) + table.replace(qualified_table) + + return super().create_view( + qualified_view_name, + query_or_df, + columns_to_types, + replace, + materialized, + table_description=table_description, + column_descriptions=column_descriptions, + view_properties=view_properties, + **create_kwargs, + ) + + def columns( + self, table_name: TableName, include_pseudo_columns: bool = False + ) -> t.Dict[str, exp.DataType]: + """ + Fetches column names and types for the target table. + + Overridden to query the uppercase `INFORMATION_SCHEMA.COLUMNS` view + required by case-sensitive Fabric environments. + """ + table = exp.to_table(table_name) + schema = self._get_schema_name(table_name) + sql = ( + exp.select("COLUMN_NAME", "DATA_TYPE") + .from_(f"{self.database}.INFORMATION_SCHEMA.COLUMNS") + .where(f"TABLE_NAME = '{table.name}'") + .where(f"TABLE_SCHEMA = '{schema}'") + .order_by("ORDINAL_POSITION") + ) + df = self.fetchdf(sql) + return { + str(row.COLUMN_NAME): exp.DataType.build(str(row.DATA_TYPE), dialect=self.dialect) + for row in df.itertuples() + } + + def _insert_overwrite_by_condition( + self, + table_name: TableName, + source_queries: t.List[SourceQuery], + columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, + where: t.Optional[exp.Condition] = None, + insert_overwrite_strategy_override: t.Optional[InsertOverwriteStrategy] = None, + **kwargs: t.Any, + ) -> None: + """ + Implements the insert overwrite strategy for Fabric. + + Overridden to enforce a `DELETE`/`INSERT` strategy, as Fabric's + `MERGE` statement has limitations. 
+ """ + + columns_to_types = columns_to_types or self.columns(table_name) + + self.delete_from(table_name, where=where or exp.true()) + + for source_query in source_queries: + with source_query as query: + query = self._order_projections_and_filter(query, columns_to_types) + self._insert_append_query( + table_name, + query, + columns_to_types=columns_to_types, + order_projections=False, + ) From 2bdd4175bd778caef0ebc621dc1fd06bfc51e005 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Tue, 17 Jun 2025 00:51:12 +0200 Subject: [PATCH 23/70] removing some print statements --- sqlmesh/core/engine_adapter/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sqlmesh/core/engine_adapter/__init__.py b/sqlmesh/core/engine_adapter/__init__.py index b876c3b924..27a2be1e32 100644 --- a/sqlmesh/core/engine_adapter/__init__.py +++ b/sqlmesh/core/engine_adapter/__init__.py @@ -47,11 +47,9 @@ def create_engine_adapter( connection_factory: t.Callable[[], t.Any], dialect: str, **kwargs: t.Any ) -> EngineAdapter: - print(kwargs) dialect = dialect.lower() dialect = DIALECT_ALIASES.get(dialect, dialect) engine_adapter = DIALECT_TO_ENGINE_ADAPTER.get(dialect) - print(engine_adapter) if engine_adapter is None: return EngineAdapter(connection_factory, dialect, **kwargs) if engine_adapter is EngineAdapterWithIndexSupport: From cbe3bdcb1d0412f6770756b567bf34d2e54bc9c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Wed, 18 Jun 2025 00:10:54 +0200 Subject: [PATCH 24/70] adding dialect & handling temp views --- sqlmesh/core/config/connection.py | 16 +- sqlmesh/core/engine_adapter/__init__.py | 6 +- sqlmesh/core/engine_adapter/fabric.py | 482 ++++++++++++++++++ .../core/engine_adapter/fabric_warehouse.py | 233 --------- 4 files changed, 497 insertions(+), 240 deletions(-) create mode 100644 sqlmesh/core/engine_adapter/fabric.py delete mode 100644 sqlmesh/core/engine_adapter/fabric_warehouse.py diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 9ee15def93..4a65ef3436 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1673,22 +1673,28 @@ def _extra_engine_config(self) -> t.Dict[str, t.Any]: return {"catalog_support": CatalogSupport.SINGLE_CATALOG_ONLY} -class FabricWarehouseConnectionConfig(MSSQLConnectionConfig): +class FabricConnectionConfig(MSSQLConnectionConfig): """ - Fabric Warehouse Connection Configuration. Inherits most settings from MSSQLConnectionConfig. + Fabric Connection Configuration. + + Inherits most settings from MSSQLConnectionConfig and sets the type to 'fabric'. + It is recommended to use the 'pyodbc' driver for Fabric. """ - type_: t.Literal["fabric_warehouse"] = Field(alias="type", default="fabric_warehouse") # type: ignore + type_: t.Literal["fabric"] = Field(alias="type", default="fabric") autocommit: t.Optional[bool] = True @property def _engine_adapter(self) -> t.Type[EngineAdapter]: - from sqlmesh.core.engine_adapter.fabric_warehouse import FabricWarehouseAdapter + # This is the crucial link to the adapter you already created. + from sqlmesh.core.engine_adapter.fabric import FabricAdapter - return FabricWarehouseAdapter + return FabricAdapter @property def _extra_engine_config(self) -> t.Dict[str, t.Any]: + # This ensures the 'database' name from the config is passed + # to the FabricAdapter's constructor. 
return {
             "database": self.database,
             "catalog_support": CatalogSupport.REQUIRES_SET_CATALOG,
diff --git a/sqlmesh/core/engine_adapter/__init__.py b/sqlmesh/core/engine_adapter/__init__.py
index 27a2be1e32..c8b8299bd1 100644
--- a/sqlmesh/core/engine_adapter/__init__.py
+++ b/sqlmesh/core/engine_adapter/__init__.py
@@ -19,7 +19,7 @@
 from sqlmesh.core.engine_adapter.trino import TrinoEngineAdapter
 from sqlmesh.core.engine_adapter.athena import AthenaEngineAdapter
 from sqlmesh.core.engine_adapter.risingwave import RisingwaveEngineAdapter
-from sqlmesh.core.engine_adapter.fabric_warehouse import FabricWarehouseAdapter
+from sqlmesh.core.engine_adapter.fabric import FabricAdapter
 
 DIALECT_TO_ENGINE_ADAPTER = {
     "hive": SparkEngineAdapter,
@@ -36,7 +36,7 @@
     "trino": TrinoEngineAdapter,
     "athena": AthenaEngineAdapter,
     "risingwave": RisingwaveEngineAdapter,
-    "fabric_warehouse": FabricWarehouseAdapter,
+    "fabric": FabricAdapter,
 }
 
 DIALECT_ALIASES = {
@@ -47,9 +47,11 @@
 def create_engine_adapter(
     connection_factory: t.Callable[[], t.Any], dialect: str, **kwargs: t.Any
 ) -> EngineAdapter:
+    print(kwargs)
     dialect = dialect.lower()
     dialect = DIALECT_ALIASES.get(dialect, dialect)
     engine_adapter = DIALECT_TO_ENGINE_ADAPTER.get(dialect)
+    print(engine_adapter)
     if engine_adapter is None:
         return EngineAdapter(connection_factory, dialect, **kwargs)
     if engine_adapter is EngineAdapterWithIndexSupport:
diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py
new file mode 100644
index 0000000000..4865c3c8f5
--- /dev/null
+++ b/sqlmesh/core/engine_adapter/fabric.py
@@ -0,0 +1,482 @@
+from __future__ import annotations
+
+import typing as t
+from sqlglot import exp
+from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter
+from sqlmesh.core.engine_adapter.shared import (
+    InsertOverwriteStrategy,
+    SourceQuery,
+    DataObject,
+    DataObjectType,
+)
+import logging
+from sqlmesh.core.dialect import to_schema
+
+logger = logging.getLogger(__name__)
+if t.TYPE_CHECKING:
+    from sqlmesh.core._typing import SchemaName, TableName
+    from sqlmesh.core.engine_adapter._typing import QueryOrDF
+
+
+class FabricAdapter(MSSQLEngineAdapter):
+    """
+    Adapter for Microsoft Fabric.
+    """
+
+    DIALECT = "fabric"
+    SUPPORTS_INDEXES = False
+    SUPPORTS_TRANSACTIONS = False
+
+    INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT
+
+    def __init__(self, *args: t.Any, **kwargs: t.Any):
+        self.database = kwargs.get("database")
+
+        super().__init__(*args, **kwargs)
+
+        if not self.database:
+            raise ValueError(
+                "The 'database' parameter is required in the connection config for the FabricAdapter."
+            )
+        try:
+            self.execute(f"USE [{self.database}]")
+        except Exception as e:
+            raise RuntimeError(f"Failed to set database context to '{self.database}'. Reason: {e}")
+
+    def _get_schema_name(self, name: t.Union[str, exp.Table, exp.Identifier]) -> t.Optional[str]:
+        """
+        Safely extracts the schema name from a table or schema name, which can be
+        a string or a sqlglot expression.
+
+        Fabric requires database names to be explicitly specified in many contexts,
+        including referencing schemas in INFORMATION_SCHEMA. This function helps
+        in extracting the schema part correctly from potentially qualified names.
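+
+        A few illustrative inputs (hypothetical names):
+
+            "my_schema.my_table" -> "my_schema"
+            "my_table"           -> None (no schema part)
+            "#temp_table"        -> None (temp objects are never schema-qualified)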
+ """ + table = exp.to_table(name) + + if table.this and table.this.name.startswith("#"): + return None + + schema_part = table.db + + if not schema_part: + return None + + if isinstance(schema_part, exp.Identifier): + return schema_part.name + if isinstance(schema_part, str): + return schema_part + + raise TypeError(f"Unexpected type for schema part: {type(schema_part)}") + + def _get_data_objects( + self, schema_name: SchemaName, object_names: t.Optional[t.Set[str]] = None + ) -> t.List[DataObject]: + """ + Returns all the data objects that exist in the given schema and database. + + Overridden to query `INFORMATION_SCHEMA.TABLES` with explicit database qualification + and preserved casing using `quoted=True`. + """ + import pandas as pd + + catalog = self.get_current_catalog() + + from_table = exp.Table( + this=exp.to_identifier("TABLES", quoted=True), + db=exp.to_identifier("INFORMATION_SCHEMA", quoted=True), + catalog=exp.to_identifier(self.database), + ) + + query = ( + exp.select( + exp.column("TABLE_NAME").as_("name"), + exp.column("TABLE_SCHEMA").as_("schema_name"), + exp.case() + .when(exp.column("TABLE_TYPE").eq("BASE TABLE"), exp.Literal.string("TABLE")) + .else_(exp.column("TABLE_TYPE")) + .as_("type"), + ) + .from_(from_table) + .where(exp.column("TABLE_SCHEMA").eq(str(to_schema(schema_name).db).strip("[]"))) + ) + if object_names: + query = query.where( + exp.column("TABLE_NAME").isin(*(name.strip("[]") for name in object_names)) + ) + + dataframe: pd.DataFrame = self.fetchdf(query) + + return [ + DataObject( + catalog=catalog, + schema=row.schema_name, + name=row.name, + type=DataObjectType.from_str(row.type), + ) + for row in dataframe.itertuples() + ] + + def create_schema( + self, + schema_name: SchemaName, + ignore_if_exists: bool = True, + warn_on_error: bool = True, + **kwargs: t.Any, + ) -> None: + """ + Creates a schema in a Microsoft Fabric Warehouse. + + Overridden to handle Fabric's specific T-SQL requirements. + T-SQL's `CREATE SCHEMA` command does not support `IF NOT EXISTS` directly + as part of the statement in all contexts, and error messages suggest + issues with batching or preceding statements like USE. + """ + if schema_name is None: + return + + schema_name_str = ( + schema_name.name if isinstance(schema_name, exp.Identifier) else str(schema_name) + ) + + if not schema_name_str: + logger.warning("Attempted to create a schema with an empty name. Skipping.") + return + + schema_name_str = schema_name_str.strip('[]"').rstrip(".") + + if not schema_name_str: + logger.warning( + "Attempted to create a schema with an empty name after sanitization. Skipping." + ) + return + + try: + if self.schema_exists(schema_name_str): + if ignore_if_exists: + return + raise RuntimeError(f"Schema '{schema_name_str}' already exists.") + except Exception as e: + if warn_on_error: + logger.warning(f"Failed to check for existence of schema '{schema_name_str}': {e}") + else: + raise + + try: + create_sql = f"CREATE SCHEMA [{schema_name_str}]" + self.execute(create_sql) + except Exception as e: + if "already exists" in str(e).lower() or "There is already an object named" in str(e): + if ignore_if_exists: + return + raise RuntimeError(f"Schema '{schema_name_str}' already exists.") from e + else: + if warn_on_error: + logger.warning(f"Failed to create schema {schema_name_str}. 
Reason: {e}") + else: + raise RuntimeError(f"Failed to create schema {schema_name_str}.") from e + + def _create_table_from_columns( + self, + table_name: TableName, + columns_to_types: t.Dict[str, exp.DataType], + primary_key: t.Optional[t.Tuple[str, ...]] = None, + exists: bool = True, + table_description: t.Optional[str] = None, + column_descriptions: t.Optional[t.Dict[str, str]] = None, + **kwargs: t.Any, + ) -> None: + """ + Creates a table, ensuring the schema exists first and that all + object names are fully qualified with the database. + """ + table_exp = exp.to_table(table_name) + schema_name = self._get_schema_name(table_name) + + self.create_schema(schema_name) + + fully_qualified_table_name = f"[{self.database}].[{schema_name}].[{table_exp.name}]" + + column_defs = ", ".join( + f"[{col}] {kind.sql(dialect=self.dialect)}" for col, kind in columns_to_types.items() + ) + + create_table_sql = f"CREATE TABLE {fully_qualified_table_name} ({column_defs})" + + if not exists: + self.execute(create_table_sql) + return + + if not self.table_exists(table_name): + self.execute(create_table_sql) + + if table_description and self.comments_enabled: + qualified_table_for_comment = self._fully_qualify(table_name) + self._create_table_comment(qualified_table_for_comment, table_description) + if column_descriptions and self.comments_enabled: + self._create_column_comments(qualified_table_for_comment, column_descriptions) + + def table_exists(self, table_name: TableName) -> bool: + """ + Checks if a table exists. + + Overridden to query the uppercase `INFORMATION_SCHEMA` required + by case-sensitive Fabric environments. + """ + table = exp.to_table(table_name) + schema = self._get_schema_name(table_name) + + sql = ( + exp.select("1") + .from_("INFORMATION_SCHEMA.TABLES") + .where(f"TABLE_NAME = '{table.alias_or_name}'") + .where(f"TABLE_SCHEMA = '{schema}'") + ) + + result = self.fetchone(sql, quote_identifiers=True) + + return result[0] == 1 if result else False + + def _fully_qualify(self, name: t.Union[TableName, SchemaName]) -> exp.Table: + """ + Ensures an object name is prefixed with the configured database and schema. + + Overridden to prevent qualification for temporary objects (starting with # or ##). + Temporary objects should not be qualified with database or schema in T-SQL. + """ + table = exp.to_table(name) + + if ( + table.this + and isinstance(table.this, exp.Identifier) + and (table.this.name.startswith("#")) + ): + temp_identifier = exp.Identifier(this=table.this.this, quoted=True) + return exp.Table(this=temp_identifier) + + schema = self._get_schema_name(name) + + return exp.Table( + this=table.this, + db=exp.to_identifier(schema) if schema else None, + catalog=exp.to_identifier(self.database), + ) + + def create_view( + self, + view_name: TableName, + query_or_df: QueryOrDF, + columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, + replace: bool = True, + materialized: bool = False, + materialized_properties: t.Optional[t.Dict[str, t.Any]] = None, + table_description: t.Optional[str] = None, + column_descriptions: t.Optional[t.Dict[str, str]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + **create_kwargs: t.Any, + ) -> None: + """ + Creates a view from a query or DataFrame. + + Overridden to ensure that the view name and all tables referenced + in the source query are fully qualified with the database name, + as required by Fabric. 
+ """ + view_schema = self._get_schema_name(view_name) + self.create_schema(view_schema) + + qualified_view_name = self._fully_qualify(view_name) + + if isinstance(query_or_df, exp.Expression): + for table in query_or_df.find_all(exp.Table): + if not table.catalog: + qualified_table = self._fully_qualify(table) + table.replace(qualified_table) + + return super().create_view( + qualified_view_name, + query_or_df, + columns_to_types, + replace, + materialized, + table_description=table_description, + column_descriptions=column_descriptions, + view_properties=view_properties, + **create_kwargs, + ) + + def columns( + self, table_name: TableName, include_pseudo_columns: bool = False + ) -> t.Dict[str, exp.DataType]: + table = exp.to_table(table_name) + schema = self._get_schema_name(table_name) + + if ( + not schema + and table.this + and isinstance(table.this, exp.Identifier) + and table.this.name.startswith("__temp_") + ): + schema = "dbo" + + if not schema: + logger.warning( + f"Cannot fetch columns for table '{table_name}' without a schema name in Fabric." + ) + return {} + + from_table = exp.Table( + this=exp.to_identifier("COLUMNS", quoted=True), + db=exp.to_identifier("INFORMATION_SCHEMA", quoted=True), + catalog=exp.to_identifier(self.database), + ) + + sql = ( + exp.select( + "COLUMN_NAME", + "DATA_TYPE", + "CHARACTER_MAXIMUM_LENGTH", + "NUMERIC_PRECISION", + "NUMERIC_SCALE", + ) + .from_(from_table) + .where(f"TABLE_NAME = '{table.name.strip('[]')}'") + .where(f"TABLE_SCHEMA = '{schema.strip('[]')}'") + .order_by("ORDINAL_POSITION") + ) + + df = self.fetchdf(sql) + + def build_var_length_col( + column_name: str, + data_type: str, + character_maximum_length: t.Optional[int] = None, + numeric_precision: t.Optional[int] = None, + numeric_scale: t.Optional[int] = None, + ) -> t.Tuple[str, str]: + data_type = data_type.lower() + + char_len_int = ( + int(character_maximum_length) if character_maximum_length is not None else None + ) + prec_int = int(numeric_precision) if numeric_precision is not None else None + scale_int = int(numeric_scale) if numeric_scale is not None else None + + if data_type in self.VARIABLE_LENGTH_DATA_TYPES and char_len_int is not None: + if char_len_int > 0: + return (column_name, f"{data_type}({char_len_int})") + if char_len_int == -1: + return (column_name, f"{data_type}(max)") + if ( + data_type in ("decimal", "numeric") + and prec_int is not None + and scale_int is not None + ): + return (column_name, f"{data_type}({prec_int}, {scale_int})") + if data_type == "float" and prec_int is not None: + return (column_name, f"{data_type}({prec_int})") + + return (column_name, data_type) + + columns_raw = [ + ( + row.COLUMN_NAME, + row.DATA_TYPE, + getattr(row, "CHARACTER_MAXIMUM_LENGTH", None), + getattr(row, "NUMERIC_PRECISION", None), + getattr(row, "NUMERIC_SCALE", None), + ) + for row in df.itertuples() + ] + + columns_processed = [build_var_length_col(*row) for row in columns_raw] + + return { + column_name: exp.DataType.build(data_type, dialect=self.dialect) + for column_name, data_type in columns_processed + } + + def create_schema( + self, + schema_name: SchemaName, + ignore_if_exists: bool = True, + warn_on_error: bool = True, + **kwargs: t.Any, + ) -> None: + if schema_name is None: + return + + schema_exp = to_schema(schema_name) + simple_schema_name_str = None + if schema_exp.db: + simple_schema_name_str = exp.to_identifier(schema_exp.db).name + + if not simple_schema_name_str: + logger.warning( + f"Could not determine simple schema name from 
'{schema_name}'. Skipping schema creation." + ) + return + + if ignore_if_exists: + try: + if self.schema_exists(simple_schema_name_str): + return + except Exception as e: + if warn_on_error: + logger.warning( + f"Failed to check for existence of schema '{simple_schema_name_str}': {e}" + ) + else: + raise + elif self.schema_exists(simple_schema_name_str): + raise RuntimeError(f"Schema '{simple_schema_name_str}' already exists.") + + try: + create_sql = f"CREATE SCHEMA [{simple_schema_name_str}]" + self.execute(create_sql) + except Exception as e: + error_message = str(e).lower() + if ( + "already exists" in error_message + or "there is already an object named" in error_message + ): + if ignore_if_exists: + return + raise RuntimeError( + f"Schema '{simple_schema_name_str}' already exists due to race condition." + ) from e + else: + if warn_on_error: + logger.warning(f"Failed to create schema {simple_schema_name_str}. Reason: {e}") + else: + raise RuntimeError(f"Failed to create schema {simple_schema_name_str}.") from e + + def _insert_overwrite_by_condition( + self, + table_name: TableName, + source_queries: t.List[SourceQuery], + columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, + where: t.Optional[exp.Condition] = None, + insert_overwrite_strategy_override: t.Optional[InsertOverwriteStrategy] = None, + **kwargs: t.Any, + ) -> None: + """ + Implements the insert overwrite strategy for Fabric. + + Overridden to enforce a `DELETE`/`INSERT` strategy, as Fabric's + `MERGE` statement has limitations. + """ + + columns_to_types = columns_to_types or self.columns(table_name) + + self.delete_from(table_name, where=where or exp.true()) + + for source_query in source_queries: + with source_query as query: + query = self._order_projections_and_filter(query, columns_to_types) + self._insert_append_query( + table_name, + query, + columns_to_types=columns_to_types, + order_projections=False, + ) diff --git a/sqlmesh/core/engine_adapter/fabric_warehouse.py b/sqlmesh/core/engine_adapter/fabric_warehouse.py deleted file mode 100644 index 037f827366..0000000000 --- a/sqlmesh/core/engine_adapter/fabric_warehouse.py +++ /dev/null @@ -1,233 +0,0 @@ -from __future__ import annotations - -import typing as t -from sqlglot import exp -from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter -from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, SourceQuery - -if t.TYPE_CHECKING: - from sqlmesh.core._typing import SchemaName, TableName - from sqlmesh.core.engine_adapter._typing import QueryOrDF - - -class FabricWarehouseAdapter(MSSQLEngineAdapter): - """ - Adapter for Microsoft Fabric Warehouses. - """ - - DIALECT = "tsql" - SUPPORTS_INDEXES = False - SUPPORTS_TRANSACTIONS = False - - INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT - - def __init__(self, *args: t.Any, **kwargs: t.Any): - self.database = kwargs.get("database") - - super().__init__(*args, **kwargs) - - if not self.database: - raise ValueError( - "The 'database' parameter is required in the connection config for the FabricWarehouseAdapter." - ) - try: - self.execute(f"USE [{self.database}]") - except Exception as e: - raise RuntimeError(f"Failed to set database context to '{self.database}'. 
Reason: {e}") - - def _get_schema_name(self, name: t.Union[TableName, SchemaName]) -> str: - """Extracts the schema name from a sqlglot object or string.""" - table = exp.to_table(name) - schema_part = table.db - - if isinstance(schema_part, exp.Identifier): - return schema_part.name - if isinstance(schema_part, str): - return schema_part - - if schema_part is None and table.this and table.this.is_identifier: - return table.this.name - - raise ValueError(f"Could not determine schema name from '{name}'") - - def create_schema(self, schema: SchemaName) -> None: - """ - Creates a schema in a Microsoft Fabric Warehouse. - - Overridden to handle Fabric's specific T-SQL requirements. - T-SQL's `CREATE SCHEMA` command does not support `IF NOT EXISTS`, so this - implementation first checks for the schema's existence in the - `INFORMATION_SCHEMA.SCHEMATA` view. - """ - sql = ( - exp.select("1") - .from_(f"{self.database}.INFORMATION_SCHEMA.SCHEMATA") - .where(f"SCHEMA_NAME = '{schema}'") - ) - if self.fetchone(sql): - return - self.execute(f"USE [{self.database}]") - self.execute(f"CREATE SCHEMA [{schema}]") - - def _create_table_from_columns( - self, - table_name: TableName, - columns_to_types: t.Dict[str, exp.DataType], - primary_key: t.Optional[t.Tuple[str, ...]] = None, - exists: bool = True, - table_description: t.Optional[str] = None, - column_descriptions: t.Optional[t.Dict[str, str]] = None, - **kwargs: t.Any, - ) -> None: - """ - Creates a table, ensuring the schema exists first and that all - object names are fully qualified with the database. - """ - table_exp = exp.to_table(table_name) - schema_name = self._get_schema_name(table_name) - - self.create_schema(schema_name) - - fully_qualified_table_name = f"[{self.database}].[{schema_name}].[{table_exp.name}]" - - column_defs = ", ".join( - f"[{col}] {kind.sql(dialect=self.dialect)}" for col, kind in columns_to_types.items() - ) - - create_table_sql = f"CREATE TABLE {fully_qualified_table_name} ({column_defs})" - - if not exists: - self.execute(create_table_sql) - return - - if not self.table_exists(table_name): - self.execute(create_table_sql) - - if table_description and self.comments_enabled: - qualified_table_for_comment = self._fully_qualify(table_name) - self._create_table_comment(qualified_table_for_comment, table_description) - if column_descriptions and self.comments_enabled: - self._create_column_comments(qualified_table_for_comment, column_descriptions) - - def table_exists(self, table_name: TableName) -> bool: - """ - Checks if a table exists. - - Overridden to query the uppercase `INFORMATION_SCHEMA` required - by case-sensitive Fabric environments. 
- """ - table = exp.to_table(table_name) - schema = self._get_schema_name(table_name) - - sql = ( - exp.select("1") - .from_("INFORMATION_SCHEMA.TABLES") - .where(f"TABLE_NAME = '{table.alias_or_name}'") - .where(f"TABLE_SCHEMA = '{schema}'") - ) - - result = self.fetchone(sql, quote_identifiers=True) - - return result[0] == 1 if result else False - - def _fully_qualify(self, name: t.Union[TableName, SchemaName]) -> exp.Table: - """Ensures an object name is prefixed with the configured database.""" - table = exp.to_table(name) - return exp.Table(this=table.this, db=table.db, catalog=exp.to_identifier(self.database)) - - def create_view( - self, - view_name: TableName, - query_or_df: QueryOrDF, - columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, - replace: bool = True, - materialized: bool = False, - materialized_properties: t.Optional[t.Dict[str, t.Any]] = None, - table_description: t.Optional[str] = None, - column_descriptions: t.Optional[t.Dict[str, str]] = None, - view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, - **create_kwargs: t.Any, - ) -> None: - """ - Creates a view from a query or DataFrame. - - Overridden to ensure that the view name and all tables referenced - in the source query are fully qualified with the database name, - as required by Fabric. - """ - view_schema = self._get_schema_name(view_name) - self.create_schema(view_schema) - - qualified_view_name = self._fully_qualify(view_name) - - if isinstance(query_or_df, exp.Expression): - for table in query_or_df.find_all(exp.Table): - if not table.catalog: - qualified_table = self._fully_qualify(table) - table.replace(qualified_table) - - return super().create_view( - qualified_view_name, - query_or_df, - columns_to_types, - replace, - materialized, - table_description=table_description, - column_descriptions=column_descriptions, - view_properties=view_properties, - **create_kwargs, - ) - - def columns( - self, table_name: TableName, include_pseudo_columns: bool = False - ) -> t.Dict[str, exp.DataType]: - """ - Fetches column names and types for the target table. - - Overridden to query the uppercase `INFORMATION_SCHEMA.COLUMNS` view - required by case-sensitive Fabric environments. - """ - table = exp.to_table(table_name) - schema = self._get_schema_name(table_name) - sql = ( - exp.select("COLUMN_NAME", "DATA_TYPE") - .from_(f"{self.database}.INFORMATION_SCHEMA.COLUMNS") - .where(f"TABLE_NAME = '{table.name}'") - .where(f"TABLE_SCHEMA = '{schema}'") - .order_by("ORDINAL_POSITION") - ) - df = self.fetchdf(sql) - return { - str(row.COLUMN_NAME): exp.DataType.build(str(row.DATA_TYPE), dialect=self.dialect) - for row in df.itertuples() - } - - def _insert_overwrite_by_condition( - self, - table_name: TableName, - source_queries: t.List[SourceQuery], - columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, - where: t.Optional[exp.Condition] = None, - insert_overwrite_strategy_override: t.Optional[InsertOverwriteStrategy] = None, - **kwargs: t.Any, - ) -> None: - """ - Implements the insert overwrite strategy for Fabric. - - Overridden to enforce a `DELETE`/`INSERT` strategy, as Fabric's - `MERGE` statement has limitations. 
- """ - - columns_to_types = columns_to_types or self.columns(table_name) - - self.delete_from(table_name, where=where or exp.true()) - - for source_query in source_queries: - with source_query as query: - query = self._order_projections_and_filter(query, columns_to_types) - self._insert_append_query( - table_name, - query, - columns_to_types=columns_to_types, - order_projections=False, - ) From 0080583a2cd4f67df76575d2524a3f411cc4d39c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Wed, 18 Jun 2025 11:21:47 +0200 Subject: [PATCH 25/70] isnan error --- sqlmesh/core/config/connection.py | 5 +- sqlmesh/core/engine_adapter/__init__.py | 2 - sqlmesh/core/engine_adapter/fabric.py | 160 ++++++++++-------------- 3 files changed, 65 insertions(+), 102 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 4a65ef3436..d5b538711e 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1681,20 +1681,17 @@ class FabricConnectionConfig(MSSQLConnectionConfig): It is recommended to use the 'pyodbc' driver for Fabric. """ - type_: t.Literal["fabric"] = Field(alias="type", default="fabric") + type_: t.Literal["fabric"] = Field(alias="type", default="fabric") # type: ignore autocommit: t.Optional[bool] = True @property def _engine_adapter(self) -> t.Type[EngineAdapter]: - # This is the crucial link to the adapter you already created. from sqlmesh.core.engine_adapter.fabric import FabricAdapter return FabricAdapter @property def _extra_engine_config(self) -> t.Dict[str, t.Any]: - # This ensures the 'database' name from the config is passed - # to the FabricAdapter's constructor. return { "database": self.database, "catalog_support": CatalogSupport.REQUIRES_SET_CATALOG, diff --git a/sqlmesh/core/engine_adapter/__init__.py b/sqlmesh/core/engine_adapter/__init__.py index c8b8299bd1..337de39905 100644 --- a/sqlmesh/core/engine_adapter/__init__.py +++ b/sqlmesh/core/engine_adapter/__init__.py @@ -47,11 +47,9 @@ def create_engine_adapter( connection_factory: t.Callable[[], t.Any], dialect: str, **kwargs: t.Any ) -> EngineAdapter: - print(kwargs) dialect = dialect.lower() dialect = DIALECT_ALIASES.get(dialect, dialect) engine_adapter = DIALECT_TO_ENGINE_ADAPTER.get(dialect) - print(engine_adapter) if engine_adapter is None: return EngineAdapter(connection_factory, dialect, **kwargs) if engine_adapter is EngineAdapterWithIndexSupport: diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 4865c3c8f5..1f21ffbf26 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -43,7 +43,7 @@ def __init__(self, *args: t.Any, **kwargs: t.Any): except Exception as e: raise RuntimeError(f"Failed to set database context to '{self.database}'. Reason: {e}") - def _get_schema_name(self, name: t.Union[str, exp.Table, exp.Identifier]) -> t.Optional[str]: + def _get_schema_name(self, name: t.Union[str, exp.Table]) -> t.Optional[str]: """ Safely extracts the schema name from a table or schema name, which can be a string or a sqlglot expression. @@ -112,14 +112,31 @@ def _get_data_objects( catalog=catalog, schema=row.schema_name, name=row.name, - type=DataObjectType.from_str(row.type), + type=DataObjectType.from_str(str(row.type)), ) for row in dataframe.itertuples() ] + def schema_exists(self, schema_name: SchemaName) -> bool: + """ + Checks if a schema exists. 
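+
+        Queries `INFORMATION_SCHEMA.SCHEMATA`, filtering on both `SCHEMA_NAME`
+        and `CATALOG_NAME` so that only schemas in the configured database are
+        considered. The schema part is read via `exp.to_table(...).db`, so a
+        bare, unqualified name yields no schema and the check returns False.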
+ """ + schema = exp.to_table(schema_name).db + if not schema: + return False + + sql = ( + exp.select("1") + .from_("INFORMATION_SCHEMA.SCHEMATA") + .where(f"SCHEMA_NAME = '{schema}'") + .where(f"CATALOG_NAME = '{self.database}'") + ) + result = self.fetchone(sql, quote_identifiers=True) + return result[0] == 1 if result else False + def create_schema( self, - schema_name: SchemaName, + schema_name: t.Optional[SchemaName], ignore_if_exists: bool = True, warn_on_error: bool = True, **kwargs: t.Any, @@ -128,53 +145,51 @@ def create_schema( Creates a schema in a Microsoft Fabric Warehouse. Overridden to handle Fabric's specific T-SQL requirements. - T-SQL's `CREATE SCHEMA` command does not support `IF NOT EXISTS` directly - as part of the statement in all contexts, and error messages suggest - issues with batching or preceding statements like USE. """ - if schema_name is None: + if not schema_name: return - schema_name_str = ( - schema_name.name if isinstance(schema_name, exp.Identifier) else str(schema_name) - ) - - if not schema_name_str: - logger.warning("Attempted to create a schema with an empty name. Skipping.") - return - - schema_name_str = schema_name_str.strip('[]"').rstrip(".") + schema_exp = to_schema(schema_name) + simple_schema_name_str = exp.to_identifier(schema_exp.db).name if schema_exp.db else None - if not schema_name_str: + if not simple_schema_name_str: logger.warning( - "Attempted to create a schema with an empty name after sanitization. Skipping." + f"Could not determine simple schema name from '{schema_name}'. Skipping schema creation." ) return try: - if self.schema_exists(schema_name_str): + if self.schema_exists(simple_schema_name_str): if ignore_if_exists: return - raise RuntimeError(f"Schema '{schema_name_str}' already exists.") + raise RuntimeError(f"Schema '{simple_schema_name_str}' already exists.") except Exception as e: if warn_on_error: - logger.warning(f"Failed to check for existence of schema '{schema_name_str}': {e}") + logger.warning( + f"Failed to check for existence of schema '{simple_schema_name_str}': {e}" + ) else: raise try: - create_sql = f"CREATE SCHEMA [{schema_name_str}]" + create_sql = f"CREATE SCHEMA [{simple_schema_name_str}]" self.execute(create_sql) except Exception as e: - if "already exists" in str(e).lower() or "There is already an object named" in str(e): + error_message = str(e).lower() + if ( + "already exists" in error_message + or "there is already an object named" in error_message + ): if ignore_if_exists: return - raise RuntimeError(f"Schema '{schema_name_str}' already exists.") from e + raise RuntimeError( + f"Schema '{simple_schema_name_str}' already exists due to race condition." + ) from e else: if warn_on_error: - logger.warning(f"Failed to create schema {schema_name_str}. Reason: {e}") + logger.warning(f"Failed to create schema {simple_schema_name_str}. 
Reason: {e}") else: - raise RuntimeError(f"Failed to create schema {schema_name_str}.") from e + raise RuntimeError(f"Failed to create schema {simple_schema_name_str}.") from e def _create_table_from_columns( self, @@ -251,7 +266,7 @@ def _fully_qualify(self, name: t.Union[TableName, SchemaName]) -> exp.Table: and isinstance(table.this, exp.Identifier) and (table.this.name.startswith("#")) ): - temp_identifier = exp.Identifier(this=table.this.this, quoted=True) + temp_identifier = exp.Identifier(this=table.this.name, quoted=True) return exp.Table(this=temp_identifier) schema = self._get_schema_name(name) @@ -308,6 +323,8 @@ def create_view( def columns( self, table_name: TableName, include_pseudo_columns: bool = False ) -> t.Dict[str, exp.DataType]: + import numpy as np + table = exp.to_table(table_name) schema = self._get_schema_name(table_name) @@ -346,6 +363,7 @@ def columns( ) df = self.fetchdf(sql) + df = df.replace({np.nan: None}) def build_var_length_col( column_name: str, @@ -356,11 +374,9 @@ def build_var_length_col( ) -> t.Tuple[str, str]: data_type = data_type.lower() - char_len_int = ( - int(character_maximum_length) if character_maximum_length is not None else None - ) - prec_int = int(numeric_precision) if numeric_precision is not None else None - scale_int = int(numeric_scale) if numeric_scale is not None else None + char_len_int = character_maximum_length + prec_int = numeric_precision + scale_int = numeric_scale if data_type in self.VARIABLE_LENGTH_DATA_TYPES and char_len_int is not None: if char_len_int > 0: @@ -378,79 +394,31 @@ def build_var_length_col( return (column_name, data_type) - columns_raw = [ - ( - row.COLUMN_NAME, - row.DATA_TYPE, - getattr(row, "CHARACTER_MAXIMUM_LENGTH", None), - getattr(row, "NUMERIC_PRECISION", None), - getattr(row, "NUMERIC_SCALE", None), + def _to_optional_int(val: t.Any) -> t.Optional[int]: + """Safely convert DataFrame values to Optional[int] for mypy.""" + if val is None: + return None + try: + return int(val) + except (ValueError, TypeError): + return None + + columns_processed = [ + build_var_length_col( + str(row.COLUMN_NAME), + str(row.DATA_TYPE), + _to_optional_int(row.CHARACTER_MAXIMUM_LENGTH), + _to_optional_int(row.NUMERIC_PRECISION), + _to_optional_int(row.NUMERIC_SCALE), ) for row in df.itertuples() ] - columns_processed = [build_var_length_col(*row) for row in columns_raw] - return { column_name: exp.DataType.build(data_type, dialect=self.dialect) for column_name, data_type in columns_processed } - def create_schema( - self, - schema_name: SchemaName, - ignore_if_exists: bool = True, - warn_on_error: bool = True, - **kwargs: t.Any, - ) -> None: - if schema_name is None: - return - - schema_exp = to_schema(schema_name) - simple_schema_name_str = None - if schema_exp.db: - simple_schema_name_str = exp.to_identifier(schema_exp.db).name - - if not simple_schema_name_str: - logger.warning( - f"Could not determine simple schema name from '{schema_name}'. Skipping schema creation." 
- ) - return - - if ignore_if_exists: - try: - if self.schema_exists(simple_schema_name_str): - return - except Exception as e: - if warn_on_error: - logger.warning( - f"Failed to check for existence of schema '{simple_schema_name_str}': {e}" - ) - else: - raise - elif self.schema_exists(simple_schema_name_str): - raise RuntimeError(f"Schema '{simple_schema_name_str}' already exists.") - - try: - create_sql = f"CREATE SCHEMA [{simple_schema_name_str}]" - self.execute(create_sql) - except Exception as e: - error_message = str(e).lower() - if ( - "already exists" in error_message - or "there is already an object named" in error_message - ): - if ignore_if_exists: - return - raise RuntimeError( - f"Schema '{simple_schema_name_str}' already exists due to race condition." - ) from e - else: - if warn_on_error: - logger.warning(f"Failed to create schema {simple_schema_name_str}. Reason: {e}") - else: - raise RuntimeError(f"Failed to create schema {simple_schema_name_str}.") from e - def _insert_overwrite_by_condition( self, table_name: TableName, From bded0d0c76446030b2b255a737e8592358347137 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Thu, 19 Jun 2025 13:04:54 +0200 Subject: [PATCH 26/70] CTEs no qualify --- sqlmesh/core/engine_adapter/fabric.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 1f21ffbf26..9f37e8b14f 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -303,7 +303,14 @@ def create_view( qualified_view_name = self._fully_qualify(view_name) if isinstance(query_or_df, exp.Expression): + # CTEs should not be qualified with the database name. + cte_names = {cte.alias_or_name for cte in query_or_df.find_all(exp.CTE)} + for table in query_or_df.find_all(exp.Table): + if table.this.name in cte_names: + continue + + # Qualify all other tables that don't already have a catalog. 
if not table.catalog: qualified_table = self._fully_qualify(table) table.replace(qualified_table) From 51753f1c8c57a76391b92738facd068938aa9192 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Mon, 23 Jun 2025 20:44:43 +0200 Subject: [PATCH 27/70] simplifying --- sqlmesh/core/config/connection.py | 9 +- sqlmesh/core/engine_adapter/fabric.py | 392 +++----------------------- 2 files changed, 40 insertions(+), 361 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index d5b538711e..0d9f5683e1 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -43,7 +43,14 @@ logger = logging.getLogger(__name__) -RECOMMENDED_STATE_SYNC_ENGINES = {"postgres", "gcp_postgres", "mysql", "mssql", "azuresql"} +RECOMMENDED_STATE_SYNC_ENGINES = { + "postgres", + "gcp_postgres", + "mysql", + "mssql", + "azuresql", + "fabric", +} FORBIDDEN_STATE_SYNC_ENGINES = { # Do not support row-level operations "spark", diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 9f37e8b14f..a4eb30a91d 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -3,19 +3,10 @@ import typing as t from sqlglot import exp from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter -from sqlmesh.core.engine_adapter.shared import ( - InsertOverwriteStrategy, - SourceQuery, - DataObject, - DataObjectType, -) -import logging -from sqlmesh.core.dialect import to_schema +from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, SourceQuery -logger = logging.getLogger(__name__) if t.TYPE_CHECKING: - from sqlmesh.core._typing import SchemaName, TableName - from sqlmesh.core.engine_adapter._typing import QueryOrDF + from sqlmesh.core._typing import TableName class FabricAdapter(MSSQLEngineAdapter): @@ -26,334 +17,35 @@ class FabricAdapter(MSSQLEngineAdapter): DIALECT = "fabric" SUPPORTS_INDEXES = False SUPPORTS_TRANSACTIONS = False - INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT - def __init__(self, *args: t.Any, **kwargs: t.Any): - self.database = kwargs.get("database") - - super().__init__(*args, **kwargs) - - if not self.database: - raise ValueError( - "The 'database' parameter is required in the connection config for the FabricWarehouseAdapter." - ) - try: - self.execute(f"USE [{self.database}]") - except Exception as e: - raise RuntimeError(f"Failed to set database context to '{self.database}'. Reason: {e}") - - def _get_schema_name(self, name: t.Union[str, exp.Table]) -> t.Optional[str]: - """ - Safely extracts the schema name from a table or schema name, which can be - a string or a sqlglot expression. - - Fabric requires database names to be explicitly specified in many contexts, - including referencing schemas in INFORMATION_SCHEMA. This function helps - in extracting the schema part correctly from potentially qualified names. - """ - table = exp.to_table(name) - - if table.this and table.this.name.startswith("#"): - return None - - schema_part = table.db - - if not schema_part: - return None - - if isinstance(schema_part, exp.Identifier): - return schema_part.name - if isinstance(schema_part, str): - return schema_part - - raise TypeError(f"Unexpected type for schema part: {type(schema_part)}") - - def _get_data_objects( - self, schema_name: SchemaName, object_names: t.Optional[t.Set[str]] = None - ) -> t.List[DataObject]: - """ - Returns all the data objects that exist in the given schema and database. 
- - Overridden to query `INFORMATION_SCHEMA.TABLES` with explicit database qualification - and preserved casing using `quoted=True`. - """ - import pandas as pd - - catalog = self.get_current_catalog() - - from_table = exp.Table( - this=exp.to_identifier("TABLES", quoted=True), - db=exp.to_identifier("INFORMATION_SCHEMA", quoted=True), - catalog=exp.to_identifier(self.database), - ) - - query = ( - exp.select( - exp.column("TABLE_NAME").as_("name"), - exp.column("TABLE_SCHEMA").as_("schema_name"), - exp.case() - .when(exp.column("TABLE_TYPE").eq("BASE TABLE"), exp.Literal.string("TABLE")) - .else_(exp.column("TABLE_TYPE")) - .as_("type"), - ) - .from_(from_table) - .where(exp.column("TABLE_SCHEMA").eq(str(to_schema(schema_name).db).strip("[]"))) - ) - if object_names: - query = query.where( - exp.column("TABLE_NAME").isin(*(name.strip("[]") for name in object_names)) - ) - - dataframe: pd.DataFrame = self.fetchdf(query) - - return [ - DataObject( - catalog=catalog, - schema=row.schema_name, - name=row.name, - type=DataObjectType.from_str(str(row.type)), - ) - for row in dataframe.itertuples() - ] - - def schema_exists(self, schema_name: SchemaName) -> bool: - """ - Checks if a schema exists. - """ - schema = exp.to_table(schema_name).db - if not schema: - return False - - sql = ( - exp.select("1") - .from_("INFORMATION_SCHEMA.SCHEMATA") - .where(f"SCHEMA_NAME = '{schema}'") - .where(f"CATALOG_NAME = '{self.database}'") - ) - result = self.fetchone(sql, quote_identifiers=True) - return result[0] == 1 if result else False - - def create_schema( - self, - schema_name: t.Optional[SchemaName], - ignore_if_exists: bool = True, - warn_on_error: bool = True, - **kwargs: t.Any, - ) -> None: - """ - Creates a schema in a Microsoft Fabric Warehouse. - - Overridden to handle Fabric's specific T-SQL requirements. - """ - if not schema_name: - return - - schema_exp = to_schema(schema_name) - simple_schema_name_str = exp.to_identifier(schema_exp.db).name if schema_exp.db else None - - if not simple_schema_name_str: - logger.warning( - f"Could not determine simple schema name from '{schema_name}'. Skipping schema creation." - ) - return - - try: - if self.schema_exists(simple_schema_name_str): - if ignore_if_exists: - return - raise RuntimeError(f"Schema '{simple_schema_name_str}' already exists.") - except Exception as e: - if warn_on_error: - logger.warning( - f"Failed to check for existence of schema '{simple_schema_name_str}': {e}" - ) - else: - raise - - try: - create_sql = f"CREATE SCHEMA [{simple_schema_name_str}]" - self.execute(create_sql) - except Exception as e: - error_message = str(e).lower() - if ( - "already exists" in error_message - or "there is already an object named" in error_message - ): - if ignore_if_exists: - return - raise RuntimeError( - f"Schema '{simple_schema_name_str}' already exists due to race condition." - ) from e - else: - if warn_on_error: - logger.warning(f"Failed to create schema {simple_schema_name_str}. Reason: {e}") - else: - raise RuntimeError(f"Failed to create schema {simple_schema_name_str}.") from e - - def _create_table_from_columns( - self, - table_name: TableName, - columns_to_types: t.Dict[str, exp.DataType], - primary_key: t.Optional[t.Tuple[str, ...]] = None, - exists: bool = True, - table_description: t.Optional[str] = None, - column_descriptions: t.Optional[t.Dict[str, str]] = None, - **kwargs: t.Any, - ) -> None: - """ - Creates a table, ensuring the schema exists first and that all - object names are fully qualified with the database. 
-        """
-        table_exp = exp.to_table(table_name)
-        schema_name = self._get_schema_name(table_name)
-
-        self.create_schema(schema_name)
-
-        fully_qualified_table_name = f"[{self.database}].[{schema_name}].[{table_exp.name}]"
-
-        column_defs = ", ".join(
-            f"[{col}] {kind.sql(dialect=self.dialect)}" for col, kind in columns_to_types.items()
-        )
-
-        create_table_sql = f"CREATE TABLE {fully_qualified_table_name} ({column_defs})"
-
-        if not exists:
-            self.execute(create_table_sql)
-            return
-
-        if not self.table_exists(table_name):
-            self.execute(create_table_sql)
-
-        if table_description and self.comments_enabled:
-            qualified_table_for_comment = self._fully_qualify(table_name)
-            self._create_table_comment(qualified_table_for_comment, table_description)
-        if column_descriptions and self.comments_enabled:
-            self._create_column_comments(qualified_table_for_comment, column_descriptions)
-
     def table_exists(self, table_name: TableName) -> bool:
         """
         Checks if a table exists.
 
-        Overridden to query the uppercase `INFORMATION_SCHEMA` required
+        Queries the uppercase `INFORMATION_SCHEMA`, as required
         by case-sensitive Fabric environments.
         """
         table = exp.to_table(table_name)
-        schema = self._get_schema_name(table_name)
-
         sql = (
             exp.select("1")
             .from_("INFORMATION_SCHEMA.TABLES")
             .where(f"TABLE_NAME = '{table.alias_or_name}'")
-            .where(f"TABLE_SCHEMA = '{schema}'")
+            .where(f"TABLE_SCHEMA = '{table.db}'")
         )
 
         result = self.fetchone(sql, quote_identifiers=True)
 
         return result[0] == 1 if result else False
 
-    def _fully_qualify(self, name: t.Union[TableName, SchemaName]) -> exp.Table:
-        """
-        Ensures an object name is prefixed with the configured database and schema.
-
-        Overridden to prevent qualification for temporary objects (starting with # or ##).
-        Temporary objects should not be qualified with database or schema in T-SQL.
-        """
-        table = exp.to_table(name)
-
-        if (
-            table.this
-            and isinstance(table.this, exp.Identifier)
-            and (table.this.name.startswith("#"))
-        ):
-            temp_identifier = exp.Identifier(this=table.this.name, quoted=True)
-            return exp.Table(this=temp_identifier)
-
-        schema = self._get_schema_name(name)
-
-        return exp.Table(
-            this=table.this,
-            db=exp.to_identifier(schema) if schema else None,
-            catalog=exp.to_identifier(self.database),
-        )
-
-    def create_view(
-        self,
-        view_name: TableName,
-        query_or_df: QueryOrDF,
-        columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
-        replace: bool = True,
-        materialized: bool = False,
-        materialized_properties: t.Optional[t.Dict[str, t.Any]] = None,
-        table_description: t.Optional[str] = None,
-        column_descriptions: t.Optional[t.Dict[str, str]] = None,
-        view_properties: t.Optional[t.Dict[str, exp.Expression]] = None,
-        **create_kwargs: t.Any,
-    ) -> None:
-        """
-        Creates a view from a query or DataFrame.
-
-        Overridden to ensure that the view name and all tables referenced
-        in the source query are fully qualified with the database name,
-        as required by Fabric.
-        """
-        view_schema = self._get_schema_name(view_name)
-        self.create_schema(view_schema)
-
-        qualified_view_name = self._fully_qualify(view_name)
-
-        if isinstance(query_or_df, exp.Expression):
-            # CTEs should not be qualified with the database name.
-            cte_names = {cte.alias_or_name for cte in query_or_df.find_all(exp.CTE)}
-
-            for table in query_or_df.find_all(exp.Table):
-                if table.this.name in cte_names:
-                    continue
-
-            # Qualify all other tables that don't already have a catalog.
- if not table.catalog: - qualified_table = self._fully_qualify(table) - table.replace(qualified_table) - - return super().create_view( - qualified_view_name, - query_or_df, - columns_to_types, - replace, - materialized, - table_description=table_description, - column_descriptions=column_descriptions, - view_properties=view_properties, - **create_kwargs, - ) - def columns( - self, table_name: TableName, include_pseudo_columns: bool = False + self, + table_name: TableName, + include_pseudo_columns: bool = True, ) -> t.Dict[str, exp.DataType]: - import numpy as np + """Fabric doesn't support describe so we query INFORMATION_SCHEMA.""" table = exp.to_table(table_name) - schema = self._get_schema_name(table_name) - - if ( - not schema - and table.this - and isinstance(table.this, exp.Identifier) - and table.this.name.startswith("__temp_") - ): - schema = "dbo" - - if not schema: - logger.warning( - f"Cannot fetch columns for table '{table_name}' without a schema name in Fabric." - ) - return {} - - from_table = exp.Table( - this=exp.to_identifier("COLUMNS", quoted=True), - db=exp.to_identifier("INFORMATION_SCHEMA", quoted=True), - catalog=exp.to_identifier(self.database), - ) sql = ( exp.select( @@ -363,14 +55,14 @@ def columns( "NUMERIC_PRECISION", "NUMERIC_SCALE", ) - .from_(from_table) - .where(f"TABLE_NAME = '{table.name.strip('[]')}'") - .where(f"TABLE_SCHEMA = '{schema.strip('[]')}'") - .order_by("ORDINAL_POSITION") + .from_("INFORMATION_SCHEMA.COLUMNS") + .where(f"TABLE_NAME = '{table.name}'") ) + database_name = table.db + if database_name: + sql = sql.where(f"TABLE_SCHEMA = '{database_name}'") - df = self.fetchdf(sql) - df = df.replace({np.nan: None}) + columns_raw = self.fetchall(sql, quote_identifiers=True) def build_var_length_col( column_name: str, @@ -378,52 +70,32 @@ def build_var_length_col( character_maximum_length: t.Optional[int] = None, numeric_precision: t.Optional[int] = None, numeric_scale: t.Optional[int] = None, - ) -> t.Tuple[str, str]: + ) -> tuple: data_type = data_type.lower() - - char_len_int = character_maximum_length - prec_int = numeric_precision - scale_int = numeric_scale - - if data_type in self.VARIABLE_LENGTH_DATA_TYPES and char_len_int is not None: - if char_len_int > 0: - return (column_name, f"{data_type}({char_len_int})") - if char_len_int == -1: - return (column_name, f"{data_type}(max)") if ( - data_type in ("decimal", "numeric") - and prec_int is not None - and scale_int is not None + data_type in self.VARIABLE_LENGTH_DATA_TYPES + and character_maximum_length is not None + and character_maximum_length > 0 + ): + return (column_name, f"{data_type}({character_maximum_length})") + if ( + data_type in ("varbinary", "varchar", "nvarchar") + and character_maximum_length is not None + and character_maximum_length == -1 ): - return (column_name, f"{data_type}({prec_int}, {scale_int})") - if data_type == "float" and prec_int is not None: - return (column_name, f"{data_type}({prec_int})") + return (column_name, f"{data_type}(max)") + if data_type in ("decimal", "numeric"): + return (column_name, f"{data_type}({numeric_precision}, {numeric_scale})") + if data_type == "float": + return (column_name, f"{data_type}({numeric_precision})") return (column_name, data_type) - def _to_optional_int(val: t.Any) -> t.Optional[int]: - """Safely convert DataFrame values to Optional[int] for mypy.""" - if val is None: - return None - try: - return int(val) - except (ValueError, TypeError): - return None - - columns_processed = [ - build_var_length_col( - 
str(row.COLUMN_NAME), - str(row.DATA_TYPE), - _to_optional_int(row.CHARACTER_MAXIMUM_LENGTH), - _to_optional_int(row.NUMERIC_PRECISION), - _to_optional_int(row.NUMERIC_SCALE), - ) - for row in df.itertuples() - ] + columns = [build_var_length_col(*row) for row in columns_raw] return { column_name: exp.DataType.build(data_type, dialect=self.dialect) - for column_name, data_type in columns_processed + for column_name, data_type in columns } def _insert_overwrite_by_condition( @@ -448,7 +120,7 @@ def _insert_overwrite_by_condition( for source_query in source_queries: with source_query as query: - query = self._order_projections_and_filter(query, columns_to_types) + query = self._order_projections_and_filter(query, columns_to_types, where=where) self._insert_append_query( table_name, query, From 55d73145a03b191f3bf1b9ea70c7e5b81a4042b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Mon, 23 Jun 2025 22:27:59 +0200 Subject: [PATCH 28/70] docs & tests --- docs/integrations/engines/fabric.md | 30 +++++++++ docs/integrations/overview.md | 1 + mkdocs.yml | 1 + pyproject.toml | 1 + sqlmesh/core/config/connection.py | 2 +- sqlmesh/core/engine_adapter/fabric.py | 4 +- tests/core/engine_adapter/test_fabric.py | 83 ++++++++++++++++++++++++ 7 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 docs/integrations/engines/fabric.md create mode 100644 tests/core/engine_adapter/test_fabric.py diff --git a/docs/integrations/engines/fabric.md b/docs/integrations/engines/fabric.md new file mode 100644 index 0000000000..aca9c32eed --- /dev/null +++ b/docs/integrations/engines/fabric.md @@ -0,0 +1,30 @@ +# Fabric + +## Local/Built-in Scheduler +**Engine Adapter Type**: `fabric` + +### Installation +#### Microsoft Entra ID / Azure Active Directory Authentication: +``` +pip install "sqlmesh[mssql-odbc]" +``` + +### Connection options + +| Option | Description | Type | Required | +| ----------------- | ------------------------------------------------------------ | :----------: | :------: | +| `type` | Engine type name - must be `fabric` | string | Y | +| `host` | The hostname of the Fabric Warehouse server | string | Y | +| `user` | The client id to use for authentication with the Fabric Warehouse server | string | N | +| `password` | The client secret to use for authentication with the Fabric Warehouse server | string | N | +| `port` | The port number of the Fabric Warehouse server | int | N | +| `database` | The target database | string | N | +| `charset` | The character set used for the connection | string | N | +| `timeout` | The query timeout in seconds. Default: no timeout | int | N | +| `login_timeout` | The timeout for connection and login in seconds. Default: 60 | int | N | +| `appname` | The application name to use for the connection | string | N | +| `conn_properties` | The list of connection properties | list[string] | N | +| `autocommit` | Is autocommit mode enabled. Default: false | bool | N | +| `driver` | The driver to use for the connection. Default: pyodbc | string | N | +| `driver_name` | The driver name to use for the connection. E.g., *ODBC Driver 18 for SQL Server* | string | N | +| `odbc_properties` | The dict of ODBC connection properties. E.g., authentication: ActiveDirectoryServicePrincipal. See more [here](https://learn.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver16). 
| dict | N | \ No newline at end of file diff --git a/docs/integrations/overview.md b/docs/integrations/overview.md index 5e850afbf6..94b9289d21 100644 --- a/docs/integrations/overview.md +++ b/docs/integrations/overview.md @@ -17,6 +17,7 @@ SQLMesh supports the following execution engines for running SQLMesh projects (e * [ClickHouse](./engines/clickhouse.md) (clickhouse) * [Databricks](./engines/databricks.md) (databricks) * [DuckDB](./engines/duckdb.md) (duckdb) +* [Fabric](./engines/fabric.md) (fabric) * [MotherDuck](./engines/motherduck.md) (motherduck) * [MSSQL](./engines/mssql.md) (mssql) * [MySQL](./engines/mysql.md) (mysql) diff --git a/mkdocs.yml b/mkdocs.yml index aa4db57cb4..3bb0e868e8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -83,6 +83,7 @@ nav: - integrations/engines/clickhouse.md - integrations/engines/databricks.md - integrations/engines/duckdb.md + - integrations/engines/fabric.md - integrations/engines/motherduck.md - integrations/engines/mssql.md - integrations/engines/mysql.md diff --git a/pyproject.toml b/pyproject.toml index 204a1c7f3d..9f066624d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -252,6 +252,7 @@ markers = [ "clickhouse_cloud: test for Clickhouse (cloud mode)", "databricks: test for Databricks", "duckdb: test for DuckDB", + "fabric: test for Fabric", "motherduck: test for MotherDuck", "mssql: test for MSSQL", "mysql: test for MySQL", diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 0d9f5683e1..65b00e0852 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -49,7 +49,6 @@ "mysql", "mssql", "azuresql", - "fabric", } FORBIDDEN_STATE_SYNC_ENGINES = { # Do not support row-level operations @@ -1689,6 +1688,7 @@ class FabricConnectionConfig(MSSQLConnectionConfig): """ type_: t.Literal["fabric"] = Field(alias="type", default="fabric") # type: ignore + driver: t.Literal["pyodbc"] = "pyodbc" autocommit: t.Optional[bool] = True @property diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index a4eb30a91d..44cc8bcfb3 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -31,8 +31,10 @@ def table_exists(self, table_name: TableName) -> bool: exp.select("1") .from_("INFORMATION_SCHEMA.TABLES") .where(f"TABLE_NAME = '{table.alias_or_name}'") - .where(f"TABLE_SCHEMA = '{table.db}'") ) + database_name = table.db + if database_name: + sql = sql.where(f"TABLE_SCHEMA = '{database_name}'") result = self.fetchone(sql, quote_identifiers=True) diff --git a/tests/core/engine_adapter/test_fabric.py b/tests/core/engine_adapter/test_fabric.py new file mode 100644 index 0000000000..623bbe6653 --- /dev/null +++ b/tests/core/engine_adapter/test_fabric.py @@ -0,0 +1,83 @@ +# type: ignore + +import typing as t + +import pytest +from sqlglot import exp, parse_one + +from sqlmesh.core.engine_adapter import FabricAdapter +from tests.core.engine_adapter import to_sql_calls + +pytestmark = [pytest.mark.engine, pytest.mark.fabric] + + +@pytest.fixture +def adapter(make_mocked_engine_adapter: t.Callable) -> FabricAdapter: + return make_mocked_engine_adapter(FabricAdapter) + + +def test_columns(adapter: FabricAdapter): + adapter.cursor.fetchall.return_value = [ + ("decimal_ps", "decimal", None, 5, 4), + ("decimal", "decimal", None, 18, 0), + ("float", "float", None, 53, None), + ("char_n", "char", 10, None, None), + ("varchar_n", "varchar", 10, None, None), + ("nvarchar_max", "nvarchar", -1, None, None), + ] + + assert 
adapter.columns("db.table") == { + "decimal_ps": exp.DataType.build("decimal(5, 4)", dialect=adapter.dialect), + "decimal": exp.DataType.build("decimal(18, 0)", dialect=adapter.dialect), + "float": exp.DataType.build("float(53)", dialect=adapter.dialect), + "char_n": exp.DataType.build("char(10)", dialect=adapter.dialect), + "varchar_n": exp.DataType.build("varchar(10)", dialect=adapter.dialect), + "nvarchar_max": exp.DataType.build("nvarchar(max)", dialect=adapter.dialect), + } + + # Verify that the adapter queries the uppercase INFORMATION_SCHEMA + adapter.cursor.execute.assert_called_once_with( + """SELECT [COLUMN_NAME], [DATA_TYPE], [CHARACTER_MAXIMUM_LENGTH], [NUMERIC_PRECISION], [NUMERIC_SCALE] FROM [INFORMATION_SCHEMA].[COLUMNS] WHERE [TABLE_NAME] = 'table' AND [TABLE_SCHEMA] = 'db';""" + ) + + +def test_table_exists(adapter: FabricAdapter): + adapter.cursor.fetchone.return_value = (1,) + assert adapter.table_exists("db.table") + # Verify that the adapter queries the uppercase INFORMATION_SCHEMA + adapter.cursor.execute.assert_called_once_with( + """SELECT 1 FROM [INFORMATION_SCHEMA].[TABLES] WHERE [TABLE_NAME] = 'table' AND [TABLE_SCHEMA] = 'db';""" + ) + + adapter.cursor.fetchone.return_value = None + assert not adapter.table_exists("db.table") + + +def test_insert_overwrite_by_time_partition(adapter: FabricAdapter): + adapter.insert_overwrite_by_time_partition( + "test_table", + parse_one("SELECT a, b FROM tbl"), + start="2022-01-01", + end="2022-01-02", + time_column="b", + time_formatter=lambda x, _: exp.Literal.string(x.strftime("%Y-%m-%d")), + columns_to_types={"a": exp.DataType.build("INT"), "b": exp.DataType.build("STRING")}, + ) + + # Fabric adapter should use DELETE/INSERT strategy, not MERGE. + assert to_sql_calls(adapter) == [ + """DELETE FROM [test_table] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""", + """INSERT INTO [test_table] ([a], [b]) SELECT [a], [b] FROM (SELECT [a], [b] FROM [tbl]) AS [_subquery] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""", + ] + + +def test_replace_query(adapter: FabricAdapter): + adapter.cursor.fetchone.return_value = (1,) + adapter.replace_query("test_table", parse_one("SELECT a FROM tbl"), {"a": "int"}) + + # This behavior is inherited from MSSQLEngineAdapter and should be TRUNCATE + INSERT + assert to_sql_calls(adapter) == [ + """SELECT 1 FROM [INFORMATION_SCHEMA].[TABLES] WHERE [TABLE_NAME] = 'test_table';""", + "TRUNCATE TABLE [test_table];", + "INSERT INTO [test_table] ([a]) SELECT [a] FROM [tbl];", + ] From 1f37a4b2a22a51761cd21763ed6016c14c392552 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Mon, 23 Jun 2025 23:29:03 +0200 Subject: [PATCH 29/70] connection tests --- docs/guides/configuration.md | 1 + sqlmesh/core/config/__init__.py | 1 + sqlmesh/core/engine_adapter/fabric.py | 30 +++++----- tests/core/test_connection_config.py | 83 +++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 17 deletions(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 52ebdf7793..9d44cd9f62 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -767,6 +767,7 @@ These pages describe the connection configuration options for each execution eng * [BigQuery](../integrations/engines/bigquery.md) * [Databricks](../integrations/engines/databricks.md) * [DuckDB](../integrations/engines/duckdb.md) +* [Fabric](../integrations/engines/fabric.md) * [MotherDuck](../integrations/engines/motherduck.md) * [MySQL](../integrations/engines/mysql.md) * 
[MSSQL](../integrations/engines/mssql.md) diff --git a/sqlmesh/core/config/__init__.py b/sqlmesh/core/config/__init__.py index af84818858..65435376a0 100644 --- a/sqlmesh/core/config/__init__.py +++ b/sqlmesh/core/config/__init__.py @@ -10,6 +10,7 @@ ConnectionConfig as ConnectionConfig, DatabricksConnectionConfig as DatabricksConnectionConfig, DuckDBConnectionConfig as DuckDBConnectionConfig, + FabricConnectionConfig as FabricConnectionConfig, GCPPostgresConnectionConfig as GCPPostgresConnectionConfig, MotherDuckConnectionConfig as MotherDuckConnectionConfig, MSSQLConnectionConfig as MSSQLConnectionConfig, diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 44cc8bcfb3..f0a025607a 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -4,6 +4,7 @@ from sqlglot import exp from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, SourceQuery +from sqlmesh.core.engine_adapter.base import EngineAdapter if t.TYPE_CHECKING: from sqlmesh.core._typing import TableName @@ -110,22 +111,17 @@ def _insert_overwrite_by_condition( **kwargs: t.Any, ) -> None: """ - Implements the insert overwrite strategy for Fabric. + Implements the insert overwrite strategy for Fabric using DELETE and INSERT. - Overridden to enforce a `DELETE`/`INSERT` strategy, as Fabric's - `MERGE` statement has limitations. + This method is overridden to avoid the MERGE statement from the parent + MSSQLEngineAdapter, which is not fully supported in Fabric. """ - - columns_to_types = columns_to_types or self.columns(table_name) - - self.delete_from(table_name, where=where or exp.true()) - - for source_query in source_queries: - with source_query as query: - query = self._order_projections_and_filter(query, columns_to_types, where=where) - self._insert_append_query( - table_name, - query, - columns_to_types=columns_to_types, - order_projections=False, - ) + return EngineAdapter._insert_overwrite_by_condition( + self, + table_name=table_name, + source_queries=source_queries, + columns_to_types=columns_to_types, + where=where, + insert_overwrite_strategy_override=InsertOverwriteStrategy.DELETE_INSERT, + **kwargs, + ) diff --git a/tests/core/test_connection_config.py b/tests/core/test_connection_config.py index 7fe2487891..14306f7fce 100644 --- a/tests/core/test_connection_config.py +++ b/tests/core/test_connection_config.py @@ -12,6 +12,7 @@ ConnectionConfig, DatabricksConnectionConfig, DuckDBAttachOptions, + FabricConnectionConfig, DuckDBConnectionConfig, GCPPostgresConnectionConfig, MotherDuckConnectionConfig, @@ -1687,3 +1688,85 @@ def mock_add_output_converter(sql_type, converter_func): expected_dt = datetime(2023, 1, 1, 12, 0, 0, 0, timezone(timedelta(hours=-8, minutes=0))) assert result == expected_dt assert result.tzinfo == timezone(timedelta(hours=-8)) + + +def test_fabric_connection_config_defaults(make_config): + """Test Fabric connection config defaults to pyodbc and autocommit=True.""" + config = make_config(type="fabric", host="localhost", check_import=False) + assert isinstance(config, FabricConnectionConfig) + assert config.driver == "pyodbc" + assert config.autocommit is True + + # Ensure it creates the FabricAdapter + from sqlmesh.core.engine_adapter.fabric import FabricAdapter + + assert isinstance(config.create_engine_adapter(), FabricAdapter) + + +def test_fabric_connection_config_parameter_validation(make_config): + """Test Fabric connection config 
parameter validation.""" + # Test that FabricConnectionConfig correctly handles pyodbc-specific parameters. + config = make_config( + type="fabric", + host="localhost", + driver_name="ODBC Driver 18 for SQL Server", + trust_server_certificate=True, + encrypt=False, + odbc_properties={"Authentication": "ActiveDirectoryServicePrincipal"}, + check_import=False, + ) + assert isinstance(config, FabricConnectionConfig) + assert config.driver == "pyodbc" # Driver is fixed to pyodbc + assert config.driver_name == "ODBC Driver 18 for SQL Server" + assert config.trust_server_certificate is True + assert config.encrypt is False + assert config.odbc_properties == {"Authentication": "ActiveDirectoryServicePrincipal"} + + # Test that specifying a different driver for Fabric raises an error + with pytest.raises(ConfigError, match=r"Input should be 'pyodbc'"): + make_config(type="fabric", host="localhost", driver="pymssql", check_import=False) + + +def test_fabric_pyodbc_connection_string_generation(): + """Test that the Fabric pyodbc connection gets invoked with the correct ODBC connection string.""" + with patch("pyodbc.connect") as mock_pyodbc_connect: + # Create a Fabric config + config = FabricConnectionConfig( + host="testserver.datawarehouse.fabric.microsoft.com", + port=1433, + database="testdb", + user="testuser", + password="testpass", + driver_name="ODBC Driver 18 for SQL Server", + trust_server_certificate=True, + encrypt=True, + login_timeout=30, + check_import=False, + ) + + # Get the connection factory with kwargs and call it + factory_with_kwargs = config._connection_factory_with_kwargs + connection = factory_with_kwargs() + + # Verify pyodbc.connect was called with the correct connection string + mock_pyodbc_connect.assert_called_once() + call_args = mock_pyodbc_connect.call_args + + # Check the connection string (first argument) + conn_str = call_args[0][0] + expected_parts = [ + "DRIVER={ODBC Driver 18 for SQL Server}", + "SERVER=testserver.datawarehouse.fabric.microsoft.com,1433", + "DATABASE=testdb", + "Encrypt=YES", + "TrustServerCertificate=YES", + "Connection Timeout=30", + "UID=testuser", + "PWD=testpass", + ] + + for part in expected_parts: + assert part in conn_str + + # Check autocommit parameter, should default to True for Fabric + assert call_args[1]["autocommit"] is True From 5cb0e4f72a9ec3fc000faa0e3ed230df1af2e3a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Tue, 24 Jun 2025 15:08:59 +0200 Subject: [PATCH 30/70] remove table_exist and columns --- sqlmesh/core/engine_adapter/fabric.py | 81 ------------------------ tests/core/engine_adapter/test_fabric.py | 30 +++++++-- 2 files changed, 24 insertions(+), 87 deletions(-) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index f0a025607a..5725d3060a 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -20,87 +20,6 @@ class FabricAdapter(MSSQLEngineAdapter): SUPPORTS_TRANSACTIONS = False INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT - def table_exists(self, table_name: TableName) -> bool: - """ - Checks if a table exists. - - Querying the uppercase `INFORMATION_SCHEMA` required - by case-sensitive Fabric environments. 
- """ - table = exp.to_table(table_name) - sql = ( - exp.select("1") - .from_("INFORMATION_SCHEMA.TABLES") - .where(f"TABLE_NAME = '{table.alias_or_name}'") - ) - database_name = table.db - if database_name: - sql = sql.where(f"TABLE_SCHEMA = '{database_name}'") - - result = self.fetchone(sql, quote_identifiers=True) - - return result[0] == 1 if result else False - - def columns( - self, - table_name: TableName, - include_pseudo_columns: bool = True, - ) -> t.Dict[str, exp.DataType]: - """Fabric doesn't support describe so we query INFORMATION_SCHEMA.""" - - table = exp.to_table(table_name) - - sql = ( - exp.select( - "COLUMN_NAME", - "DATA_TYPE", - "CHARACTER_MAXIMUM_LENGTH", - "NUMERIC_PRECISION", - "NUMERIC_SCALE", - ) - .from_("INFORMATION_SCHEMA.COLUMNS") - .where(f"TABLE_NAME = '{table.name}'") - ) - database_name = table.db - if database_name: - sql = sql.where(f"TABLE_SCHEMA = '{database_name}'") - - columns_raw = self.fetchall(sql, quote_identifiers=True) - - def build_var_length_col( - column_name: str, - data_type: str, - character_maximum_length: t.Optional[int] = None, - numeric_precision: t.Optional[int] = None, - numeric_scale: t.Optional[int] = None, - ) -> tuple: - data_type = data_type.lower() - if ( - data_type in self.VARIABLE_LENGTH_DATA_TYPES - and character_maximum_length is not None - and character_maximum_length > 0 - ): - return (column_name, f"{data_type}({character_maximum_length})") - if ( - data_type in ("varbinary", "varchar", "nvarchar") - and character_maximum_length is not None - and character_maximum_length == -1 - ): - return (column_name, f"{data_type}(max)") - if data_type in ("decimal", "numeric"): - return (column_name, f"{data_type}({numeric_precision}, {numeric_scale})") - if data_type == "float": - return (column_name, f"{data_type}({numeric_precision})") - - return (column_name, data_type) - - columns = [build_var_length_col(*row) for row in columns_raw] - - return { - column_name: exp.DataType.build(data_type, dialect=self.dialect) - for column_name, data_type in columns - } - def _insert_overwrite_by_condition( self, table_name: TableName, diff --git a/tests/core/engine_adapter/test_fabric.py b/tests/core/engine_adapter/test_fabric.py index 623bbe6653..80aea0c989 100644 --- a/tests/core/engine_adapter/test_fabric.py +++ b/tests/core/engine_adapter/test_fabric.py @@ -53,7 +53,9 @@ def test_table_exists(adapter: FabricAdapter): assert not adapter.table_exists("db.table") -def test_insert_overwrite_by_time_partition(adapter: FabricAdapter): +def test_insert_overwrite_by_time_partition( + adapter: FabricAdapter, assert_exp_eq +): # Add assert_exp_eq fixture adapter.insert_overwrite_by_time_partition( "test_table", parse_one("SELECT a, b FROM tbl"), @@ -64,11 +66,27 @@ def test_insert_overwrite_by_time_partition(adapter: FabricAdapter): columns_to_types={"a": exp.DataType.build("INT"), "b": exp.DataType.build("STRING")}, ) - # Fabric adapter should use DELETE/INSERT strategy, not MERGE. 
- assert to_sql_calls(adapter) == [ - """DELETE FROM [test_table] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""", - """INSERT INTO [test_table] ([a], [b]) SELECT [a], [b] FROM (SELECT [a], [b] FROM [tbl]) AS [_subquery] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""", - ] + # Get the list of generated SQL strings + actual_sql_calls = to_sql_calls(adapter) + + # There should be two calls: DELETE and INSERT + assert len(actual_sql_calls) == 2 + + # Assert the DELETE statement is correct (string comparison is fine for this simple one) + assert ( + actual_sql_calls[0] + == "DELETE FROM [test_table] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';" + ) + + # Assert the INSERT statement is semantically correct + expected_insert_sql = """ + INSERT INTO [test_table] ([a], [b]) + SELECT [a], [b] FROM (SELECT [a], [b] FROM [tbl]) AS [_subquery] + WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02'; + """ + + # Use assert_exp_eq to compare the parsed SQL expressions + assert_exp_eq(actual_sql_calls[1], expected_insert_sql) def test_replace_query(adapter: FabricAdapter): From 825354557a0c48375e6fd84ac9c10fc4e5bae5ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredh=C3=B8i?= Date: Wed, 25 Jun 2025 08:52:33 +0200 Subject: [PATCH 31/70] updated tests --- sqlmesh/core/config/connection.py | 4 +++- tests/core/engine_adapter/test_fabric.py | 30 +++++------------------- 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 65b00e0852..7f6e3b4bb2 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1682,12 +1682,14 @@ def _extra_engine_config(self) -> t.Dict[str, t.Any]: class FabricConnectionConfig(MSSQLConnectionConfig): """ Fabric Connection Configuration. - Inherits most settings from MSSQLConnectionConfig and sets the type to 'fabric'. It is recommended to use the 'pyodbc' driver for Fabric. 
""" type_: t.Literal["fabric"] = Field(alias="type", default="fabric") # type: ignore + DIALECT: t.ClassVar[t.Literal["fabric"]] = "fabric" + DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric" + DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17 driver: t.Literal["pyodbc"] = "pyodbc" autocommit: t.Optional[bool] = True diff --git a/tests/core/engine_adapter/test_fabric.py b/tests/core/engine_adapter/test_fabric.py index 80aea0c989..709df816d2 100644 --- a/tests/core/engine_adapter/test_fabric.py +++ b/tests/core/engine_adapter/test_fabric.py @@ -53,9 +53,7 @@ def test_table_exists(adapter: FabricAdapter): assert not adapter.table_exists("db.table") -def test_insert_overwrite_by_time_partition( - adapter: FabricAdapter, assert_exp_eq -): # Add assert_exp_eq fixture +def test_insert_overwrite_by_time_partition(adapter: FabricAdapter): adapter.insert_overwrite_by_time_partition( "test_table", parse_one("SELECT a, b FROM tbl"), @@ -66,27 +64,11 @@ def test_insert_overwrite_by_time_partition( columns_to_types={"a": exp.DataType.build("INT"), "b": exp.DataType.build("STRING")}, ) - # Get the list of generated SQL strings - actual_sql_calls = to_sql_calls(adapter) - - # There should be two calls: DELETE and INSERT - assert len(actual_sql_calls) == 2 - - # Assert the DELETE statement is correct (string comparison is fine for this simple one) - assert ( - actual_sql_calls[0] - == "DELETE FROM [test_table] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';" - ) - - # Assert the INSERT statement is semantically correct - expected_insert_sql = """ - INSERT INTO [test_table] ([a], [b]) - SELECT [a], [b] FROM (SELECT [a], [b] FROM [tbl]) AS [_subquery] - WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02'; - """ - - # Use assert_exp_eq to compare the parsed SQL expressions - assert_exp_eq(actual_sql_calls[1], expected_insert_sql) + # Fabric adapter should use DELETE/INSERT strategy, not MERGE. 
+    assert to_sql_calls(adapter) == [
+        """DELETE FROM [test_table] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""",
+        """INSERT INTO [test_table] ([a], [b]) SELECT [a], [b] FROM (SELECT [a] AS [a], [b] AS [b] FROM [tbl]) AS [_subquery] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""",
+    ]
 
 
 def test_replace_query(adapter: FabricAdapter):

From 6a54905a82120430b691bb3b7f41b0d0f0732197 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fredh=C3=B8i?=
Date: Wed, 25 Jun 2025 10:54:41 +0200
Subject: [PATCH 32/70] mypy

---
 sqlmesh/core/config/connection.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py
index 7f6e3b4bb2..2f68aab63e 100644
--- a/sqlmesh/core/config/connection.py
+++ b/sqlmesh/core/config/connection.py
@@ -1687,9 +1687,9 @@ class FabricConnectionConfig(MSSQLConnectionConfig):
     """
 
     type_: t.Literal["fabric"] = Field(alias="type", default="fabric")  # type: ignore
-    DIALECT: t.ClassVar[t.Literal["fabric"]] = "fabric"
-    DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric"
-    DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17
+    DIALECT: t.ClassVar[t.Literal["fabric"]] = "fabric" # type: ignore
+    DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric" # type: ignore
+    DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17 # type: ignore
     driver: t.Literal["pyodbc"] = "pyodbc"
     autocommit: t.Optional[bool] = True

From 6f1a5754ff0646544793bc19e1c7c9f01a1a1c63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fredh=C3%B8i?=
Date: Wed, 25 Jun 2025 11:10:04 +0200
Subject: [PATCH 33/70] ruff

---
 sqlmesh/core/config/connection.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py
index 2f68aab63e..e8ec9b4e40 100644
--- a/sqlmesh/core/config/connection.py
+++ b/sqlmesh/core/config/connection.py
@@ -1687,9 +1687,9 @@ class FabricConnectionConfig(MSSQLConnectionConfig):
     """
 
     type_: t.Literal["fabric"] = Field(alias="type", default="fabric")  # type: ignore
-    DIALECT: t.ClassVar[t.Literal["fabric"]] = "fabric" # type: ignore
-    DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric" # type: ignore
-    DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17 # type: ignore
+    DIALECT: t.ClassVar[t.Literal["fabric"]] = "fabric"  # type: ignore
+    DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric"  # type: ignore
+    DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17  # type: ignore
     driver: t.Literal["pyodbc"] = "pyodbc"
     autocommit: t.Optional[bool] = True

From c2d10a2451787b814959935e6951954e17a44753 Mon Sep 17 00:00:00 2001
From: Andreas <65893109+fresioAS@users.noreply.github.com>
Date: Wed, 25 Jun 2025 16:11:25 +0200
Subject: [PATCH 34/70] Update fabric.md

---
 docs/integrations/engines/fabric.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/integrations/engines/fabric.md b/docs/integrations/engines/fabric.md
index aca9c32eed..1dd47fbe11 100644
--- a/docs/integrations/engines/fabric.md
+++ b/docs/integrations/engines/fabric.md
@@ -3,6 +3,8 @@
 ## Local/Built-in Scheduler
 **Engine Adapter Type**: `fabric`
 
+NOTE: Fabric Warehouse is not recommended for use as the SQLMesh [state connection](../../reference/configuration.md#connections).
+
 ### Installation
 #### Microsoft Entra ID / Azure Active Directory Authentication:
 ```
 pip install "sqlmesh[mssql-odbc]"
 ```
@@ -27,4 +29,4 @@ pip install "sqlmesh[mssql-odbc]"
 | `autocommit` | Is autocommit mode enabled. Default: false | bool | N |
 | `driver` | The driver to use for the connection.
Default: pyodbc | string | N | | `driver_name` | The driver name to use for the connection. E.g., *ODBC Driver 18 for SQL Server* | string | N | -| `odbc_properties` | The dict of ODBC connection properties. E.g., authentication: ActiveDirectoryServicePrincipal. See more [here](https://learn.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver16). | dict | N | \ No newline at end of file +| `odbc_properties` | The dict of ODBC connection properties. E.g., authentication: ActiveDirectoryServicePrincipal. See more [here](https://learn.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver16). | dict | N | From cd9f261f175ae4dac663933cc758aced32fb6641 Mon Sep 17 00:00:00 2001 From: Andreas <65893109+fresioAS@users.noreply.github.com> Date: Wed, 2 Jul 2025 13:28:52 +0200 Subject: [PATCH 35/70] Update sqlmesh/core/engine_adapter/fabric.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mattias Thalén --- sqlmesh/core/engine_adapter/fabric.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 5725d3060a..97322641bd 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -10,7 +10,9 @@ from sqlmesh.core._typing import TableName -class FabricAdapter(MSSQLEngineAdapter): +from sqlmesh.core.engine_adapter.mixins import LogicalMergeMixin + +class FabricAdapter(LogicalMergeMixin, MSSQLEngineAdapter): """ Adapter for Microsoft Fabric. """ From 1eb623a96f395561b751263650d0d69cdc198e89 Mon Sep 17 00:00:00 2001 From: Andreas <65893109+fresioAS@users.noreply.github.com> Date: Wed, 2 Jul 2025 14:39:03 +0200 Subject: [PATCH 36/70] Update fabric.py --- sqlmesh/core/engine_adapter/fabric.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 97322641bd..d7b862d50a 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -12,6 +12,7 @@ from sqlmesh.core.engine_adapter.mixins import LogicalMergeMixin + class FabricAdapter(LogicalMergeMixin, MSSQLEngineAdapter): """ Adapter for Microsoft Fabric. 
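For reference, the connection options documented in `fabric.md` above combine into a gateway definition along the following lines. This is an illustrative sketch only: the host, warehouse, and service-principal values are placeholders, and the option names are taken from the table above and from the integration-test `config.yaml` introduced later in this series.

```yaml
gateways:
  fabric:
    connection:
      type: fabric
      host: myserver.datawarehouse.fabric.microsoft.com  # placeholder SQL endpoint
      database: my_warehouse                             # placeholder warehouse name
      user: my_client_id                                 # placeholder Entra ID service principal
      password: my_client_secret                         # placeholder client secret
      driver_name: ODBC Driver 18 for SQL Server
      odbc_properties:
        Authentication: ActiveDirectoryServicePrincipal
```

Because `FabricConnectionConfig` pins `driver` to `pyodbc` and defaults `autocommit` to `True`, neither option needs to be set explicitly in the gateway.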
From 5113ef43627ce6aa3759b60e079c47876aaf4bba Mon Sep 17 00:00:00 2001 From: Andreas <65893109+fresioAS@users.noreply.github.com> Date: Thu, 10 Jul 2025 16:53:22 +0200 Subject: [PATCH 37/70] Update sqlmesh/core/config/connection.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mattias Thalén --- sqlmesh/core/config/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index e8ec9b4e40..e86d13e77c 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1703,7 +1703,7 @@ def _engine_adapter(self) -> t.Type[EngineAdapter]: def _extra_engine_config(self) -> t.Dict[str, t.Any]: return { "database": self.database, - "catalog_support": CatalogSupport.REQUIRES_SET_CATALOG, + "catalog_support": CatalogSupport.SINGLE_CATALOG_ONLY, } From d17677eb28cda45844cbccfe48b6a19052e23171 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Thu, 3 Jul 2025 22:46:25 +0000 Subject: [PATCH 38/70] Add Fabric to integration tests --- Makefile | 3 +++ pyproject.toml | 1 + tests/core/engine_adapter/integration/__init__.py | 1 + tests/core/engine_adapter/integration/config.yaml | 13 +++++++++++++ 4 files changed, 18 insertions(+) diff --git a/Makefile b/Makefile index 0a89bba437..e643ae7ad2 100644 --- a/Makefile +++ b/Makefile @@ -173,6 +173,9 @@ clickhouse-cloud-test: guard-CLICKHOUSE_CLOUD_HOST guard-CLICKHOUSE_CLOUD_USERNA athena-test: guard-AWS_ACCESS_KEY_ID guard-AWS_SECRET_ACCESS_KEY guard-ATHENA_S3_WAREHOUSE_LOCATION engine-athena-install pytest -n auto -m "athena" --retries 3 --junitxml=test-results/junit-athena.xml +fabric-test: guard-FABRIC_HOST guard-FABRIC_CLIENT_ID guard-FABRIC_CLIENT_SECRET guard-FABRIC_DATABASE engine-fabric-install + pytest -n auto -m "fabric" --retries 3 --junitxml=test-results/junit-fabric.xml + vscode_settings: mkdir -p .vscode cp -r ./tooling/vscode/*.json .vscode/ diff --git a/pyproject.toml b/pyproject.toml index 9f066624d6..9b5b072d8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,6 +103,7 @@ dev = [ dbt = ["dbt-core<2"] dlt = ["dlt"] duckdb = [] +fabric = ["pyodbc"] gcppostgres = ["cloud-sql-python-connector[pg8000]>=1.8.0"] github = ["PyGithub~=2.5.0"] llm = ["langchain", "openai"] diff --git a/tests/core/engine_adapter/integration/__init__.py b/tests/core/engine_adapter/integration/__init__.py index 7e35b832be..99402df6ae 100644 --- a/tests/core/engine_adapter/integration/__init__.py +++ b/tests/core/engine_adapter/integration/__init__.py @@ -82,6 +82,7 @@ def pytest_marks(self) -> t.List[MarkDecorator]: IntegrationTestEngine("bigquery", native_dataframe_type="bigframe", cloud=True), IntegrationTestEngine("databricks", native_dataframe_type="pyspark", cloud=True), IntegrationTestEngine("snowflake", native_dataframe_type="snowpark", cloud=True), + IntegrationTestEngine("fabric", cloud=True) ] ENGINES_BY_NAME = {e.engine: e for e in ENGINES} diff --git a/tests/core/engine_adapter/integration/config.yaml b/tests/core/engine_adapter/integration/config.yaml index d18ea5366f..4b9c881208 100644 --- a/tests/core/engine_adapter/integration/config.yaml +++ b/tests/core/engine_adapter/integration/config.yaml @@ -186,5 +186,18 @@ gateways: state_connection: type: duckdb + inttest_fabric: + connection: + type: fabric + driver: pyodbc + host: {{ env_var("FABRIC_HOST") }} + user: {{ env_var("FABRIC_CLIENT_ID") }} + password: {{ env_var("FABRIC_CLIENT_SECRET") }} + database: {{ 
env_var("FABRIC_DATABASE") }} + odbc_properties: + Authentication: ActiveDirectoryServicePrincipal + state_connection: + type: duckdb + model_defaults: dialect: duckdb From f54b0f666782e5b68dbfcdce54103ef2d4e80039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Sun, 20 Jul 2025 20:43:59 +0000 Subject: [PATCH 39/70] fix: update varchar columns to varchar(max) in table diff tests --- .../integration/test_integration.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index 0844cce3c4..e30475e2f5 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -2253,7 +2253,10 @@ def test_table_diff_grain_check_single_key(ctx: TestContext): src_table = ctx.table("source") target_table = ctx.table("target") - columns_to_types = {"key1": exp.DataType.build("int"), "value": exp.DataType.build("varchar")} + columns_to_types = { + "key1": exp.DataType.build("int"), + "value": exp.DataType.build("varchar(max)"), + } ctx.engine_adapter.create_table(src_table, columns_to_types) ctx.engine_adapter.create_table(target_table, columns_to_types) @@ -2316,8 +2319,8 @@ def test_table_diff_grain_check_multiple_keys(ctx: TestContext): columns_to_types = { "key1": exp.DataType.build("int"), - "key2": exp.DataType.build("varchar"), - "value": exp.DataType.build("varchar"), + "key2": exp.DataType.build("varchar(max)"), + "value": exp.DataType.build("varchar(max)"), } ctx.engine_adapter.create_table(src_table, columns_to_types) @@ -2374,13 +2377,13 @@ def test_table_diff_arbitrary_condition(ctx: TestContext): columns_to_types_src = { "id": exp.DataType.build("int"), - "value": exp.DataType.build("varchar"), + "value": exp.DataType.build("varchar(max)"), "ts": exp.DataType.build("timestamp"), } columns_to_types_target = { "item_id": exp.DataType.build("int"), - "value": exp.DataType.build("varchar"), + "value": exp.DataType.build("varchar(max)"), "ts": exp.DataType.build("timestamp"), } @@ -2441,8 +2444,8 @@ def test_table_diff_identical_dataset(ctx: TestContext): columns_to_types = { "key1": exp.DataType.build("int"), - "key2": exp.DataType.build("varchar"), - "value": exp.DataType.build("varchar"), + "key2": exp.DataType.build("varchar(max)"), + "value": exp.DataType.build("varchar(max)"), } ctx.engine_adapter.create_table(src_table, columns_to_types) From cd4aa95de08d04ab07e45e194841882304812208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Sun, 20 Jul 2025 21:41:02 +0000 Subject: [PATCH 40/70] fix: change varchar(max) to varchar(8000) in integration tests --- .../integration/test_integration.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index e30475e2f5..354abb5bea 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -554,7 +554,7 @@ def test_insert_overwrite_by_time_partition(ctx_query_and_df: TestContext): if ctx.dialect == "bigquery": ds_type = "datetime" if ctx.dialect == "tsql": - ds_type = "varchar(max)" + ds_type = "varchar(8000)" ctx.columns_to_types = {"id": "int", "ds": ds_type} table = ctx.table("test_table") @@ -2255,7 +2255,7 @@ def test_table_diff_grain_check_single_key(ctx: TestContext): 
columns_to_types = { "key1": exp.DataType.build("int"), - "value": exp.DataType.build("varchar(max)"), + "value": exp.DataType.build("varchar(8000)"), } ctx.engine_adapter.create_table(src_table, columns_to_types) @@ -2319,8 +2319,8 @@ def test_table_diff_grain_check_multiple_keys(ctx: TestContext): columns_to_types = { "key1": exp.DataType.build("int"), - "key2": exp.DataType.build("varchar(max)"), - "value": exp.DataType.build("varchar(max)"), + "key2": exp.DataType.build("varchar(8000)"), + "value": exp.DataType.build("varchar(8000)"), } ctx.engine_adapter.create_table(src_table, columns_to_types) @@ -2377,13 +2377,13 @@ def test_table_diff_arbitrary_condition(ctx: TestContext): columns_to_types_src = { "id": exp.DataType.build("int"), - "value": exp.DataType.build("varchar(max)"), + "value": exp.DataType.build("varchar(8000)"), "ts": exp.DataType.build("timestamp"), } columns_to_types_target = { "item_id": exp.DataType.build("int"), - "value": exp.DataType.build("varchar(max)"), + "value": exp.DataType.build("varchar(8000)"), "ts": exp.DataType.build("timestamp"), } @@ -2444,8 +2444,8 @@ def test_table_diff_identical_dataset(ctx: TestContext): columns_to_types = { "key1": exp.DataType.build("int"), - "key2": exp.DataType.build("varchar(max)"), - "value": exp.DataType.build("varchar(max)"), + "key2": exp.DataType.build("varchar(8000)"), + "value": exp.DataType.build("varchar(8000)"), } ctx.engine_adapter.create_table(src_table, columns_to_types) From 94e7978d8381082d98925669602fabb5c1707ffe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Sun, 20 Jul 2025 21:57:03 +0000 Subject: [PATCH 41/70] Revert "fix(mssql): update driver selection logic to allow enforcing pyodbc in Fabric" This reverts commit 4412fc9a6c194dc49ffb92c746d4db301bad1463. --- sqlmesh/core/config/connection.py | 9 +----- tests/core/test_connection_config.py | 47 ---------------------------- 2 files changed, 1 insertion(+), 55 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 55534d81d9..d305f52a45 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1529,14 +1529,7 @@ def _mssql_engine_import_validator(cls, data: t.Any) -> t.Any: if not isinstance(data, dict): return data - # Get the default driver for this specific class - default_driver = "pymssql" - if hasattr(cls, "model_fields") and "driver" in cls.model_fields: - field_info = cls.model_fields["driver"] - if hasattr(field_info, "default") and field_info.default is not None: - default_driver = field_info.default - - driver = data.get("driver", default_driver) + driver = data.get("driver", "pymssql") # Define the mapping of driver to import module and extra name driver_configs = {"pymssql": ("pymssql", "mssql"), "pyodbc": ("pyodbc", "mssql-odbc")} diff --git a/tests/core/test_connection_config.py b/tests/core/test_connection_config.py index 0082638b91..14306f7fce 100644 --- a/tests/core/test_connection_config.py +++ b/tests/core/test_connection_config.py @@ -1770,50 +1770,3 @@ def test_fabric_pyodbc_connection_string_generation(): # Check autocommit parameter, should default to True for Fabric assert call_args[1]["autocommit"] is True - - -def test_mssql_driver_defaults(make_config): - """Test driver defaults for MSSQL connection config. - - Ensures MSSQL defaults to 'pymssql' but can be overridden to 'pyodbc'. 
- """ - - # Test 1: MSSQL with no driver specified - should default to pymssql - config_no_driver = make_config(type="mssql", host="localhost", check_import=False) - assert isinstance(config_no_driver, MSSQLConnectionConfig) - assert config_no_driver.driver == "pymssql" - - # Test 2: MSSQL with explicit pymssql driver - config_pymssql = make_config( - type="mssql", host="localhost", driver="pymssql", check_import=False - ) - assert isinstance(config_pymssql, MSSQLConnectionConfig) - assert config_pymssql.driver == "pymssql" - - # Test 3: MSSQL with explicit pyodbc driver - config_pyodbc = make_config(type="mssql", host="localhost", driver="pyodbc", check_import=False) - assert isinstance(config_pyodbc, MSSQLConnectionConfig) - assert config_pyodbc.driver == "pyodbc" - - -def test_fabric_driver_defaults(make_config): - """Test driver defaults for Fabric connection config. - - Ensures Fabric defaults to 'pyodbc' and cannot be changed to 'pymssql'. - """ - - # Test 1: Fabric with no driver specified - should default to pyodbc - config_no_driver = make_config(type="fabric", host="localhost", check_import=False) - assert isinstance(config_no_driver, FabricConnectionConfig) - assert config_no_driver.driver == "pyodbc" - - # Test 2: Fabric with explicit pyodbc driver - config_pyodbc = make_config( - type="fabric", host="localhost", driver="pyodbc", check_import=False - ) - assert isinstance(config_pyodbc, FabricConnectionConfig) - assert config_pyodbc.driver == "pyodbc" - - # Test 3: Fabric with pymssql driver should fail (not allowed) - with pytest.raises(ConfigError, match=r"Input should be 'pyodbc'"): - make_config(type="fabric", host="localhost", driver="pymssql", check_import=False) From 1628ca5364d0767df56010256eb9f8879757667e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Sun, 20 Jul 2025 22:41:06 +0000 Subject: [PATCH 42/70] Test removal of fabric config --- .../engine_adapter/integration/config.yaml | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/core/engine_adapter/integration/config.yaml b/tests/core/engine_adapter/integration/config.yaml index 4b9c881208..42bedcfab0 100644 --- a/tests/core/engine_adapter/integration/config.yaml +++ b/tests/core/engine_adapter/integration/config.yaml @@ -186,18 +186,18 @@ gateways: state_connection: type: duckdb - inttest_fabric: - connection: - type: fabric - driver: pyodbc - host: {{ env_var("FABRIC_HOST") }} - user: {{ env_var("FABRIC_CLIENT_ID") }} - password: {{ env_var("FABRIC_CLIENT_SECRET") }} - database: {{ env_var("FABRIC_DATABASE") }} - odbc_properties: - Authentication: ActiveDirectoryServicePrincipal - state_connection: - type: duckdb + #inttest_fabric: + # connection: + # type: fabric + # driver: pyodbc + # host: {{ env_var("FABRIC_HOST") }} + # user: {{ env_var("FABRIC_CLIENT_ID") }} + # password: {{ env_var("FABRIC_CLIENT_SECRET") }} + # database: {{ env_var("FABRIC_DATABASE") }} + # odbc_properties: + # Authentication: ActiveDirectoryServicePrincipal + # state_connection: + # type: duckdb model_defaults: dialect: duckdb From bce209e764a757a09db19c5f8c8f6968978ae5cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Tue, 22 Jul 2025 21:54:09 +0000 Subject: [PATCH 43/70] Bump SQLGlot to 27.2.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 91399136e3..572ac2b73d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "requests", "rich[jupyter]", 
"ruamel.yaml", - "sqlglot[rs]~=27.1.0", + "sqlglot[rs]~=27.2.0", "tenacity", "time-machine", "json-stream" From 25c393f801661d7d9feb7c06e6f0ab31a4fe7de7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Tue, 22 Jul 2025 21:58:19 +0000 Subject: [PATCH 44/70] Activate fabric profile in integration testing --- .../engine_adapter/integration/config.yaml | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/core/engine_adapter/integration/config.yaml b/tests/core/engine_adapter/integration/config.yaml index 42bedcfab0..4b9c881208 100644 --- a/tests/core/engine_adapter/integration/config.yaml +++ b/tests/core/engine_adapter/integration/config.yaml @@ -186,18 +186,18 @@ gateways: state_connection: type: duckdb - #inttest_fabric: - # connection: - # type: fabric - # driver: pyodbc - # host: {{ env_var("FABRIC_HOST") }} - # user: {{ env_var("FABRIC_CLIENT_ID") }} - # password: {{ env_var("FABRIC_CLIENT_SECRET") }} - # database: {{ env_var("FABRIC_DATABASE") }} - # odbc_properties: - # Authentication: ActiveDirectoryServicePrincipal - # state_connection: - # type: duckdb + inttest_fabric: + connection: + type: fabric + driver: pyodbc + host: {{ env_var("FABRIC_HOST") }} + user: {{ env_var("FABRIC_CLIENT_ID") }} + password: {{ env_var("FABRIC_CLIENT_SECRET") }} + database: {{ env_var("FABRIC_DATABASE") }} + odbc_properties: + Authentication: ActiveDirectoryServicePrincipal + state_connection: + type: duckdb model_defaults: dialect: duckdb From 7756a8f3729a2caffd16d1281a818ec342bc3418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Tue, 22 Jul 2025 22:04:17 +0000 Subject: [PATCH 45/70] Add odbc to engine_tests_cloud in circleci --- .circleci/continue_config.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index 34bdf0e98b..c395d9e5ab 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -237,6 +237,9 @@ jobs: steps: - halt_unless_core - checkout + - run: + name: Install ODBC + command: sudo apt-get install unixodbc-dev - run: name: Generate database name command: | From 16552638e286335c18d0912442dec1858bb2f7f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Fri, 11 Jul 2025 00:03:05 +0000 Subject: [PATCH 46/70] feat(fabric): add catalog management for Fabric --- sqlmesh/core/config/connection.py | 7 +- sqlmesh/core/engine_adapter/fabric.py | 134 ++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 1 deletion(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index d305f52a45..8af192c7d5 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1709,6 +1709,11 @@ class FabricConnectionConfig(MSSQLConnectionConfig): DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17 # type: ignore driver: t.Literal["pyodbc"] = "pyodbc" autocommit: t.Optional[bool] = True + workspace_id: t.Optional[str] = None + # Service Principal authentication for Fabric REST API + tenant_id: t.Optional[str] = None + client_id: t.Optional[str] = None + client_secret: t.Optional[str] = None @property def _engine_adapter(self) -> t.Type[EngineAdapter]: @@ -1720,7 +1725,7 @@ def _engine_adapter(self) -> t.Type[EngineAdapter]: def _extra_engine_config(self) -> t.Dict[str, t.Any]: return { "database": self.database, - "catalog_support": CatalogSupport.SINGLE_CATALOG_ONLY, + "catalog_support": CatalogSupport.REQUIRES_SET_CATALOG, } diff --git 
a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index d7b862d50a..7e3475a3e6 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -1,10 +1,13 @@ from __future__ import annotations import typing as t +import logging from sqlglot import exp from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, SourceQuery from sqlmesh.core.engine_adapter.base import EngineAdapter +from sqlmesh.utils import optional_import +from sqlmesh.utils.errors import SQLMeshError if t.TYPE_CHECKING: from sqlmesh.core._typing import TableName @@ -12,6 +15,9 @@ from sqlmesh.core.engine_adapter.mixins import LogicalMergeMixin +logger = logging.getLogger(__name__) +requests = optional_import("requests") + class FabricAdapter(LogicalMergeMixin, MSSQLEngineAdapter): """ @@ -21,6 +27,7 @@ class FabricAdapter(LogicalMergeMixin, MSSQLEngineAdapter): DIALECT = "fabric" SUPPORTS_INDEXES = False SUPPORTS_TRANSACTIONS = False + SUPPORTS_CREATE_DROP_CATALOG = True INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT def _insert_overwrite_by_condition( @@ -47,3 +54,130 @@ def _insert_overwrite_by_condition( insert_overwrite_strategy_override=InsertOverwriteStrategy.DELETE_INSERT, **kwargs, ) + + def _get_access_token(self) -> str: + """Get access token using Service Principal authentication.""" + tenant_id = self._extra_config.get("tenant_id") + client_id = self._extra_config.get("client_id") + client_secret = self._extra_config.get("client_secret") + + if not all([tenant_id, client_id, client_secret]): + raise SQLMeshError( + "Service Principal authentication requires tenant_id, client_id, and client_secret " + "in the Fabric connection configuration" + ) + + if not requests: + raise SQLMeshError("requests library is required for Fabric authentication") + + # Use Azure AD OAuth2 token endpoint + token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token" + + data = { + "grant_type": "client_credentials", + "client_id": client_id, + "client_secret": client_secret, + "scope": "https://api.fabric.microsoft.com/.default", + } + + try: + response = requests.post(token_url, data=data) + response.raise_for_status() + token_data = response.json() + return token_data["access_token"] + except requests.exceptions.RequestException as e: + raise SQLMeshError(f"Failed to authenticate with Azure AD: {e}") + except KeyError: + raise SQLMeshError("Invalid response from Azure AD token endpoint") + + def _get_fabric_auth_headers(self) -> t.Dict[str, str]: + """Get authentication headers for Fabric REST API calls.""" + access_token = self._get_access_token() + return {"Authorization": f"Bearer {access_token}", "Content-Type": "application/json"} + + def _make_fabric_api_request( + self, method: str, endpoint: str, data: t.Optional[t.Dict[str, t.Any]] = None + ) -> t.Dict[str, t.Any]: + """Make a request to the Fabric REST API.""" + if not requests: + raise SQLMeshError("requests library is required for Fabric catalog operations") + + workspace_id = self._extra_config.get("workspace_id") + if not workspace_id: + raise SQLMeshError( + "workspace_id parameter is required in connection config for Fabric catalog operations" + ) + + base_url = "https://api.fabric.microsoft.com/v1" + url = f"{base_url}/workspaces/{workspace_id}/{endpoint}" + + headers = self._get_fabric_auth_headers() + + try: + if method.upper() == "GET": + response = requests.get(url, headers=headers) + 
elif method.upper() == "POST": + response = requests.post(url, headers=headers, json=data) + elif method.upper() == "DELETE": + response = requests.delete(url, headers=headers) + else: + raise SQLMeshError(f"Unsupported HTTP method: {method}") + + response.raise_for_status() + + if response.status_code == 204: # No content + return {} + + return response.json() if response.content else {} + + except requests.exceptions.RequestException as e: + raise SQLMeshError(f"Fabric API request failed: {e}") + + def _create_catalog(self, catalog_name: exp.Identifier) -> None: + """Create a catalog (warehouse) in Microsoft Fabric via REST API.""" + warehouse_name = catalog_name.sql(dialect=self.dialect, identify=False) + + logger.info(f"Creating Fabric warehouse: {warehouse_name}") + + request_data = { + "displayName": warehouse_name, + "description": f"Warehouse created by SQLMesh: {warehouse_name}", + } + + try: + self._make_fabric_api_request("POST", "warehouses", request_data) + logger.info(f"Successfully created Fabric warehouse: {warehouse_name}") + except SQLMeshError as e: + if "already exists" in str(e).lower(): + logger.info(f"Fabric warehouse already exists: {warehouse_name}") + return + raise + + def _drop_catalog(self, catalog_name: exp.Identifier) -> None: + """Drop a catalog (warehouse) in Microsoft Fabric via REST API.""" + warehouse_name = catalog_name.sql(dialect=self.dialect, identify=False) + + logger.info(f"Deleting Fabric warehouse: {warehouse_name}") + + # First, we need to get the warehouse ID by listing warehouses + try: + warehouses = self._make_fabric_api_request("GET", "warehouses") + warehouse_id = None + + for warehouse in warehouses.get("value", []): + if warehouse.get("displayName") == warehouse_name: + warehouse_id = warehouse.get("id") + break + + if not warehouse_id: + raise SQLMeshError(f"Warehouse not found: {warehouse_name}") + + # Delete the warehouse by ID + self._make_fabric_api_request("DELETE", f"warehouses/{warehouse_id}") + logger.info(f"Successfully deleted Fabric warehouse: {warehouse_name}") + + except SQLMeshError as e: + if "not found" in str(e).lower(): + logger.info(f"Fabric warehouse does not exist: {warehouse_name}") + return + raise From ea088aac89360090664dfe04290df4d0d62830e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Fri, 11 Jul 2025 11:49:05 +0000 Subject: [PATCH 47/70] feat(fabric): update connection configuration for Fabric adapter --- sqlmesh/core/config/connection.py | 9 +++------ sqlmesh/core/engine_adapter/fabric.py | 14 +++++++------- tests/core/engine_adapter/integration/config.yaml | 6 ++++-- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 8af192c7d5..005fc531b9 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1708,12 +1708,9 @@ class FabricConnectionConfig(MSSQLConnectionConfig): DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric" # type: ignore DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17 # type: ignore driver: t.Literal["pyodbc"] = "pyodbc" + workspace: str + tenant: str autocommit: t.Optional[bool] = True - workspace_id: t.Optional[str] = None - # Service Principal authentication for Fabric REST API - tenant_id: t.Optional[str] = None - client_id: t.Optional[str] = None - client_secret: t.Optional[str] = None @property def _engine_adapter(self) -> t.Type[EngineAdapter]: @@ -1725,7 +1722,7 @@ def _engine_adapter(self) -> t.Type[EngineAdapter]: def 
_extra_engine_config(self) -> t.Dict[str, t.Any]: return { "database": self.database, - "catalog_support": CatalogSupport.REQUIRES_SET_CATALOG, + "catalog_support": CatalogSupport.FULL_SUPPORT, } diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 7e3475a3e6..820338ca51 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -57,9 +57,9 @@ def _insert_overwrite_by_condition( def _get_access_token(self) -> str: """Get access token using Service Principal authentication.""" - tenant_id = self._extra_config.get("tenant_id") - client_id = self._extra_config.get("client_id") - client_secret = self._extra_config.get("client_secret") + tenant_id = self._extra_config.get("tenant") + client_id = self._extra_config.get("user") + client_secret = self._extra_config.get("password") if not all([tenant_id, client_id, client_secret]): raise SQLMeshError( @@ -102,14 +102,14 @@ def _make_fabric_api_request( if not requests: raise SQLMeshError("requests library is required for Fabric catalog operations") - workspace_id = self._extra_config.get("workspace_id") - if not workspace_id: + workspace = self._extra_config.get("workspace") + if not workspace: raise SQLMeshError( - "workspace_id parameter is required in connection config for Fabric catalog operations" + "workspace parameter is required in connection config for Fabric catalog operations" ) base_url = "https://api.fabric.microsoft.com/v1" - url = f"{base_url}/workspaces/{workspace_id}/{endpoint}" + url = f"{base_url}/workspaces/{workspace}/{endpoint}" headers = self._get_fabric_auth_headers() diff --git a/tests/core/engine_adapter/integration/config.yaml b/tests/core/engine_adapter/integration/config.yaml index 4b9c881208..402f618fef 100644 --- a/tests/core/engine_adapter/integration/config.yaml +++ b/tests/core/engine_adapter/integration/config.yaml @@ -192,10 +192,12 @@ gateways: driver: pyodbc host: {{ env_var("FABRIC_HOST") }} user: {{ env_var("FABRIC_CLIENT_ID") }} - password: {{ env_var("FABRIC_CLIENT_SECRET") }} + password: {{ env_var("FABRIC_CLIENT_SECRET") }} database: {{ env_var("FABRIC_DATABASE") }} + tenant: {{ env_var("FABRIC_TENANT") }} + workspace: {{ env_var("FABRIC_WORKSPACE") }} odbc_properties: - Authentication: ActiveDirectoryServicePrincipal + Authentication: ActiveDirectoryServicePrincipal state_connection: type: duckdb From e19e3e41d447be599144ed211d6f8e8807038fea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Mon, 21 Jul 2025 22:36:40 +0000 Subject: [PATCH 48/70] feat(fabric): Add support for catalog operations --- Makefile | 2 +- sqlmesh/core/config/connection.py | 4 + sqlmesh/core/engine_adapter/fabric.py | 119 +++++++++++++++++- .../engine_adapter/integration/__init__.py | 3 + 4 files changed, 122 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index e643ae7ad2..cd2ff86ca3 100644 --- a/Makefile +++ b/Makefile @@ -174,7 +174,7 @@ athena-test: guard-AWS_ACCESS_KEY_ID guard-AWS_SECRET_ACCESS_KEY guard-ATHENA_S3 pytest -n auto -m "athena" --retries 3 --junitxml=test-results/junit-athena.xml fabric-test: guard-FABRIC_HOST guard-FABRIC_CLIENT_ID guard-FABRIC_CLIENT_SECRET guard-FABRIC_DATABASE engine-fabric-install - pytest -n auto -m "fabric" --retries 3 --junitxml=test-results/junit-fabric.xml + pytest -n auto -m "fabric" --retries 3 --timeout 600 --junitxml=test-results/junit-fabric.xml vscode_settings: mkdir -p .vscode diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py 
index 005fc531b9..4365ee7cf0 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1723,6 +1723,10 @@ def _extra_engine_config(self) -> t.Dict[str, t.Any]: return { "database": self.database, "catalog_support": CatalogSupport.FULL_SUPPORT, + "workspace": self.workspace, + "tenant": self.tenant, + "user": self.user, + "password": self.password, } diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 820338ca51..16d20d7bf7 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -2,6 +2,7 @@ import typing as t import logging +import time from sqlglot import exp from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, SourceQuery @@ -130,6 +131,18 @@ def _make_fabric_api_request( return response.json() if response.content else {} + except requests.exceptions.HTTPError as e: + error_details = "" + try: + if response.content: + error_response = response.json() + error_details = error_response.get("error", {}).get( + "message", str(error_response) + ) + except (ValueError, AttributeError): + error_details = response.text if hasattr(response, "text") else str(e) + + raise SQLMeshError(f"Fabric API HTTP error ({response.status_code}): {error_details}") except requests.exceptions.RequestException as e: raise SQLMeshError(f"Fabric API request failed: {e}") @@ -139,18 +152,70 @@ def _create_catalog(self, catalog_name: exp.Identifier) -> None: logger.info(f"Creating Fabric warehouse: {warehouse_name}") + # First check if warehouse already exists + try: + warehouses = self._make_fabric_api_request("GET", "warehouses") + for warehouse in warehouses.get("value", []): + if warehouse.get("displayName") == warehouse_name: + logger.info(f"Fabric warehouse already exists: {warehouse_name}") + return + except SQLMeshError as e: + logger.warning(f"Failed to check existing warehouses: {e}") + + # Create the warehouse request_data = { "displayName": warehouse_name, "description": f"Warehouse created by SQLMesh: {warehouse_name}", } try: - self._make_fabric_api_request("POST", "warehouses", request_data) + response = self._make_fabric_api_request("POST", "warehouses", request_data) logger.info(f"Successfully created Fabric warehouse: {warehouse_name}") + + # Wait for warehouse to become ready + max_retries = 30 # Wait up to 5 minutes + retry_delay = 10 # 10 seconds between retries + + for attempt in range(max_retries): + try: + # Try to verify warehouse exists and is ready + warehouses = self._make_fabric_api_request("GET", "warehouses") + for warehouse in warehouses.get("value", []): + if warehouse.get("displayName") == warehouse_name: + state = warehouse.get("state", "Unknown") + logger.info(f"Warehouse {warehouse_name} state: {state}") + if state == "Active": + logger.info(f"Warehouse {warehouse_name} is ready") + return + if state == "Failed": + raise SQLMeshError(f"Warehouse {warehouse_name} creation failed") + + if attempt < max_retries - 1: + logger.info( + f"Waiting for warehouse {warehouse_name} to become ready (attempt {attempt + 1}/{max_retries})" + ) + time.sleep(retry_delay) + else: + logger.warning( + f"Warehouse {warehouse_name} may not be fully ready after {max_retries} attempts" + ) + + except SQLMeshError as e: + if attempt < max_retries - 1: + logger.warning( + f"Failed to check warehouse readiness (attempt {attempt + 1}/{max_retries}): {e}" + ) + time.sleep(retry_delay) + else: + 
logger.error(f"Failed to verify warehouse readiness: {e}") + raise + except SQLMeshError as e: - if "already exists" in str(e).lower(): + error_msg = str(e).lower() + if "already exists" in error_msg or "conflict" in error_msg: logger.info(f"Fabric warehouse already exists: {warehouse_name}") return + logger.error(f"Failed to create Fabric warehouse {warehouse_name}: {e}") raise def _drop_catalog(self, catalog_name: exp.Identifier) -> None: @@ -159,8 +224,8 @@ def _drop_catalog(self, catalog_name: exp.Identifier) -> None: logger.info(f"Deleting Fabric warehouse: {warehouse_name}") - # First, we need to get the warehouse ID by listing warehouses try: + # First, get the warehouse ID by listing warehouses warehouses = self._make_fabric_api_request("GET", "warehouses") warehouse_id = None @@ -170,14 +235,58 @@ def _drop_catalog(self, catalog_name: exp.Identifier) -> None: break if not warehouse_id: - raise SQLMeshError(f"Warehouse not found: {warehouse_name}") + logger.info(f"Fabric warehouse does not exist: {warehouse_name}") + return # Delete the warehouse by ID self._make_fabric_api_request("DELETE", f"warehouses/{warehouse_id}") logger.info(f"Successfully deleted Fabric warehouse: {warehouse_name}") + # Wait for warehouse to be fully deleted + max_retries = 15 # Wait up to 2.5 minutes + retry_delay = 10 # 10 seconds between retries + + for attempt in range(max_retries): + try: + warehouses = self._make_fabric_api_request("GET", "warehouses") + still_exists = False + + for warehouse in warehouses.get("value", []): + if warehouse.get("displayName") == warehouse_name: + state = warehouse.get("state", "Unknown") + logger.info(f"Warehouse {warehouse_name} deletion state: {state}") + still_exists = True + break + + if not still_exists: + logger.info(f"Warehouse {warehouse_name} successfully deleted") + return + + if attempt < max_retries - 1: + logger.info( + f"Waiting for warehouse {warehouse_name} deletion to complete (attempt {attempt + 1}/{max_retries})" + ) + time.sleep(retry_delay) + else: + logger.warning( + f"Warehouse {warehouse_name} may still be in deletion process after {max_retries} attempts" + ) + + except SQLMeshError as e: + if attempt < max_retries - 1: + logger.warning( + f"Failed to check warehouse deletion status (attempt {attempt + 1}/{max_retries}): {e}" + ) + time.sleep(retry_delay) + else: + logger.warning(f"Failed to verify warehouse deletion: {e}") + # Don't raise here as deletion might have succeeded + return + except SQLMeshError as e: - if "not found" in str(e).lower(): + error_msg = str(e).lower() + if "not found" in error_msg or "does not exist" in error_msg: logger.info(f"Fabric warehouse does not exist: {warehouse_name}") return + logger.error(f"Failed to delete Fabric warehouse {warehouse_name}: {e}") raise diff --git a/tests/core/engine_adapter/integration/__init__.py b/tests/core/engine_adapter/integration/__init__.py index 275d8be669..eebcdaf7a4 100644 --- a/tests/core/engine_adapter/integration/__init__.py +++ b/tests/core/engine_adapter/integration/__init__.py @@ -680,6 +680,9 @@ def create_catalog(self, catalog_name: str): except Exception: pass self.engine_adapter.cursor.connection.autocommit(False) + elif self.dialect == "fabric": + # Use the engine adapter's built-in catalog creation functionality + self.engine_adapter.create_catalog(catalog_name) elif self.dialect == "snowflake": self.engine_adapter.execute(f'CREATE DATABASE IF NOT EXISTS "{catalog_name}"') elif self.dialect == "duckdb": From a869df3eb2b5b2aed6508ef50cd5e64cb71adcd0 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Tue, 22 Jul 2025 21:49:56 +0000 Subject: [PATCH 49/70] feat(fabric): Refactor _create_catalog --- Makefile | 2 +- sqlmesh/core/engine_adapter/fabric.py | 165 +++++++++++++++++--------- 2 files changed, 107 insertions(+), 60 deletions(-) diff --git a/Makefile b/Makefile index cd2ff86ca3..e643ae7ad2 100644 --- a/Makefile +++ b/Makefile @@ -174,7 +174,7 @@ athena-test: guard-AWS_ACCESS_KEY_ID guard-AWS_SECRET_ACCESS_KEY guard-ATHENA_S3 pytest -n auto -m "athena" --retries 3 --junitxml=test-results/junit-athena.xml fabric-test: guard-FABRIC_HOST guard-FABRIC_CLIENT_ID guard-FABRIC_CLIENT_SECRET guard-FABRIC_DATABASE engine-fabric-install - pytest -n auto -m "fabric" --retries 3 --timeout 600 --junitxml=test-results/junit-fabric.xml + pytest -n auto -m "fabric" --retries 3 --junitxml=test-results/junit-fabric.xml vscode_settings: mkdir -p .vscode diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 16d20d7bf7..33773fb3bc 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -146,77 +146,124 @@ def _make_fabric_api_request( except requests.exceptions.RequestException as e: raise SQLMeshError(f"Fabric API request failed: {e}") - def _create_catalog(self, catalog_name: exp.Identifier) -> None: - """Create a catalog (warehouse) in Microsoft Fabric via REST API.""" - warehouse_name = catalog_name.sql(dialect=self.dialect, identify=False) + def _make_fabric_api_request_with_location( + self, method: str, endpoint: str, data: t.Optional[t.Dict[str, t.Any]] = None + ) -> t.Dict[str, t.Any]: + """Make a request to the Fabric REST API and return response with status code and location.""" + if not requests: + raise SQLMeshError("requests library is required for Fabric catalog operations") - logger.info(f"Creating Fabric warehouse: {warehouse_name}") + workspace = self._extra_config.get("workspace") + if not workspace: + raise SQLMeshError( + "workspace parameter is required in connection config for Fabric catalog operations" + ) + + base_url = "https://api.fabric.microsoft.com/v1" + url = f"{base_url}/workspaces/{workspace}/{endpoint}" + headers = self._get_fabric_auth_headers() - # First check if warehouse already exists try: - warehouses = self._make_fabric_api_request("GET", "warehouses") - for warehouse in warehouses.get("value", []): - if warehouse.get("displayName") == warehouse_name: - logger.info(f"Fabric warehouse already exists: {warehouse_name}") + if method.upper() == "POST": + response = requests.post(url, headers=headers, json=data) + else: + raise SQLMeshError(f"Unsupported HTTP method for location tracking: {method}") + + # Check for errors first + response.raise_for_status() + + result = {"status_code": response.status_code} + + # Extract location header for polling + if "location" in response.headers: + result["location"] = response.headers["location"] + + # Include response body if present + if response.content: + result.update(response.json()) + + return result + + except requests.exceptions.HTTPError as e: + error_details = "" + try: + if response.content: + error_response = response.json() + error_details = error_response.get("error", {}).get( + "message", str(error_response) + ) + except (ValueError, AttributeError): + error_details = response.text if hasattr(response, "text") else str(e) + + raise SQLMeshError(f"Fabric API HTTP error ({response.status_code}): {error_details}") + except requests.exceptions.RequestException as e: + raise 
SQLMeshError(f"Fabric API request failed: {e}") + + def _poll_operation_status(self, location_url: str, operation_name: str) -> None: + """Poll the operation status until completion.""" + if not requests: + raise SQLMeshError("requests library is required for Fabric catalog operations") + + headers = self._get_fabric_auth_headers() + max_attempts = 60 # Poll for up to 10 minutes + initial_delay = 1 # Start with 1 second + + for attempt in range(max_attempts): + try: + response = requests.get(location_url, headers=headers) + response.raise_for_status() + + result = response.json() + status = result.get("status", "Unknown") + + logger.info(f"Operation {operation_name} status: {status}") + + if status == "Succeeded": return - except SQLMeshError as e: - logger.warning(f"Failed to check existing warehouses: {e}") + if status == "Failed": + error_msg = result.get("error", {}).get("message", "Unknown error") + raise SQLMeshError(f"Operation {operation_name} failed: {error_msg}") + elif status in ["InProgress", "Running"]: + # Use exponential backoff with max of 30 seconds + delay = min(initial_delay * (2 ** min(attempt // 3, 4)), 30) + logger.info(f"Waiting {delay} seconds before next status check...") + time.sleep(delay) + else: + logger.warning(f"Unknown status '{status}' for operation {operation_name}") + time.sleep(5) + + except requests.exceptions.RequestException as e: + if attempt < max_attempts - 1: + logger.warning(f"Failed to poll status (attempt {attempt + 1}): {e}") + time.sleep(5) + else: + raise SQLMeshError(f"Failed to poll operation status: {e}") + + raise SQLMeshError(f"Operation {operation_name} did not complete within timeout") + + def _create_catalog(self, catalog_name: exp.Identifier) -> None: + """Create a catalog (warehouse) in Microsoft Fabric via REST API.""" + warehouse_name = catalog_name.sql(dialect=self.dialect, identify=False) + logger.info(f"Creating Fabric warehouse: {warehouse_name}") - # Create the warehouse request_data = { "displayName": warehouse_name, "description": f"Warehouse created by SQLMesh: {warehouse_name}", } - try: - response = self._make_fabric_api_request("POST", "warehouses", request_data) - logger.info(f"Successfully created Fabric warehouse: {warehouse_name}") - - # Wait for warehouse to become ready - max_retries = 30 # Wait up to 5 minutes - retry_delay = 10 # 10 seconds between retries + response = self._make_fabric_api_request_with_location("POST", "warehouses", request_data) - for attempt in range(max_retries): - try: - # Try to verify warehouse exists and is ready - warehouses = self._make_fabric_api_request("GET", "warehouses") - for warehouse in warehouses.get("value", []): - if warehouse.get("displayName") == warehouse_name: - state = warehouse.get("state", "Unknown") - logger.info(f"Warehouse {warehouse_name} state: {state}") - if state == "Active": - logger.info(f"Warehouse {warehouse_name} is ready") - return - if state == "Failed": - raise SQLMeshError(f"Warehouse {warehouse_name} creation failed") - - if attempt < max_retries - 1: - logger.info( - f"Waiting for warehouse {warehouse_name} to become ready (attempt {attempt + 1}/{max_retries})" - ) - time.sleep(retry_delay) - else: - logger.warning( - f"Warehouse {warehouse_name} may not be fully ready after {max_retries} attempts" - ) - - except SQLMeshError as e: - if attempt < max_retries - 1: - logger.warning( - f"Failed to check warehouse readiness (attempt {attempt + 1}/{max_retries}): {e}" - ) - time.sleep(retry_delay) - else: - logger.error(f"Failed to verify warehouse 
readiness: {e}") - raise + # Handle direct success (201) or async creation (202) + if response.get("status_code") == 201: + logger.info(f"Successfully created Fabric warehouse: {warehouse_name}") + return - except SQLMeshError as e: - error_msg = str(e).lower() - if "already exists" in error_msg or "conflict" in error_msg: - logger.info(f"Fabric warehouse already exists: {warehouse_name}") - return - logger.error(f"Failed to create Fabric warehouse {warehouse_name}: {e}") - raise + if response.get("status_code") == 202 and response.get("location"): + logger.info(f"Warehouse creation initiated for: {warehouse_name}") + self._poll_operation_status(response["location"], warehouse_name) + logger.info(f"Successfully created Fabric warehouse: {warehouse_name}") + else: + raise SQLMeshError(f"Unexpected response from warehouse creation: {response}") def _drop_catalog(self, catalog_name: exp.Identifier) -> None: """Drop a catalog (warehouse) in Microsoft Fabric via REST API.""" From 90d5abdaf23734dd72a9ec24850ec27df16ff6a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Tue, 22 Jul 2025 22:52:22 +0000 Subject: [PATCH 50/70] feat(fabric): Refactor _drop_catalog --- sqlmesh/core/engine_adapter/fabric.py | 43 +-------------------------- 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 33773fb3bc..38d5407d9a 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -272,7 +272,7 @@ def _drop_catalog(self, catalog_name: exp.Identifier) -> None: logger.info(f"Deleting Fabric warehouse: {warehouse_name}") try: - # First, get the warehouse ID by listing warehouses + # Get the warehouse ID by listing warehouses warehouses = self._make_fabric_api_request("GET", "warehouses") warehouse_id = None @@ -289,47 +289,6 @@ def _drop_catalog(self, catalog_name: exp.Identifier) -> None: self._make_fabric_api_request("DELETE", f"warehouses/{warehouse_id}") logger.info(f"Successfully deleted Fabric warehouse: {warehouse_name}") - # Wait for warehouse to be fully deleted - max_retries = 15 # Wait up to 2.5 minutes - retry_delay = 10 # 10 seconds between retries - - for attempt in range(max_retries): - try: - warehouses = self._make_fabric_api_request("GET", "warehouses") - still_exists = False - - for warehouse in warehouses.get("value", []): - if warehouse.get("displayName") == warehouse_name: - state = warehouse.get("state", "Unknown") - logger.info(f"Warehouse {warehouse_name} deletion state: {state}") - still_exists = True - break - - if not still_exists: - logger.info(f"Warehouse {warehouse_name} successfully deleted") - return - - if attempt < max_retries - 1: - logger.info( - f"Waiting for warehouse {warehouse_name} deletion to complete (attempt {attempt + 1}/{max_retries})" - ) - time.sleep(retry_delay) - else: - logger.warning( - f"Warehouse {warehouse_name} may still be in deletion process after {max_retries} attempts" - ) - - except SQLMeshError as e: - if attempt < max_retries - 1: - logger.warning( - f"Failed to check warehouse deletion status (attempt {attempt + 1}/{max_retries}): {e}" - ) - time.sleep(retry_delay) - else: - logger.warning(f"Failed to verify warehouse deletion: {e}") - # Don't raise here as deletion might have succeeded - return - except SQLMeshError as e: error_msg = str(e).lower() if "not found" in error_msg or "does not exist" in error_msg: From eac16da84cf7564c392b9f620f5e8ab01b5972ca Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Tue, 22 Jul 2025 23:05:43 +0000 Subject: [PATCH 51/70] fix(fabric): update response json --- sqlmesh/core/engine_adapter/fabric.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 38d5407d9a..88fb368ff9 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -180,7 +180,9 @@ def _make_fabric_api_request_with_location( # Include response body if present if response.content: - result.update(response.json()) + json_data = response.json() + if json_data: + result.update(json_data) return result From f4aad0009f49c7dcba4aa5f4dc81ae87b8aed243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 08:24:29 +0000 Subject: [PATCH 52/70] feat(fabric): Override set_current_catalog --- sqlmesh/core/engine_adapter/fabric.py | 72 +++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 88fb368ff9..ae598adbaa 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -31,6 +31,30 @@ class FabricAdapter(LogicalMergeMixin, MSSQLEngineAdapter): SUPPORTS_CREATE_DROP_CATALOG = True INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT + def __init__(self, *args: t.Any, **kwargs: t.Any) -> None: + super().__init__(*args, **kwargs) + # Store the desired catalog for dynamic switching + self._target_catalog: t.Optional[str] = None + # Store the original connection factory for wrapping + self._original_connection_factory = self._connection_pool._connection_factory # type: ignore + # Replace the connection factory with our custom one + self._connection_pool._connection_factory = self._create_fabric_connection # type: ignore + + def _create_fabric_connection(self) -> t.Any: + """Custom connection factory that uses the target catalog if set.""" + # If we have a target catalog, we need to modify the connection parameters + if self._target_catalog: + # The original factory was created with partial(), so we need to extract and modify the kwargs + if hasattr(self._original_connection_factory, "keywords"): + # It's a partial function, get the original keywords + original_kwargs = self._original_connection_factory.keywords.copy() + original_kwargs["database"] = self._target_catalog + # Call the underlying function with modified kwargs + return self._original_connection_factory.func(**original_kwargs) + + # Use the original factory if no target catalog is set + return self._original_connection_factory() + def _insert_overwrite_by_condition( self, table_name: TableName, @@ -298,3 +322,51 @@ def _drop_catalog(self, catalog_name: exp.Identifier) -> None: return logger.error(f"Failed to delete Fabric warehouse {warehouse_name}: {e}") raise + + def set_current_catalog(self, catalog_name: str) -> None: + """ + Set the current catalog for Microsoft Fabric connections. + + Override to handle Fabric's stateless session limitation where USE statements + don't persist across queries. Instead, we close existing connections and + recreate them with the new catalog in the connection configuration. + + Args: + catalog_name: The name of the catalog (warehouse) to switch to + + Note: + Fabric doesn't support catalog switching via USE statements because each + statement runs as an independent session. 
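Any session-scoped state, including the current database selected with USE, is therefore lost between statements.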
This method works around this + limitation by updating the connection pool with new catalog configuration. + + See: + https://learn.microsoft.com/en-us/fabric/data-warehouse/sql-query-editor#limitations + """ + current_catalog = self.get_current_catalog() + + # If already using the requested catalog, do nothing + if current_catalog and current_catalog == catalog_name: + logger.debug(f"Already using catalog '{catalog_name}', no action needed") + return + + logger.info(f"Switching from catalog '{current_catalog}' to '{catalog_name}'") + + # Set the target catalog for our custom connection factory + self._target_catalog = catalog_name + + # Close all existing connections since Fabric requires reconnection for catalog changes + self.close() + + # Verify the catalog switch worked by getting a new connection + try: + actual_catalog = self.get_current_catalog() + if actual_catalog and actual_catalog == catalog_name: + logger.debug(f"Successfully switched to catalog '{catalog_name}'") + else: + logger.warning( + f"Catalog switch may have failed. Expected '{catalog_name}', got '{actual_catalog}'" + ) + except Exception as e: + logger.debug(f"Could not verify catalog switch: {e}") + + logger.debug(f"Updated target catalog to '{catalog_name}' and closed connections") From 38b82bb62e7d1ef690f107377673eefa02ff4c36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 09:57:02 +0000 Subject: [PATCH 53/70] feat(fabric): Override drop schema --- sqlmesh/core/engine_adapter/fabric.py | 32 +++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index ae598adbaa..174b48c040 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -370,3 +370,35 @@ def set_current_catalog(self, catalog_name: str) -> None: logger.debug(f"Could not verify catalog switch: {e}") logger.debug(f"Updated target catalog to '{catalog_name}' and closed connections") + + def drop_schema( + self, + schema_name: t.Union[str, exp.Table], + ignore_if_not_exists: bool = True, + cascade: bool = False, + **drop_args: t.Any, + ) -> None: + """ + Override drop_schema to handle catalog-qualified schema names. + Fabric doesn't support 'DROP SCHEMA [catalog].[schema]' syntax. + """ + logger.debug(f"drop_schema called with: {schema_name} (type: {type(schema_name)})") + + # If it's a string with a dot, assume it's catalog.schema format + if isinstance(schema_name, str) and "." 
in schema_name: + parts = schema_name.split(".", 1) # Split only on first dot + catalog_name = parts[0].strip('"[]') # Remove quotes/brackets + schema_only = parts[1].strip('"[]') + logger.debug( + f"Detected catalog.schema format: catalog='{catalog_name}', schema='{schema_only}'" + ) + + # Switch to the catalog first + self.set_current_catalog(catalog_name) + + # Use just the schema name + super().drop_schema(schema_only, ignore_if_not_exists, cascade, **drop_args) + else: + # No catalog qualification, use as-is + logger.debug(f"No catalog detected, using original: {schema_name}") + super().drop_schema(schema_name, ignore_if_not_exists, cascade, **drop_args) From 719a1d5967e2b61a1036f027fff574bf0571d149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 10:18:20 +0000 Subject: [PATCH 54/70] feat(fabric): Override create schema --- sqlmesh/core/engine_adapter/fabric.py | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 174b48c040..80b8f929a5 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -402,3 +402,34 @@ def drop_schema( # No catalog qualification, use as-is logger.debug(f"No catalog detected, using original: {schema_name}") super().drop_schema(schema_name, ignore_if_not_exists, cascade, **drop_args) + + def create_schema( + self, + schema_name: t.Union[str, exp.Table], + ignore_if_exists: bool = True, + **kwargs: t.Any, + ) -> None: + """ + Override create_schema to handle catalog-qualified schema names. + Fabric doesn't support 'CREATE SCHEMA [catalog].[schema]' syntax. + """ + logger.debug(f"create_schema called with: {schema_name} (type: {type(schema_name)})") + + # If it's a string with a dot, assume it's catalog.schema format + if isinstance(schema_name, str) and "." 
in schema_name: + parts = schema_name.split(".", 1) # Split only on first dot + catalog_name = parts[0].strip('"[]') # Remove quotes/brackets + schema_only = parts[1].strip('"[]') + logger.debug( + f"Detected catalog.schema format: catalog='{catalog_name}', schema='{schema_only}'" + ) + + # Switch to the catalog first + self.set_current_catalog(catalog_name) + + # Use just the schema name + super().create_schema(schema_only, ignore_if_exists, **kwargs) + else: + # No catalog qualification, use as-is + logger.debug(f"No catalog detected, using original: {schema_name}") + super().create_schema(schema_name, ignore_if_exists, **kwargs) From 3af7ec9ed4d3fbb589e439c5c265f4a933123bb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 11:18:48 +0000 Subject: [PATCH 55/70] feat(fabric): Override create view --- sqlmesh/core/engine_adapter/fabric.py | 98 +++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 80b8f929a5..55fe0c4325 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -433,3 +433,101 @@ def create_schema( # No catalog qualification, use as-is logger.debug(f"No catalog detected, using original: {schema_name}") super().create_schema(schema_name, ignore_if_exists, **kwargs) + + def create_view( + self, + view_name: t.Union[str, exp.Table], + query_or_df: t.Any, + columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, + replace: bool = True, + materialized: bool = False, + materialized_properties: t.Optional[t.Dict[str, t.Any]] = None, + table_description: t.Optional[str] = None, + column_descriptions: t.Optional[t.Dict[str, str]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + **create_kwargs: t.Any, + ) -> None: + """ + Override create_view to handle catalog-qualified view names. + Fabric doesn't support 'CREATE VIEW [catalog].[schema].[view]' syntax. 
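+ As a workaround, the adapter switches to the target catalog first and then issues the CREATE VIEW with a two-part schema.view name.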
+ """ + logger.debug(f"create_view called with: {view_name} (type: {type(view_name)})") + + # Handle exp.Table objects that might be catalog-qualified + if isinstance(view_name, exp.Table): + if view_name.catalog: + # Has catalog qualification - switch to catalog and use schema.table + catalog_name = view_name.catalog + schema_name = view_name.db or "" + table_name = view_name.name + + logger.debug( + f"Detected exp.Table with catalog: catalog='{catalog_name}', schema='{schema_name}', table='{table_name}'" + ) + + # Switch to the catalog first + self.set_current_catalog(catalog_name) + + # Create new Table expression without catalog + unqualified_view = exp.Table(this=table_name, db=schema_name) + + super().create_view( + unqualified_view, + query_or_df, + columns_to_types, + replace, + materialized, + materialized_properties, + table_description, + column_descriptions, + view_properties, + **create_kwargs, + ) + return + + # Handle string view names that might be catalog-qualified + elif isinstance(view_name, str): + # Check if it's in catalog.schema.view format + parts = view_name.split(".") + if len(parts) == 3: + # catalog.schema.view format + catalog_name = parts[0].strip('"[]') + schema_name = parts[1].strip('"[]') + view_only = parts[2].strip('"[]') + unqualified_view_str = f"{schema_name}.{view_only}" + logger.debug( + f"Detected catalog.schema.view format: catalog='{catalog_name}', unqualified='{unqualified_view_str}'" + ) + + # Switch to the catalog first + self.set_current_catalog(catalog_name) + + # Use just the schema.view name + super().create_view( + unqualified_view_str, + query_or_df, + columns_to_types, + replace, + materialized, + materialized_properties, + table_description, + column_descriptions, + view_properties, + **create_kwargs, + ) + return + + # No catalog qualification, use as-is + logger.debug(f"No catalog detected, using original: {view_name}") + super().create_view( + view_name, + query_or_df, + columns_to_types, + replace, + materialized, + materialized_properties, + table_description, + column_descriptions, + view_properties, + **create_kwargs, + ) From cedfab49f4f9660c57a3ebae7304fbcdb0f11d92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 11:39:01 +0000 Subject: [PATCH 56/70] feat(fabric): Catalog dropping functionality in TestContext --- tests/core/engine_adapter/integration/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/core/engine_adapter/integration/__init__.py b/tests/core/engine_adapter/integration/__init__.py index eebcdaf7a4..63c4ca465f 100644 --- a/tests/core/engine_adapter/integration/__init__.py +++ b/tests/core/engine_adapter/integration/__init__.py @@ -699,6 +699,9 @@ def drop_catalog(self, catalog_name: str): return # bigquery cannot create/drop catalogs if self.dialect == "databricks": self.engine_adapter.execute(f"DROP CATALOG IF EXISTS {catalog_name} CASCADE") + elif self.dialect == "fabric": + # Use the engine adapter's built-in catalog dropping functionality + self.engine_adapter.drop_catalog(catalog_name) else: self.engine_adapter.execute(f'DROP DATABASE IF EXISTS "{catalog_name}"') From 7cb84473bd7ddea57f0dd48c1a0e7c3ee4772569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 11:47:21 +0000 Subject: [PATCH 57/70] fix(fabric): Ensure schemas exist before creating tables --- sqlmesh/core/engine_adapter/fabric.py | 126 +++++++++++++++++++++++++- 1 file changed, 125 insertions(+), 1 deletion(-) diff --git 
a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 55fe0c4325..978c511260 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -434,6 +434,122 @@ def create_schema( logger.debug(f"No catalog detected, using original: {schema_name}") super().create_schema(schema_name, ignore_if_exists, **kwargs) + def _ensure_schema_exists(self, table_name: TableName) -> None: + """ + Ensure that the schema for a table exists before creating the table. + This is necessary for Fabric because schemas must exist before tables can be created in them. + """ + table = exp.to_table(table_name) + if table.db: + schema_name = table.db + catalog_name = table.catalog + + # Build the full schema name + if catalog_name: + full_schema_name = f"{catalog_name}.{schema_name}" + else: + full_schema_name = schema_name + + logger.debug(f"Ensuring schema exists: {full_schema_name}") + + try: + # Create the schema if it doesn't exist + self.create_schema(full_schema_name, ignore_if_exists=True) + except Exception as e: + logger.debug(f"Error creating schema {full_schema_name}: {e}") + # Continue anyway - the schema might already exist or we might not have permissions + + def _create_table( + self, + table_name_or_schema: t.Union[exp.Schema, TableName], + expression: t.Optional[exp.Expression], + exists: bool = True, + replace: bool = False, + columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, + table_description: t.Optional[str] = None, + column_descriptions: t.Optional[t.Dict[str, str]] = None, + table_kind: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + """ + Override _create_table to ensure schema exists before creating tables. + """ + # Extract table name for schema creation + if isinstance(table_name_or_schema, exp.Schema): + table_name = table_name_or_schema.this + else: + table_name = table_name_or_schema + + # Ensure the schema exists before creating the table + self._ensure_schema_exists(table_name) + + # Call the parent implementation + super()._create_table( + table_name_or_schema=table_name_or_schema, + expression=expression, + exists=exists, + replace=replace, + columns_to_types=columns_to_types, + table_description=table_description, + column_descriptions=column_descriptions, + table_kind=table_kind, + **kwargs, + ) + + def create_table( + self, + table_name: TableName, + columns_to_types: t.Dict[str, exp.DataType], + primary_key: t.Optional[t.Tuple[str, ...]] = None, + exists: bool = True, + table_description: t.Optional[str] = None, + column_descriptions: t.Optional[t.Dict[str, str]] = None, + **kwargs: t.Any, + ) -> None: + """ + Override create_table to ensure schema exists before creating tables. + """ + # Ensure the schema exists before creating the table + self._ensure_schema_exists(table_name) + + # Call the parent implementation + super().create_table( + table_name=table_name, + columns_to_types=columns_to_types, + primary_key=primary_key, + exists=exists, + table_description=table_description, + column_descriptions=column_descriptions, + **kwargs, + ) + + def ctas( + self, + table_name: TableName, + query_or_df: t.Any, + columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, + exists: bool = True, + table_description: t.Optional[str] = None, + column_descriptions: t.Optional[t.Dict[str, str]] = None, + **kwargs: t.Any, + ) -> None: + """ + Override ctas to ensure schema exists before creating tables. 
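+ Like create_table, it runs _ensure_schema_exists on the target table before delegating to the parent implementation.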
+ """ + # Ensure the schema exists before creating the table + self._ensure_schema_exists(table_name) + + # Call the parent implementation + super().ctas( + table_name=table_name, + query_or_df=query_or_df, + columns_to_types=columns_to_types, + exists=exists, + table_description=table_description, + column_descriptions=column_descriptions, + **kwargs, + ) + def create_view( self, view_name: t.Union[str, exp.Table], @@ -448,11 +564,19 @@ def create_view( **create_kwargs: t.Any, ) -> None: """ - Override create_view to handle catalog-qualified view names. + Override create_view to handle catalog-qualified view names and ensure schema exists. Fabric doesn't support 'CREATE VIEW [catalog].[schema].[view]' syntax. """ logger.debug(f"create_view called with: {view_name} (type: {type(view_name)})") + # Ensure schema exists for the view + if isinstance(view_name, exp.Table): + self._ensure_schema_exists(view_name) + elif isinstance(view_name, str): + # Parse string to table for schema extraction + parsed_table = exp.to_table(view_name) + self._ensure_schema_exists(parsed_table) + # Handle exp.Table objects that might be catalog-qualified if isinstance(view_name, exp.Table): if view_name.catalog: From 0ae2621697700d4ea1144e936b0b62b83ae6d876 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 13:32:32 +0000 Subject: [PATCH 58/70] Revert "Add odbc to engine_tests_cloud in circleci" This reverts commit 7756a8f3729a2caffd16d1281a818ec342bc3418. --- .circleci/continue_config.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index e56d9dd11e..b93caf482e 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -237,9 +237,6 @@ jobs: steps: - halt_unless_core - checkout - - run: - name: Install ODBC - command: sudo apt-get install unixodbc-dev - run: name: Generate database name command: | From 3d95bba6b9d6c7799e6beb876cf655e6d4aafcaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 13:33:36 +0000 Subject: [PATCH 59/70] fix(circleci): Add unixodbc-dev to common dependencies in install script --- .circleci/install-prerequisites.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/install-prerequisites.sh b/.circleci/install-prerequisites.sh index 1eebd92c71..acd25ae02c 100755 --- a/.circleci/install-prerequisites.sh +++ b/.circleci/install-prerequisites.sh @@ -12,7 +12,7 @@ fi ENGINE="$1" -COMMON_DEPENDENCIES="libpq-dev netcat-traditional" +COMMON_DEPENDENCIES="libpq-dev netcat-traditional unixodbc-dev" ENGINE_DEPENDENCIES="" if [ "$ENGINE" == "spark" ]; then From 9718fd9fcca9c3146dd8ec3b51258e1c955f916c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 14:26:12 +0000 Subject: [PATCH 60/70] Revert "fix: change varchar(max) to varchar(8000) in integration tests" This reverts commit cd4aa95de08d04ab07e45e194841882304812208. 
--- .../integration/test_integration.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index 354abb5bea..e30475e2f5 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -554,7 +554,7 @@ def test_insert_overwrite_by_time_partition(ctx_query_and_df: TestContext): if ctx.dialect == "bigquery": ds_type = "datetime" if ctx.dialect == "tsql": - ds_type = "varchar(8000)" + ds_type = "varchar(max)" ctx.columns_to_types = {"id": "int", "ds": ds_type} table = ctx.table("test_table") @@ -2255,7 +2255,7 @@ def test_table_diff_grain_check_single_key(ctx: TestContext): columns_to_types = { "key1": exp.DataType.build("int"), - "value": exp.DataType.build("varchar(8000)"), + "value": exp.DataType.build("varchar(max)"), } ctx.engine_adapter.create_table(src_table, columns_to_types) @@ -2319,8 +2319,8 @@ def test_table_diff_grain_check_multiple_keys(ctx: TestContext): columns_to_types = { "key1": exp.DataType.build("int"), - "key2": exp.DataType.build("varchar(8000)"), - "value": exp.DataType.build("varchar(8000)"), + "key2": exp.DataType.build("varchar(max)"), + "value": exp.DataType.build("varchar(max)"), } ctx.engine_adapter.create_table(src_table, columns_to_types) @@ -2377,13 +2377,13 @@ def test_table_diff_arbitrary_condition(ctx: TestContext): columns_to_types_src = { "id": exp.DataType.build("int"), - "value": exp.DataType.build("varchar(8000)"), + "value": exp.DataType.build("varchar(max)"), "ts": exp.DataType.build("timestamp"), } columns_to_types_target = { "item_id": exp.DataType.build("int"), - "value": exp.DataType.build("varchar(8000)"), + "value": exp.DataType.build("varchar(max)"), "ts": exp.DataType.build("timestamp"), } @@ -2444,8 +2444,8 @@ def test_table_diff_identical_dataset(ctx: TestContext): columns_to_types = { "key1": exp.DataType.build("int"), - "key2": exp.DataType.build("varchar(8000)"), - "value": exp.DataType.build("varchar(8000)"), + "key2": exp.DataType.build("varchar(max)"), + "value": exp.DataType.build("varchar(max)"), } ctx.engine_adapter.create_table(src_table, columns_to_types) From bd0f759c3a00dc0427ad746ffac3d66e54d0e8e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 14:31:15 +0000 Subject: [PATCH 61/70] fix(docs): update installation command and add tenant & workspace UUID to connection options for Fabric engine --- docs/integrations/engines/fabric.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/integrations/engines/fabric.md b/docs/integrations/engines/fabric.md index 1dd47fbe11..a560d85c9e 100644 --- a/docs/integrations/engines/fabric.md +++ b/docs/integrations/engines/fabric.md @@ -8,7 +8,7 @@ NOTE: Fabric Warehouse is not recommended to be used for the SQLMesh [state conn ### Installation #### Microsoft Entra ID / Azure Active Directory Authentication: ``` -pip install "sqlmesh[mssql-odbc]" +pip install "sqlmesh[fabric]" ``` ### Connection options @@ -27,6 +27,8 @@ pip install "sqlmesh[mssql-odbc]" | `appname` | The application name to use for the connection | string | N | | `conn_properties` | The list of connection properties | list[string] | N | | `autocommit` | Is autocommit mode enabled. Default: false | bool | N | -| `driver` | The driver to use for the connection. Default: pyodbc | string | N | +| `driver` | The driver to use for the connection. 
Default: pyodbc | string | N | | `driver_name` | The driver name to use for the connection. E.g., *ODBC Driver 18 for SQL Server* | string | N | +| `tenant` | The Fabric tenant UUID | string | Y | +| `workspace` | The Fabric workspace UUID | string | Y | | `odbc_properties` | The dict of ODBC connection properties. E.g., authentication: ActiveDirectoryServicePrincipal. See more [here](https://learn.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver16). | dict | N | From 6cd54305ae0b3e1b109765515bbac0ab2add2136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 14:50:28 +0000 Subject: [PATCH 62/70] fix(tests): change varchar(max) to varchar in table creation tests --- .../engine_adapter/integration/test_integration.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index e30475e2f5..509ecf3cfa 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -2255,7 +2255,7 @@ def test_table_diff_grain_check_single_key(ctx: TestContext): columns_to_types = { "key1": exp.DataType.build("int"), - "value": exp.DataType.build("varchar(max)"), + "value": exp.DataType.build("varchar"), } ctx.engine_adapter.create_table(src_table, columns_to_types) @@ -2319,8 +2319,8 @@ def test_table_diff_grain_check_multiple_keys(ctx: TestContext): columns_to_types = { "key1": exp.DataType.build("int"), - "key2": exp.DataType.build("varchar(max)"), - "value": exp.DataType.build("varchar(max)"), + "key2": exp.DataType.build("varchar"), + "value": exp.DataType.build("varchar"), } ctx.engine_adapter.create_table(src_table, columns_to_types) @@ -2377,13 +2377,13 @@ def test_table_diff_arbitrary_condition(ctx: TestContext): columns_to_types_src = { "id": exp.DataType.build("int"), - "value": exp.DataType.build("varchar(max)"), + "value": exp.DataType.build("varchar"), "ts": exp.DataType.build("timestamp"), } columns_to_types_target = { "item_id": exp.DataType.build("int"), - "value": exp.DataType.build("varchar(max)"), + "value": exp.DataType.build("varchar"), "ts": exp.DataType.build("timestamp"), } @@ -2444,8 +2444,8 @@ def test_table_diff_identical_dataset(ctx: TestContext): columns_to_types = { "key1": exp.DataType.build("int"), - "key2": exp.DataType.build("varchar(max)"), - "value": exp.DataType.build("varchar(max)"), + "key2": exp.DataType.build("varchar"), + "value": exp.DataType.build("varchar"), } ctx.engine_adapter.create_table(src_table, columns_to_types) From d4a3c2b09e36f0be91d47fdef46d05f482391049 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 19:11:05 +0200 Subject: [PATCH 63/70] Bump sqlglot --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 954ba8da03..a29dbc34a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "requests", "rich[jupyter]", "ruamel.yaml", - "sqlglot[rs]~=27.2.0", + "sqlglot[rs]~=27.3.1", "tenacity", "time-machine", "json-stream" From efa97af41ab16b0a54ae7ac4c8c7aa97ecd1f263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 24 Jul 2025 17:33:29 +0000 Subject: [PATCH 64/70] feat(circleci): add fabric to the list of cloud engines to test --- .circleci/continue_config.yml | 1 + 1 file changed, 1 insertion(+) diff --git 
a/.circleci/continue_config.yml b/.circleci/continue_config.yml index b93caf482e..afaf0e080b 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -303,6 +303,7 @@ workflows: - bigquery - clickhouse-cloud - athena + - fabric filters: branches: only: From e693baf18b2b8f461ef5abc6bf8d46e7cbf28fcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Mon, 28 Jul 2025 12:26:44 +0000 Subject: [PATCH 65/70] fix(fabric): Update docs and add id to parameter names --- docs/integrations/engines/fabric.md | 4 ++-- sqlmesh/core/config/connection.py | 8 +++---- sqlmesh/core/engine_adapter/fabric.py | 23 ++++++++----------- .../engine_adapter/integration/config.yaml | 4 ++-- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/docs/integrations/engines/fabric.md b/docs/integrations/engines/fabric.md index a560d85c9e..eb00b5ac1d 100644 --- a/docs/integrations/engines/fabric.md +++ b/docs/integrations/engines/fabric.md @@ -29,6 +29,6 @@ pip install "sqlmesh[fabric]" | `autocommit` | Is autocommit mode enabled. Default: false | bool | N | | `driver` | The driver to use for the connection. Default: pyodbc | string | N | | `driver_name` | The driver name to use for the connection. E.g., *ODBC Driver 18 for SQL Server* | string | N | -| `tenant` | The Fabric tenant UUID | string | Y | -| `workspace` | The Fabric workspace UUID | string | Y | +| `tenant_id` | The Azure / Entra tenant UUID | string | Y | +| `workspace_id` | The Fabric workspace UUID. The preferred way to retrieve it is by running `notebookutils.runtime.context.get("currentWorkspaceId")` in a Python notebook. | string | Y | | `odbc_properties` | The dict of ODBC connection properties. E.g., authentication: ActiveDirectoryServicePrincipal. See more [here](https://learn.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver16).
| dict | N | diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 4365ee7cf0..e72374a877 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1708,8 +1708,8 @@ class FabricConnectionConfig(MSSQLConnectionConfig): DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric" # type: ignore DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17 # type: ignore driver: t.Literal["pyodbc"] = "pyodbc" - workspace: str - tenant: str + workspace_id: str + tenant_id: str autocommit: t.Optional[bool] = True @property @@ -1723,8 +1723,8 @@ def _extra_engine_config(self) -> t.Dict[str, t.Any]: return { "database": self.database, "catalog_support": CatalogSupport.FULL_SUPPORT, - "workspace": self.workspace, - "tenant": self.tenant, + "workspace_id": self.workspace_id, + "tenant_id": self.tenant_id, "user": self.user, "password": self.password, } diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 978c511260..257a974424 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -82,7 +82,7 @@ def _insert_overwrite_by_condition( def _get_access_token(self) -> str: """Get access token using Service Principal authentication.""" - tenant_id = self._extra_config.get("tenant") + tenant_id = self._extra_config.get("tenant_id") client_id = self._extra_config.get("user") client_secret = self._extra_config.get("password") @@ -127,14 +127,14 @@ def _make_fabric_api_request( if not requests: raise SQLMeshError("requests library is required for Fabric catalog operations") - workspace = self._extra_config.get("workspace") - if not workspace: + workspace_id = self._extra_config.get("workspace_id") + if not workspace_id: raise SQLMeshError( - "workspace parameter is required in connection config for Fabric catalog operations" + "workspace_id parameter is required in connection config for Fabric catalog operations" ) base_url = "https://api.fabric.microsoft.com/v1" - url = f"{base_url}/workspaces/{workspace}/{endpoint}" + url = f"{base_url}/workspaces/{workspace_id}/{endpoint}" headers = self._get_fabric_auth_headers() @@ -177,14 +177,14 @@ def _make_fabric_api_request_with_location( if not requests: raise SQLMeshError("requests library is required for Fabric catalog operations") - workspace = self._extra_config.get("workspace") - if not workspace: + workspace_id = self._extra_config.get("workspace_id") + if not workspace_id: raise SQLMeshError( - "workspace parameter is required in connection config for Fabric catalog operations" + "workspace_id parameter is required in connection config for Fabric catalog operations" ) base_url = "https://api.fabric.microsoft.com/v1" - url = f"{base_url}/workspaces/{workspace}/{endpoint}" + url = f"{base_url}/workspaces/{workspace_id}/{endpoint}" headers = self._get_fabric_auth_headers() try: @@ -445,10 +445,7 @@ def _ensure_schema_exists(self, table_name: TableName) -> None: catalog_name = table.catalog # Build the full schema name - if catalog_name: - full_schema_name = f"{catalog_name}.{schema_name}" - else: - full_schema_name = schema_name + full_schema_name = f"{catalog_name}.{schema_name}" if catalog_name else schema_name logger.debug(f"Ensuring schema exists: {full_schema_name}") diff --git a/tests/core/engine_adapter/integration/config.yaml b/tests/core/engine_adapter/integration/config.yaml index 402f618fef..6733077ff0 100644 --- a/tests/core/engine_adapter/integration/config.yaml +++ 
b/tests/core/engine_adapter/integration/config.yaml @@ -194,8 +194,8 @@ gateways: user: {{ env_var("FABRIC_CLIENT_ID") }} password: {{ env_var("FABRIC_CLIENT_SECRET") }} database: {{ env_var("FABRIC_DATABASE") }} - tenant: {{ env_var("FABRIC_TENANT") }} - workspace: {{ env_var("FABRIC_WORKSPACE") }} + tenant_id: {{ env_var("FABRIC_TENANT_ID") }} + workspace_id: {{ env_var("FABRIC_WORKSPACE_ID") }} odbc_properties: Authentication: ActiveDirectoryServicePrincipal state_connection: From eaba56a24a78fa11fc26daf15537b542f401869b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Tue, 29 Jul 2025 23:13:13 +0000 Subject: [PATCH 66/70] fix(fabric): Leverage tenacity for retry logic --- sqlmesh/core/engine_adapter/fabric.py | 80 ++++++++++++++------------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 257a974424..ad07f62786 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -2,8 +2,8 @@ import typing as t import logging -import time from sqlglot import exp +from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_result from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, SourceQuery from sqlmesh.core.engine_adapter.base import EngineAdapter @@ -225,47 +225,53 @@ def _make_fabric_api_request_with_location( except requests.exceptions.RequestException as e: raise SQLMeshError(f"Fabric API request failed: {e}") - def _poll_operation_status(self, location_url: str, operation_name: str) -> None: - """Poll the operation status until completion.""" + @retry( + wait=wait_exponential(multiplier=1, min=1, max=30), + stop=stop_after_attempt(60), + retry=retry_if_result(lambda result: result not in ["Succeeded", "Failed"]), + ) + def _check_operation_status(self, location_url: str, operation_name: str) -> str: + """Check the operation status and return the status string.""" if not requests: raise SQLMeshError("requests library is required for Fabric catalog operations") headers = self._get_fabric_auth_headers() - max_attempts = 60 # Poll for up to 10 minutes - initial_delay = 1 # Start with 1 second - for attempt in range(max_attempts): - try: - response = requests.get(location_url, headers=headers) - response.raise_for_status() - - result = response.json() - status = result.get("status", "Unknown") - - logger.info(f"Operation {operation_name} status: {status}") - - if status == "Succeeded": - return - if status == "Failed": - error_msg = result.get("error", {}).get("message", "Unknown error") - raise SQLMeshError(f"Operation {operation_name} failed: {error_msg}") - elif status in ["InProgress", "Running"]: - # Use exponential backoff with max of 30 seconds - delay = min(initial_delay * (2 ** min(attempt // 3, 4)), 30) - logger.info(f"Waiting {delay} seconds before next status check...") - time.sleep(delay) - else: - logger.warning(f"Unknown status '{status}' for operation {operation_name}") - time.sleep(5) - - except requests.exceptions.RequestException as e: - if attempt < max_attempts - 1: - logger.warning(f"Failed to poll status (attempt {attempt + 1}): {e}") - time.sleep(5) - else: - raise SQLMeshError(f"Failed to poll operation status: {e}") - - raise SQLMeshError(f"Operation {operation_name} did not complete within timeout") + try: + response = requests.get(location_url, headers=headers) + response.raise_for_status() + + result = 
response.json() + status = result.get("status", "Unknown") + + logger.info(f"Operation {operation_name} status: {status}") + + if status == "Failed": + error_msg = result.get("error", {}).get("message", "Unknown error") + raise SQLMeshError(f"Operation {operation_name} failed: {error_msg}") + elif status in ["InProgress", "Running"]: + logger.info(f"Operation {operation_name} still in progress...") + elif status != "Succeeded": + logger.warning(f"Unknown status '{status}' for operation {operation_name}") + + return status + + except requests.exceptions.RequestException as e: + logger.warning(f"Failed to poll status: {e}") + raise SQLMeshError(f"Failed to poll operation status: {e}") + + def _poll_operation_status(self, location_url: str, operation_name: str) -> None: + """Poll the operation status until completion.""" + try: + final_status = self._check_operation_status(location_url, operation_name) + if final_status != "Succeeded": + raise SQLMeshError( + f"Operation {operation_name} completed with status: {final_status}" + ) + except Exception as e: + if "retry" in str(e).lower(): + raise SQLMeshError(f"Operation {operation_name} did not complete within timeout") + raise def _create_catalog(self, catalog_name: exp.Identifier) -> None: """Create a catalog (warehouse) in Microsoft Fabric via REST API.""" From 9a720c8a0c626df6f129a9cbd36a6125a601af0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Tue, 29 Jul 2025 23:15:02 +0000 Subject: [PATCH 67/70] fix(fabric): Use SchemaName instead of t.Union --- sqlmesh/core/engine_adapter/fabric.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index ad07f62786..e79b5cb235 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -11,7 +11,7 @@ from sqlmesh.utils.errors import SQLMeshError if t.TYPE_CHECKING: - from sqlmesh.core._typing import TableName + from sqlmesh.core._typing import TableName, SchemaName from sqlmesh.core.engine_adapter.mixins import LogicalMergeMixin @@ -379,7 +379,7 @@ def set_current_catalog(self, catalog_name: str) -> None: def drop_schema( self, - schema_name: t.Union[str, exp.Table], + schema_name: SchemaName, ignore_if_not_exists: bool = True, cascade: bool = False, **drop_args: t.Any, ) -> None: @@ -411,7 +411,7 @@ def drop_schema( def create_schema( self, - schema_name: t.Union[str, exp.Table], + schema_name: SchemaName, ignore_if_exists: bool = True, **kwargs: t.Any, ) -> None: @@ -555,7 +555,7 @@ def ctas( def create_view( self, - view_name: t.Union[str, exp.Table], + view_name: SchemaName, query_or_df: t.Any, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, replace: bool = True, From 0dccfdc61250fb2047db7a8965e50964d9fcd34b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Tue, 29 Jul 2025 23:22:18 +0000 Subject: [PATCH 68/70] fix(fabric): Use exp.Table to extract schema name --- sqlmesh/core/engine_adapter/fabric.py | 159 +++++++++++--------- 1 file changed, 68 insertions(+), 91 deletions(-) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index e79b5cb235..6d0d1066d2 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -390,11 +390,18 @@ def drop_schema( """ logger.debug(f"drop_schema called with: {schema_name} (type: {type(schema_name)})") - # If it's a string with a dot, assume it's catalog.schema format - if
isinstance(schema_name, str) and "." in schema_name: - parts = schema_name.split(".", 1) # Split only on first dot - catalog_name = parts[0].strip('"[]') # Remove quotes/brackets - schema_only = parts[1].strip('"[]') + # Parse schema_name into an exp.Table to properly handle both string and Table cases + table = exp.to_table(schema_name) + + if table.catalog: + # 3-part name detected (catalog.db.table) - this shouldn't happen for schema operations + raise SQLMeshError( + f"Invalid schema name format: {schema_name}. Expected 'schema' or 'catalog.schema'" + ) + elif table.db: + # Catalog-qualified schema: catalog.schema + catalog_name = table.db + schema_only = table.name logger.debug( f"Detected catalog.schema format: catalog='{catalog_name}', schema='{schema_only}'" ) @@ -421,11 +428,18 @@ def create_schema( """ logger.debug(f"create_schema called with: {schema_name} (type: {type(schema_name)})") - # If it's a string with a dot, assume it's catalog.schema format - if isinstance(schema_name, str) and "." in schema_name: - parts = schema_name.split(".", 1) # Split only on first dot - catalog_name = parts[0].strip('"[]') # Remove quotes/brackets - schema_only = parts[1].strip('"[]') + # Parse schema_name into an exp.Table to properly handle both string and Table cases + table = exp.to_table(schema_name) + + if table.catalog: + # 3-part name detected (catalog.db.table) - this shouldn't happen for schema operations + raise SQLMeshError( + f"Invalid schema name format: {schema_name}. Expected 'schema' or 'catalog.schema'" + ) + elif table.db: + # Catalog-qualified schema: catalog.schema + catalog_name = table.db + schema_only = table.name logger.debug( f"Detected catalog.schema format: catalog='{catalog_name}', schema='{schema_only}'" ) @@ -572,89 +586,52 @@ def create_view( """ logger.debug(f"create_view called with: {view_name} (type: {type(view_name)})") + # Parse view_name into an exp.Table to properly handle both string and Table cases + table = exp.to_table(view_name) + # Ensure schema exists for the view - if isinstance(view_name, exp.Table): - self._ensure_schema_exists(view_name) - elif isinstance(view_name, str): - # Parse string to table for schema extraction - parsed_table = exp.to_table(view_name) - self._ensure_schema_exists(parsed_table) - - # Handle exp.Table objects that might be catalog-qualified - if isinstance(view_name, exp.Table): - if view_name.catalog: - # Has catalog qualification - switch to catalog and use schema.table - catalog_name = view_name.catalog - schema_name = view_name.db or "" - table_name = view_name.name - - logger.debug( - f"Detected exp.Table with catalog: catalog='{catalog_name}', schema='{schema_name}', table='{table_name}'" - ) + self._ensure_schema_exists(table) - # Switch to the catalog first - self.set_current_catalog(catalog_name) - - # Create new Table expression without catalog - unqualified_view = exp.Table(this=table_name, db=schema_name) - - super().create_view( - unqualified_view, - query_or_df, - columns_to_types, - replace, - materialized, - materialized_properties, - table_description, - column_descriptions, - view_properties, - **create_kwargs, - ) - return + if table.catalog: + # 3-part name: catalog.schema.view + catalog_name = table.catalog + schema_name = table.db or "" + view_only = table.name - # Handle string view names that might be catalog-qualified - elif isinstance(view_name, str): - # Check if it's in catalog.schema.view format - parts = view_name.split(".") - if len(parts) == 3: - # catalog.schema.view format - catalog_name 
= parts[0].strip('"[]') - schema_name = parts[1].strip('"[]') - view_only = parts[2].strip('"[]') - unqualified_view_str = f"{schema_name}.{view_only}" - logger.debug( - f"Detected catalog.schema.view format: catalog='{catalog_name}', unqualified='{unqualified_view_str}'" - ) + logger.debug( + f"Detected catalog.schema.view format: catalog='{catalog_name}', schema='{schema_name}', view='{view_only}'" + ) - # Switch to the catalog first - self.set_current_catalog(catalog_name) - - # Use just the schema.view name - super().create_view( - unqualified_view_str, - query_or_df, - columns_to_types, - replace, - materialized, - materialized_properties, - table_description, - column_descriptions, - view_properties, - **create_kwargs, - ) - return + # Switch to the catalog first + self.set_current_catalog(catalog_name) - # No catalog qualification, use as-is - logger.debug(f"No catalog detected, using original: {view_name}") - super().create_view( - view_name, - query_or_df, - columns_to_types, - replace, - materialized, - materialized_properties, - table_description, - column_descriptions, - view_properties, - **create_kwargs, - ) + # Create new Table expression without catalog + unqualified_view = exp.Table(this=view_only, db=schema_name) + + super().create_view( + unqualified_view, + query_or_df, + columns_to_types, + replace, + materialized, + materialized_properties, + table_description, + column_descriptions, + view_properties, + **create_kwargs, + ) + else: + # No catalog qualification, use as-is + logger.debug(f"No catalog detected, using original: {view_name}") + super().create_view( + view_name, + query_or_df, + columns_to_types, + replace, + materialized, + materialized_properties, + table_description, + column_descriptions, + view_properties, + **create_kwargs, + ) From 0569051f6e483bd727babc34006b15c1327e5ec1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Wed, 30 Jul 2025 06:44:03 +0000 Subject: [PATCH 69/70] fix(fabric): Correct catalog.schema parsing --- sqlmesh/core/engine_adapter/fabric.py | 132 ++++++++++++++++---------- 1 file changed, 84 insertions(+), 48 deletions(-) diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index 6d0d1066d2..26e93f55ed 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -390,31 +390,49 @@ def drop_schema( """ logger.debug(f"drop_schema called with: {schema_name} (type: {type(schema_name)})") - # Parse schema_name into an exp.Table to properly handle both string and Table cases - table = exp.to_table(schema_name) - - if table.catalog: - # 3-part name detected (catalog.db.table) - this shouldn't happen for schema operations - raise SQLMeshError( - f"Invalid schema name format: {schema_name}. 
Expected 'schema' or 'catalog.schema'" - ) - elif table.db: - # Catalog-qualified schema: catalog.schema - catalog_name = table.db - schema_only = table.name - logger.debug( - f"Detected catalog.schema format: catalog='{catalog_name}', schema='{schema_only}'" - ) - - # Switch to the catalog first - self.set_current_catalog(catalog_name) - - # Use just the schema name - super().drop_schema(schema_only, ignore_if_not_exists, cascade, **drop_args) + # Handle Table objects created by schema_() function + if isinstance(schema_name, exp.Table) and not schema_name.name: + # This is a schema Table object - check for catalog qualification + if schema_name.catalog: + # Catalog-qualified schema: catalog.schema + catalog_name = schema_name.catalog + schema_only = schema_name.db + logger.debug( + f"Detected catalog-qualified schema: catalog='{catalog_name}', schema='{schema_only}'" + ) + # Switch to the catalog first + self.set_current_catalog(catalog_name) + # Use just the schema name + super().drop_schema(schema_only, ignore_if_not_exists, cascade, **drop_args) + else: + # Schema only, no catalog + schema_only = schema_name.db + logger.debug(f"Detected schema-only: schema='{schema_only}'") + super().drop_schema(schema_only, ignore_if_not_exists, cascade, **drop_args) else: - # No catalog qualification, use as-is - logger.debug(f"No catalog detected, using original: {schema_name}") - super().drop_schema(schema_name, ignore_if_not_exists, cascade, **drop_args) + # Handle string or table name inputs by parsing as table + table = exp.to_table(schema_name) + + if table.catalog: + # 3-part name detected (catalog.db.table) - this shouldn't happen for schema operations + raise SQLMeshError( + f"Invalid schema name format: {schema_name}. Expected 'schema' or 'catalog.schema', got 3-part name" + ) + elif table.db: + # Catalog-qualified schema: catalog.schema + catalog_name = table.db + schema_only = table.name + logger.debug( + f"Detected catalog.schema format: catalog='{catalog_name}', schema='{schema_only}'" + ) + # Switch to the catalog first + self.set_current_catalog(catalog_name) + # Use just the schema name + super().drop_schema(schema_only, ignore_if_not_exists, cascade, **drop_args) + else: + # No catalog qualification, use as-is + logger.debug(f"No catalog detected, using original: {schema_name}") + super().drop_schema(schema_name, ignore_if_not_exists, cascade, **drop_args) def create_schema( self, @@ -428,31 +446,49 @@ def create_schema( """ logger.debug(f"create_schema called with: {schema_name} (type: {type(schema_name)})") - # Parse schema_name into an exp.Table to properly handle both string and Table cases - table = exp.to_table(schema_name) - - if table.catalog: - # 3-part name detected (catalog.db.table) - this shouldn't happen for schema operations - raise SQLMeshError( - f"Invalid schema name format: {schema_name}. 
Expected 'schema' or 'catalog.schema'" - ) - elif table.db: - # Catalog-qualified schema: catalog.schema - catalog_name = table.db - schema_only = table.name - logger.debug( - f"Detected catalog.schema format: catalog='{catalog_name}', schema='{schema_only}'" - ) - - # Switch to the catalog first - self.set_current_catalog(catalog_name) - - # Use just the schema name - super().create_schema(schema_only, ignore_if_exists, **kwargs) + # Handle Table objects created by schema_() function + if isinstance(schema_name, exp.Table) and not schema_name.name: + # This is a schema Table object - check for catalog qualification + if schema_name.catalog: + # Catalog-qualified schema: catalog.schema + catalog_name = schema_name.catalog + schema_only = schema_name.db + logger.debug( + f"Detected catalog-qualified schema: catalog='{catalog_name}', schema='{schema_only}'" + ) + # Switch to the catalog first + self.set_current_catalog(catalog_name) + # Use just the schema name + super().create_schema(schema_only, ignore_if_exists, **kwargs) + else: + # Schema only, no catalog + schema_only = schema_name.db + logger.debug(f"Detected schema-only: schema='{schema_only}'") + super().create_schema(schema_only, ignore_if_exists, **kwargs) else: - # No catalog qualification, use as-is - logger.debug(f"No catalog detected, using original: {schema_name}") - super().create_schema(schema_name, ignore_if_exists, **kwargs) + # Handle string or table name inputs by parsing as table + table = exp.to_table(schema_name) + + if table.catalog: + # 3-part name detected (catalog.db.table) - this shouldn't happen for schema operations + raise SQLMeshError( + f"Invalid schema name format: {schema_name}. Expected 'schema' or 'catalog.schema', got 3-part name" + ) + elif table.db: + # Catalog-qualified schema: catalog.schema + catalog_name = table.db + schema_only = table.name + logger.debug( + f"Detected catalog.schema format: catalog='{catalog_name}', schema='{schema_only}'" + ) + # Switch to the catalog first + self.set_current_catalog(catalog_name) + # Use just the schema name + super().create_schema(schema_only, ignore_if_exists, **kwargs) + else: + # No catalog qualification, use as-is + logger.debug(f"No catalog detected, using original: {schema_name}") + super().create_schema(schema_name, ignore_if_exists, **kwargs) def _ensure_schema_exists(self, table_name: TableName) -> None: """ From cfcc05fe2f0e2db1fabf3676c6ca4418696bf0b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Wed, 30 Jul 2025 20:56:21 +0000 Subject: [PATCH 70/70] fix(fabric): Add workspace_id and tenant_id to unit tests --- tests/core/test_connection_config.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/core/test_connection_config.py b/tests/core/test_connection_config.py index 14306f7fce..522c85c434 100644 --- a/tests/core/test_connection_config.py +++ b/tests/core/test_connection_config.py @@ -1692,7 +1692,13 @@ def mock_add_output_converter(sql_type, converter_func): def test_fabric_connection_config_defaults(make_config): """Test Fabric connection config defaults to pyodbc and autocommit=True.""" - config = make_config(type="fabric", host="localhost", check_import=False) + config = make_config( + type="fabric", + host="localhost", + workspace_id="test-workspace-id", + tenant_id="test-tenant-id", + check_import=False, + ) assert isinstance(config, FabricConnectionConfig) assert config.driver == "pyodbc" assert config.autocommit is True @@ -1713,6 +1719,8 @@ def 
test_fabric_connection_config_parameter_validation(make_config): trust_server_certificate=True, encrypt=False, odbc_properties={"Authentication": "ActiveDirectoryServicePrincipal"}, + workspace_id="test-workspace-id", + tenant_id="test-tenant-id", check_import=False, ) assert isinstance(config, FabricConnectionConfig) @@ -1741,6 +1749,8 @@ def test_fabric_pyodbc_connection_string_generation(): trust_server_certificate=True, encrypt=True, login_timeout=30, + workspace_id="test-workspace-id", + tenant_id="test-tenant-id", check_import=False, )
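For reference, a minimal gateway configuration that exercises the parameters added in this series might look like the sketch below. This is an illustration, not project documentation: the gateway name, host placeholder, and environment variable names are assumptions modeled on the integration `config.yaml` and the `docs/integrations/engines/fabric.md` options shown above.

```
gateways:
  fabric:
    connection:
      type: fabric
      host: {{ env_var("FABRIC_HOST") }}                  # Fabric SQL endpoint hostname (placeholder)
      user: {{ env_var("FABRIC_CLIENT_ID") }}             # service principal client ID
      password: {{ env_var("FABRIC_CLIENT_SECRET") }}     # service principal secret
      database: {{ env_var("FABRIC_DATABASE") }}          # default warehouse
      tenant_id: {{ env_var("FABRIC_TENANT_ID") }}        # Azure / Entra tenant UUID
      workspace_id: {{ env_var("FABRIC_WORKSPACE_ID") }}  # Fabric workspace UUID
      odbc_properties:
        Authentication: ActiveDirectoryServicePrincipal
```

With a configuration along these lines, `tenant_id`, `user`, and `password` feed the service principal token request used for the REST catalog operations, while `workspace_id` scopes the `https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/warehouses` calls made by the adapter.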