From 3c05a5b20b4b9ef558d0c922f834f00863e724ce Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 31 Aug 2025 11:26:40 +0200
Subject: [PATCH 001/150] chore(deps): bump moto from 5.1.10 to 5.1.11 (#873)

Bumps [moto](https://github.com/getmoto/moto) from 5.1.10 to 5.1.11.
- [Release notes](https://github.com/getmoto/moto/releases)
- [Changelog](https://github.com/getmoto/moto/blob/master/CHANGELOG.md)
- [Commits](https://github.com/getmoto/moto/compare/5.1.10...5.1.11)

---
updated-dependencies:
- dependency-name: moto
  dependency-version: 5.1.11
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index c7bfee78b..209794685 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -134,7 +134,7 @@ dev = [
     "datacontract-cli[all]",
     "httpx==0.28.1",
     "kafka-python",
-    "moto==5.1.10",
+    "moto==5.1.11",
     "pandas>=2.1.0",
     "pre-commit>=3.7.1,<4.4.0",
     "pytest",

From 35803e4d0f2b2da78bed27d3e032b28ee0ee6b9b Mon Sep 17 00:00:00 2001
From: Damien Maresma <136118865+dmaresma@users.noreply.github.com>
Date: Wed, 3 Sep 2025 02:17:01 -0400
Subject: [PATCH 002/150] fix : odcs excel export Error filling properties (#876)

* init

* fix None artifacts in the property name when nested

* format
---
 datacontract/export/excel_exporter.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/datacontract/export/excel_exporter.py b/datacontract/export/excel_exporter.py
index 102a7a18a..df8896211 100644
--- a/datacontract/export/excel_exporter.py
+++ b/datacontract/export/excel_exporter.py
@@ -283,7 +283,7 @@ def fill_single_property_template(
     sheet: Worksheet, row_index: int, prefix: str, property: SchemaProperty, header_map: dict
 ) -> int:
     """Fill a single property row using the template's column structure"""
-    property_name = f"{prefix}.{property.name}" if prefix else property.name
+    property_name = f"{prefix}{'.' + property.name if property.name else ''}" if prefix else property.name
 
     # Helper function to set cell value by header name
     def set_by_header(header_name: str, value: Any):
@@ -307,7 +307,7 @@ def set_by_header(header_name: str, value: Any):
     set_by_header("Classification", property.classification)
     set_by_header("Tags", ",".join(property.tags) if property.tags else "")
     set_by_header(
-        "Example(s)", ",".join(property.examples) if property.examples else ""
+        "Example(s)", ",".join(map(str, property.examples)) if property.examples else ""
     )  # Note: using "Example(s)" as in template
     set_by_header("Encrypted Name", property.encryptedName)
     set_by_header(
@@ -404,7 +404,7 @@ def fill_properties_quality(
         if not property.name:
             continue
 
-        full_property_name = f"{prefix}.{property.name}" if prefix else property.name
+        full_property_name = f"{prefix}{'.' + property.name if property.name else ''}" if prefix else property.name
 
         # Add quality attributes for this property
         if property.quality:

From ece0d17937f210561bb91d75c14d64759a319dee Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 22 Sep 2025 14:08:45 +0200
Subject: [PATCH 003/150] chore(deps): bump moto from 5.1.11 to 5.1.13 (#897)

Bumps [moto](https://github.com/getmoto/moto) from 5.1.11 to 5.1.13.
- [Release notes](https://github.com/getmoto/moto/releases)
- [Changelog](https://github.com/getmoto/moto/blob/master/CHANGELOG.md)
- [Commits](https://github.com/getmoto/moto/compare/5.1.11...5.1.13)

---
updated-dependencies:
- dependency-name: moto
  dependency-version: 5.1.13
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 209794685..c4ac8846a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -134,7 +134,7 @@ dev = [
     "datacontract-cli[all]",
     "httpx==0.28.1",
     "kafka-python",
-    "moto==5.1.11",
+    "moto==5.1.13",
     "pandas>=2.1.0",
     "pre-commit>=3.7.1,<4.4.0",
     "pytest",

From 9c36ffc18cb0181c408e60686f5c66e6b340cd00 Mon Sep 17 00:00:00 2001
From: pug <34815797+0xpugsley@users.noreply.github.com>
Date: Mon, 22 Sep 2025 14:40:29 +0200
Subject: [PATCH 004/150] Fix markdown servers rendering (#887)

* refactor: enhance extra_to_markdown function to support table cell formatting

- Updated extra_to_markdown to accept an additional parameter for table cell context.
- Adjusted markdown generation for extra attributes based on the new parameter.
- Improved handling of None values and formatting for better Markdown output.

* refactor: streamline extra_to_markdown function for improved readability and performance

- Simplified the logic in extra_to_markdown by using a list to accumulate parts and joining them at the end.
- Enhanced handling of Markdown formatting for extra attributes, including better management of line endings based on context.
- Introduced a helper function to render headers, improving code clarity and maintainability.

* feat: add another server entry in markdown exporter tests

- Introduced a new 'development' section in datacontract.yaml for S3 data access.
- Defined environment, location, format, and roles for the development setup.
- Updated expected.md to reflect the new development configuration in the output.

* fix: resolve server section rendering issue in markdown exporter

- Fixed the rendering of the server section in the markdown exporter to ensure proper output formatting.
---
 CHANGELOG.md | 2 +-
 datacontract/export/markdown_converter.py | 55 +++++++++++++------
 .../markdown/export/datacontract.yaml | 12 ++++
 tests/fixtures/markdown/export/expected.md | 1 +
 4 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a422b2ec..3fdc2e7df 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,7 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Improved regex for extracting Azure storage account names from URLs with containerName@storageAccountName format (#848)
 - JSON Schema Check: Add globbing support for local JSON files
-
+- Fixed server section rendering for markdown exporter
 
 ## [0.10.34] - 2025-08-06
diff --git a/datacontract/export/markdown_converter.py b/datacontract/export/markdown_converter.py
index 0d235fae3..077f22d20 100644
--- a/datacontract/export/markdown_converter.py
+++ b/datacontract/export/markdown_converter.py
@@ -82,7 +82,7 @@ def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_
         if value
     ]
     description = f"*{description_to_markdown(description_value)}*"
-    extra = [extra_to_markdown(obj)] if obj.model_extra else []
+    extra = [extra_to_markdown(obj, is_in_table_cell)] if obj.model_extra else []
     return newline_char.join([description] + attributes + extra)
 
 
@@ -293,26 +293,49 @@ def dict_to_markdown(dictionary: Dict[str, str]) -> str:
     return "\n".join(markdown_parts) + "\n"
 
 
-def extra_to_markdown(obj: BaseModel) -> str:
+def extra_to_markdown(obj: BaseModel, is_in_table_cell: bool = False) -> str:
     """
     Convert the extra attributes of a data contract to Markdown format.
 
     Args:
         obj (BaseModel): The data contract object containing extra attributes.
+        is_in_table_cell (bool): Whether the extra attributes are in a table cell.
 
     Returns:
         str: A Markdown formatted string representing the extra attributes of the data contract.
     """
-    markdown_part = ""
     extra = obj.model_extra
-    if extra:
-        for key_extra, value_extra in extra.items():
-            markdown_part += f"\n### {key_extra.capitalize()}\n"
-            if isinstance(value_extra, list) and len(value_extra):
-                if isinstance(value_extra[0], dict):
-                    markdown_part += array_of_dict_to_markdown(value_extra)
-                elif isinstance(value_extra[0], str):
-                    markdown_part += array_to_markdown(value_extra)
-            elif isinstance(value_extra, dict):
-                markdown_part += dict_to_markdown(value_extra)
-            else:
-                markdown_part += f"{str(value_extra)}\n"
-    return markdown_part
+
+    if not extra:
+        return ""
+
+    bullet_char = "•"
+    value_line_ending = "" if is_in_table_cell else "\n"
+    row_suffix = "<br>" if is_in_table_cell else ""
+
+    def render_header(key: str) -> str:
+        return (
+            f"{bullet_char} **{key}:** "
+            if is_in_table_cell
+            else f"\n### {key.capitalize()}\n"
+        )
+
+    parts: list[str] = []
+    for key_extra, value_extra in extra.items():
+        if not value_extra:
+            continue
+
+        parts.append(render_header(key_extra))
+
+        if isinstance(value_extra, list) and len(value_extra):
+            if isinstance(value_extra[0], dict):
+                parts.append(array_of_dict_to_markdown(value_extra))
+            elif isinstance(value_extra[0], str):
+                parts.append(array_to_markdown(value_extra))
+        elif isinstance(value_extra, dict):
+            parts.append(dict_to_markdown(value_extra))
+        else:
+            parts.append(f"{str(value_extra)}{value_line_ending}")
+
+        if row_suffix:
+            parts.append(row_suffix)
+
+    return "".join(parts)
diff --git a/tests/fixtures/markdown/export/datacontract.yaml b/tests/fixtures/markdown/export/datacontract.yaml
index 992af7335..81b477936 100644
--- a/tests/fixtures/markdown/export/datacontract.yaml
+++ b/tests/fixtures/markdown/export/datacontract.yaml
@@ -24,6 +24,18 @@ servers:
       description: Access to the data for US region
     - name: analyst_cn
       description: Access to the data for China region
+  development:
+    type: s3
+    environment: dev
+    location: s3://datacontract-example-orders-latest/v2/{model}/*.json
+    format: json
+    delimiter: new_line
+    description: "One folder per model. One file per day."
+    roles:
+      - name: analyst_us
+        description: Access to the data for US region
+      - name: analyst_cn
+        description: Access to the data for China region
 terms:
   usage: |
     Data can be used for reports, analytics and machine learning use cases.
diff --git a/tests/fixtures/markdown/export/expected.md b/tests/fixtures/markdown/export/expected.md
index 37d88bfd7..15e83af1b 100644
--- a/tests/fixtures/markdown/export/expected.md
+++ b/tests/fixtures/markdown/export/expected.md
@@ -10,6 +10,7 @@
 | Name | Type | Attributes |
 | ---- | ---- | ---------- |
 | production | s3 | *One folder per model. One file per day.*<br>• **environment:** prod<br>• **format:** json<br>• **delimiter:** new_line<br>• **location:** s3://datacontract-example-orders-latest/v2/{model}/*.json<br>• **roles:** [{'name': 'analyst_us', 'description': 'Access to the data for US region'}, {'name': 'analyst_cn', 'description': 'Access to the data for China region'}] |
+| development | s3 | *One folder per model. One file per day.*<br>• **environment:** dev<br>• **format:** json<br>• **delimiter:** new_line<br>• **location:** s3://datacontract-example-orders-latest/v2/{model}/*.json<br>• **roles:** [{'name': 'analyst_us', 'description': 'Access to the data for US region'}, {'name': 'analyst_cn', 'description': 'Access to the data for China region'}] |
 
 ## Terms
 *No description.*

From e3a591511aa6d20bad82a1419273f1dd02627d95 Mon Sep 17 00:00:00 2001
From: sugato <47884819+toshifumisuga@users.noreply.github.com>
Date: Wed, 24 Sep 2025 16:33:06 +0900
Subject: [PATCH 005/150] fix primaryKey support for dbt export (#898)

---
 datacontract/export/dbt_converter.py | 34 +++++++-
 tests/test_export_dbt_models.py | 125 +++++++++++++++++++++++++++
 2 files changed, 155 insertions(+), 4 deletions(-)

diff --git a/datacontract/export/dbt_converter.py b/datacontract/export/dbt_converter.py
index 3cf07afe3..fef30373c 100644
--- a/datacontract/export/dbt_converter.py
+++ b/datacontract/export/dbt_converter.py
@@ -115,9 +115,28 @@ def _to_dbt_model(
         dbt_model["config"]["contract"] = {"enforced": True}
     if model_value.description is not None:
        dbt_model["description"] = model_value.description.strip().replace("\n", " ")
-    columns = _to_columns(data_contract_spec, model_value.fields, _supports_constraints(model_type), adapter_type)
+
+    # Handle model-level primaryKey (before columns for better YAML ordering)
+    primary_key_columns = []
+    if hasattr(model_value, 'primaryKey') and model_value.primaryKey:
+        if isinstance(model_value.primaryKey, list) and len(model_value.primaryKey) > 1:
+            # Multiple columns: use dbt_utils.unique_combination_of_columns
+            dbt_model["data_tests"] = [{
+                "dbt_utils.unique_combination_of_columns": {
+                    "combination_of_columns": model_value.primaryKey
+                }
+            }]
+        elif isinstance(model_value.primaryKey, list) and len(model_value.primaryKey) == 1:
+            # Single column: handle at column level (pass to _to_columns)
+            primary_key_columns = model_value.primaryKey
+        elif isinstance(model_value.primaryKey, str):
+            # Single column as string: handle at column level
+            primary_key_columns = [model_value.primaryKey]
+
+    columns = _to_columns(data_contract_spec, model_value.fields, _supports_constraints(model_type), adapter_type, primary_key_columns)
     if columns:
         dbt_model["columns"] = columns
+
     return dbt_model
@@ -143,10 +162,13 @@ def _to_columns(
     fields: Dict[str, Field],
     supports_constraints: bool,
     adapter_type: Optional[str],
+    primary_key_columns: Optional[list] = None,
 ) -> list:
     columns = []
+    primary_key_columns = primary_key_columns or []
     for field_name, field in fields.items():
-        column = _to_column(data_contract_spec, field_name, field, supports_constraints, adapter_type)
+        is_primary_key = field_name in primary_key_columns
+        column = _to_column(data_contract_spec, field_name, field, supports_constraints, adapter_type, is_primary_key)
         columns.append(column)
     return columns
@@ -164,6 +186,7 @@ def _to_column(
     field: Field,
     supports_constraints: bool,
     adapter_type: Optional[str],
+    is_primary_key: bool = False,
 ) -> dict:
     column = {"name": field_name}
     adapter_type = adapter_type or "snowflake"
@@ -178,12 +201,15 @@
     )
     if field.description is not None:
         column["description"] = field.description.strip().replace("\n", " ")
-    if field.required:
+    # Handle required/not_null constraint
+    if field.required or is_primary_key:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "not_null"})
         else:
             column["data_tests"].append("not_null")
-    if field.unique:
+
+    # Handle unique constraint
+    if field.unique or is_primary_key:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "unique"})
         else:
diff --git a/tests/test_export_dbt_models.py b/tests/test_export_dbt_models.py
index f6c216409..a3f4ff37b 100644
--- a/tests/test_export_dbt_models.py
+++ b/tests/test_export_dbt_models.py
@@ -31,6 +31,11 @@ def test_to_dbt_models():
       contract:
         enforced: true
     description: The orders model
+    data_tests:
+      - dbt_utils.unique_combination_of_columns:
+          combination_of_columns:
+            - order_id
+            - order_status
     columns:
       - name: order_id
         data_type: VARCHAR
@@ -88,6 +93,11 @@ def test_to_dbt_models_with_server():
       contract:
         enforced: true
     description: The orders model
+    data_tests:
+      - dbt_utils.unique_combination_of_columns:
+          combination_of_columns:
+            - order_id
+            - order_status
     columns:
       - name: order_id
         data_type: STRING
@@ -142,6 +152,11 @@ def test_to_dbt_models_with_no_model_type():
         data_contract: orders-unit-test
         owner: checkout
     description: The orders model
+    data_tests:
+      - dbt_utils.unique_combination_of_columns:
+          combination_of_columns:
+            - order_id
+            - order_status
     columns:
       - name: order_id
         data_tests:
@@ -182,6 +197,116 @@ def test_to_dbt_models_with_no_model_type():
     assert result == yaml.safe_load(expected_dbt_model)
 
 
+def test_to_dbt_models_with_model_level_composite_primary_key():
+    """Test model-level primaryKey with multiple columns generates dbt_utils.unique_combination_of_columns"""
+    from datacontract.model.data_contract_specification import DataContractSpecification, Field, Info, Model
+
+    # Create test data with model-level composite primaryKey
+    data_contract = DataContractSpecification(
+        id="my-data-contract-id",
+        info=Info(title="My Data Contract", version="0.0.1"),
+        models={
+            "sfdc_loc_tenants_test": Model(
+                type="table",
+                primaryKey=["tenant_id", "account_id"],  # Model-level composite primary key
+                fields={
+                    "tenant_id": Field(type="string", required=True),
+                    "account_id": Field(type="string", required=True),
+                    "name": Field(type="string", required=True)
+                }
+            )
+        }
+    )
+
+    expected_dbt_model = """
+version: 2
+models:
+  - name: sfdc_loc_tenants_test
+    config:
+      meta:
+        data_contract: my-data-contract-id
+      materialized: table
+      contract:
+        enforced: true
+    data_tests:
+      - dbt_utils.unique_combination_of_columns:
+          combination_of_columns:
+            - tenant_id
+            - account_id
+    columns:
+      - name: tenant_id
+        data_type: STRING
+        constraints:
+          - type: not_null
+      - name: account_id
+        data_type: STRING
+        constraints:
+          - type: not_null
+      - name: name
+        data_type: STRING
+        constraints:
+          - type: not_null
+"""
+
+    result = yaml.safe_load(to_dbt_models_yaml(data_contract))
+    expected = yaml.safe_load(expected_dbt_model)
+
+    assert result == expected
+
+
+def test_to_dbt_models_with_single_column_primary_key():
+    """Test model-level primaryKey with single column adds unique constraint to column"""
+    from datacontract.model.data_contract_specification import DataContractSpecification, Field, Info, Model
+
+    # Create test data with model-level single primaryKey
+    data_contract = DataContractSpecification(
+        id="my-data-contract-id",
+        info=Info(title="My Data Contract", version="0.0.1"),
+        models={
+            "sfdc_loc_tenants_test": Model(
+                type="table",
+                primaryKey=["tenant_id"],  # Model-level single primary key
+                fields={
+                    "tenant_id": Field(type="string", required=True),
+                    "account_id": Field(type="string", required=True),
+                    "name": Field(type="string", required=True)
+                }
+            )
+        }
+    )
+
+    expected_dbt_model = """
+version: 2
+models:
+  - name: sfdc_loc_tenants_test
+    config:
+      meta:
+        data_contract: my-data-contract-id
+      materialized: table
+      contract:
+        enforced: true
+    columns:
+      - name: tenant_id
+        data_type: STRING
+        constraints:
+          - type: not_null
+          - type: unique
+      - name: account_id
+        data_type: STRING
+        constraints:
+          - type: not_null
+      - name: name
+        data_type: STRING
+        constraints:
+          - type: not_null
+"""
+
+    result = yaml.safe_load(to_dbt_models_yaml(data_contract))
+    expected = yaml.safe_load(expected_dbt_model)
+
+    assert result == expected
+
+
 def read_file(file):
     if not os.path.exists(file):
         print(f"The file '{file}' does not exist.")

From 04f9d21ac86b7c8df765bb078a15beda75fc1802 Mon Sep 17 00:00:00 2001
From: jochen
Date: Wed, 24 Sep 2025 19:40:29 +0200
Subject: [PATCH 006/150] chore: update dataContractSpecification to version 1.2.1

---
 API.md | 2 +-
 README.md | 2 +-
 datacontract/api.py | 2 +-
 datacontract/export/rdf_converter.py | 4 ++--
 datacontract/imports/odcs_v3_importer.py | 8 ++++----
 datacontract/init/init_template.py | 2 +-
 datacontract/lint/resolve.py | 2 +-
 datacontract/lint/schema.py | 2 +-
 datacontract/schemas/datacontract-1.1.0.init.yaml | 2 +-
 datacontract/schemas/datacontract-1.2.0.init.yaml | 2 +-
 datacontract/schemas/download | 6 +++---
 tests/fixtures/avro/export/datacontract.yaml | 2 +-
 .../fixtures/avro/export/datacontract_decimal.yaml | 2 +-
 tests/fixtures/avro/export/datacontract_enum.yaml | 2 +-
 .../avro/export/datacontract_logicalType.yaml | 2 +-
 .../avro/export/datacontract_test_field_float.yaml | 2 +-
 .../avro/export/datacontract_test_field_map.yaml | 2 +-
 .../export/datacontract_test_field_namespace.yaml | 2 +-
 .../export/datacontract_test_logical_type.yaml | 2 +-
 .../avro/export/datacontract_test_required.yaml | 2 +-
 .../fixtures/azure-delta-remote/datacontract.yaml | 2 +-
 tests/fixtures/azure-json-remote/datacontract.yaml | 2 +-
 .../azure-parquet-remote/datacontract.yaml | 2 +-
 tests/fixtures/bigquery/datacontract.yaml | 2 +-
 tests/fixtures/bigquery/datacontract_complex.yaml | 2 +-
 tests/fixtures/bigquery/export/datacontract.yaml | 2 +-
 tests/fixtures/bigquery/import/datacontract.yaml | 2 +-
 .../bigquery/import/datacontract_multi_import.yaml | 2 +-
 .../breaking/datacontract-definitions-v1.yaml | 2 +-
 .../breaking/datacontract-definitions-v2.yaml | 2 +-
 .../breaking/datacontract-definitions-v3.yaml | 2 +-
 .../breaking/datacontract-fields-array-v1.yaml | 2 +-
 .../breaking/datacontract-fields-array-v2.yaml | 2 +-
 .../fixtures/breaking/datacontract-fields-v1.yaml | 2 +-
 .../fixtures/breaking/datacontract-fields-v2.yaml | 2 +-
 .../fixtures/breaking/datacontract-fields-v3.yaml | 2 +-
 .../fixtures/breaking/datacontract-models-v1.yaml | 2 +-
 .../fixtures/breaking/datacontract-models-v2.yaml | 2 +-
 .../fixtures/breaking/datacontract-models-v3.yaml | 2 +-
 .../fixtures/breaking/datacontract-quality-v1.yaml | 2 +-
 .../fixtures/breaking/datacontract-quality-v2.yaml | 2 +-
 .../fixtures/breaking/datacontract-quality-v3.yaml | 2 +-
 tests/fixtures/catalog/datacontract-1.yaml | 2 +-
 tests/fixtures/catalog/datacontract-2.yaml | 2 +-
 tests/fixtures/custom/export/datacontract.yaml | 2 +-
 .../data-caterer/export/datacontract_nested.yaml | 2 +-
 tests/fixtures/databricks-sql/datacontract.yaml | 2 +-
 .../databricks-unity/import/datacontract.yaml | 2 +-
 .../import/datacontract_complex_types.yaml | 2 +-
 tests/fixtures/dataframe/datacontract.yaml | 2 +-
 tests/fixtures/dbml/datacontract.yaml | 2 +-
 tests/fixtures/dbml/import/datacontract.yaml | 2 +-
 .../dbml/import/datacontract_schema_filtered.yaml | 2 +-
 .../dbml/import/datacontract_table_filtered.yaml | 2 +-
 tests/fixtures/dbt/export/datacontract.yaml | 2 +-
 tests/fixtures/export/datacontract.html | 2 +-
 tests/fixtures/export/datacontract.yaml | 2 +-
 tests/fixtures/export/datacontract_nested.yaml | 2 +-
 .../export/datacontract_no_model_type.yaml | 2 +-
 tests/fixtures/export/datacontract_s3.yaml | 2 +-
 .../fixtures/export/rdf/datacontract-complex.yaml | 2 +-
 tests/fixtures/export/rdf/datacontract.yaml | 2 +-
 tests/fixtures/gcs-csv-remote/datacontract.yaml | 2 +-
 tests/fixtures/gcs-json-remote/datacontract.yaml | 2 +-
 tests/fixtures/glue/datacontract-empty-model.yaml | 2 +-
 tests/fixtures/glue/datacontract.yaml | 2 +-
 .../datacontract_quality_column.yaml | 2 +-
 tests/fixtures/import/football-datacontract.yml | 2 +-
 ...ball_deeply_nested_no_required_datacontract.yml | 2 +-
 .../import/json/inventory_ndjson.datacontract.yaml | 2 +-
 .../import/json/product_detail.datacontract.yaml | 2 +-
 .../import/json/product_simple.datacontract.yaml | 2 +-
 .../import/orders_union-types_datacontract.yml | 2 +-
 tests/fixtures/junit/datacontract.yaml | 2 +-
 tests/fixtures/kafka-avro-remote/datacontract.yaml | 2 +-
 tests/fixtures/kafka-json-remote/datacontract.yaml | 2 +-
 tests/fixtures/kafka/datacontract.yaml | 2 +-
 .../fixtures/lint/datacontract_csv_lint_base.yaml | 2 +-
 .../fixtures/lint/datacontract_quality_schema.yaml | 2 +-
 .../fixtures/lint/datacontract_unknown_model.yaml | 2 +-
 tests/fixtures/lint/invalid_datacontract.yaml | 2 +-
 tests/fixtures/lint/valid_datacontract.yaml | 2 +-
 tests/fixtures/lint/valid_datacontract_ref.yaml | 2 +-
 .../lint/valid_datacontract_references.yaml | 2 +-
 tests/fixtures/local-delta/datacontract.yaml | 2 +-
 .../fixtures/local-json-complex/datacontract.yaml | 2 +-
 tests/fixtures/local-json-nd/datacontract.yaml | 2 +-
 tests/fixtures/local-json/datacontract.yaml | 2 +-
 tests/fixtures/markdown/export/datacontract.yaml | 2 +-
 .../odcs_v3/adventureworks.datacontract.yml | 2 +-
 .../fixtures/odcs_v3/full-example.datacontract.yml | 8 +++++---
 tests/fixtures/odcs_v3/full-example.odcs.yaml | 6 ++++--
 tests/fixtures/parquet/datacontract.yaml | 2 +-
 tests/fixtures/parquet/datacontract_array.yaml | 2 +-
 tests/fixtures/parquet/datacontract_bigint.yaml | 2 +-
 tests/fixtures/parquet/datacontract_binary.yaml | 2 +-
 tests/fixtures/parquet/datacontract_boolean.yaml | 2 +-
 tests/fixtures/parquet/datacontract_date.yaml | 2 +-
 tests/fixtures/parquet/datacontract_decimal.yaml | 2 +-
 tests/fixtures/parquet/datacontract_double.yaml | 2 +-
 tests/fixtures/parquet/datacontract_float.yaml | 2 +-
 tests/fixtures/parquet/datacontract_integer.yaml | 2 +-
 tests/fixtures/parquet/datacontract_invalid.yaml | 2 +-
 tests/fixtures/parquet/datacontract_map.yaml | 2 +-
 tests/fixtures/parquet/datacontract_string.yaml | 2 +-
 tests/fixtures/parquet/datacontract_struct.yaml | 2 +-
 tests/fixtures/parquet/datacontract_timestamp.yaml | 2 +-
 .../parquet/datacontract_timestamp_ntz.yaml | 2 +-
 tests/fixtures/postgres-export/datacontract.yaml | 2 +-
 tests/fixtures/postgres/datacontract.yaml | 2 +-
 .../postgres/datacontract_case_sensitive.yaml | 2 +-
 .../postgres/datacontract_servicelevels.yaml | 2 +-
 tests/fixtures/protobuf/datacontract.yaml | 2 +-
 tests/fixtures/quality/datacontract.yaml | 2 +-
 tests/fixtures/s3-csv/datacontract.yaml | 2 +-
 tests/fixtures/s3-delta/datacontract.yaml | 2 +-
 tests/fixtures/s3-json-complex/datacontract.yaml | 2 +-
 .../s3-json-multiple-models/datacontract.yaml | 2 +-
 tests/fixtures/s3-json-remote/datacontract.yaml | 2 +-
 tests/fixtures/s3-json/datacontract.yaml | 2 +-
 tests/fixtures/snowflake/datacontract.yaml | 2 +-
 tests/fixtures/sodacl/datacontract.yaml | 2 +-
 tests/fixtures/spark/export/datacontract.yaml | 2 +-
 .../spark/import/users_datacontract_desc.yml | 2 +-
 .../spark/import/users_datacontract_no_desc.yml | 2 +-
 tests/fixtures/spec/datacontract_aliases.yaml | 2 +-
 tests/fixtures/spec/datacontract_fields_field.yaml | 2 +-
 tests/fixtures/sqlserver/datacontract.yaml | 2 +-
 tests/fixtures/trino/datacontract.yaml | 2 +-
 tests/test_duckdb_json.py | 2 +-
 tests/test_export_complex_data_contract.py | 2 +-
 tests/test_export_rdf.py | 12 ++++++------
 tests/test_export_sodacl.py | 2 +-
 tests/test_import_avro.py | 12 ++++++------
 tests/test_import_csv.py | 2 +-
 tests/test_import_dbt.py | 8 ++++----
 tests/test_import_iceberg.py | 2 +-
 tests/test_import_parquet.py | 2 +-
 tests/test_import_protobuf.py | 2 +-
 tests/test_import_sql_postgres.py | 4 ++--
 tests/test_import_sql_sqlserver.py | 2 +-
 tests/test_resolve.py | 14 +++++++-------
 142 files changed, 175 insertions(+), 171 deletions(-)

diff --git a/API.md b/API.md
index dc6d4c491..f44074632 100644
--- a/API.md
+++ b/API.md
@@ -39,7 +39,7 @@ curl -X 'POST' \
   'http://localhost:4242/test?server=production' \
   -H 'accept: application/json' \
   -H 'Content-Type: application/yaml' \
-  -d 'dataContractSpecification: 1.2.0
+  -d 'dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:orders-latest
 info:
   title: Orders Latest
diff --git a/README.md b/README.md
index 94fcd8715..03e55e0e7 100644
--- a/README.md
+++ b/README.md
@@ -1989,7 +1989,7 @@ if __name__ == "__main__":
 Output
 
 ```yaml
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: uuid-custom
 info:
   title: my_custom_imported_data
diff --git a/datacontract/api.py b/datacontract/api.py
index ff5ae82ad..7c306a09a 100644
--- a/datacontract/api.py
+++ b/datacontract/api.py
@@ -10,7 +10,7 @@
 from datacontract.data_contract import DataContract, ExportFormat
 from datacontract.model.run import Run
 
-DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.2.0
+DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:orders-latest
 info:
   title: Orders Latest
diff --git a/datacontract/export/rdf_converter.py b/datacontract/export/rdf_converter.py
index 75fa240f2..944faf194 100644
--- a/datacontract/export/rdf_converter.py
+++ b/datacontract/export/rdf_converter.py
@@ -57,8 +57,8 @@ def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
     else:
         g = Graph(base=Namespace(""))
 
-    dc = Namespace("https://datacontract.com/DataContractSpecification/1.2.0/")
-    dcx = Namespace("https://datacontract.com/DataContractSpecification/1.2.0/Extension/")
+    dc = Namespace("https://datacontract.com/DataContractSpecification/1.2.1/")
+    dcx = Namespace("https://datacontract.com/DataContractSpecification/1.2.1/Extension/")
 
     g.bind("dc", dc)
     g.bind("dcx", dcx)
diff --git a/datacontract/imports/odcs_v3_importer.py b/datacontract/imports/odcs_v3_importer.py
index 80ee24904..9054dab8a 100644
--- a/datacontract/imports/odcs_v3_importer.py
+++ b/datacontract/imports/odcs_v3_importer.py
@@ -231,6 +231,8 @@ def convert_quality_list(odcs_quality_list):
             quality.description = odcs_quality.description
         if odcs_quality.query is not None:
             quality.query = odcs_quality.query
+        if odcs_quality.rule is not None:
+            quality.metric = odcs_quality.rule
         if odcs_quality.mustBe is not None:
             quality.mustBe = odcs_quality.mustBe
         if odcs_quality.mustNotBe is not None:
             quality.mustNotBe = odcs_quality.mustNotBe
         if odcs_quality.mustBeGreaterThan is not None:
             quality.mustBeGreaterThan = odcs_quality.mustBeGreaterThan
         if odcs_quality.mustBeGreaterOrEqualTo is not None:
-            quality.mustBeGreaterThanOrEqualTo = odcs_quality.mustBeGreaterOrEqualTo
+            quality.mustBeGreaterOrEqualTo = odcs_quality.mustBeGreaterOrEqualTo
         if odcs_quality.mustBeLessThan is not None:
             quality.mustBeLessThan = odcs_quality.mustBeLessThan
         if odcs_quality.mustBeLessOrEqualTo is not None:
-            quality.mustBeLessThanOrEqualTo = odcs_quality.mustBeLessOrEqualTo
+            quality.mustBeLessOrEqualTo = odcs_quality.mustBeLessOrEqualTo
         if odcs_quality.mustBeBetween is not None:
             quality.mustBeBetween = odcs_quality.mustBeBetween
         if odcs_quality.mustNotBeBetween is not None:
@@ -255,8 +257,6 @@
             quality.model_extra["businessImpact"] = odcs_quality.businessImpact
         if odcs_quality.dimension is not None:
             quality.model_extra["dimension"] = odcs_quality.dimension
-        if odcs_quality.rule is not None:
-            quality.model_extra["rule"] = odcs_quality.rule
         if odcs_quality.schedule is not None:
             quality.model_extra["schedule"] = odcs_quality.schedule
         if odcs_quality.scheduler is not None:
diff --git a/datacontract/init/init_template.py b/datacontract/init/init_template.py
index 03ed4871e..fe0473ce9 100644
--- a/datacontract/init/init_template.py
+++ b/datacontract/init/init_template.py
@@ -3,7 +3,7 @@
 
 import requests
 
-DEFAULT_DATA_CONTRACT_INIT_TEMPLATE = "datacontract-1.2.0.init.yaml"
+DEFAULT_DATA_CONTRACT_INIT_TEMPLATE = "datacontract-1.2.1.init.yaml"
 
 
 def get_init_template(location: str = None) -> str:
diff --git a/datacontract/lint/resolve.py b/datacontract/lint/resolve.py
index 575fdc6a5..87b354cdc 100644
--- a/datacontract/lint/resolve.py
+++ b/datacontract/lint/resolve.py
@@ -303,7 +303,7 @@ def _resolve_data_contract_from_str(
         # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
         odcs = parse_odcs_v3_from_str(data_contract_str)
-        data_contract_specification = DataContractSpecification(dataContractSpecification="1.2.0")
+        data_contract_specification = DataContractSpecification(dataContractSpecification="1.2.1")
         return import_from_odcs(data_contract_specification, odcs)
 
     logging.info("Importing DCS")
diff --git a/datacontract/lint/schema.py b/datacontract/lint/schema.py
index 52a2eec38..f32b7cce6 100644
--- a/datacontract/lint/schema.py
+++ b/datacontract/lint/schema.py
@@ -8,7 +8,7 @@
 
 from datacontract.model.exceptions import DataContractException
 
-DEFAULT_DATA_CONTRACT_SCHEMA = "datacontract-1.2.0.schema.json"
+DEFAULT_DATA_CONTRACT_SCHEMA = "datacontract-1.2.1.schema.json"
 
 
 def fetch_schema(location: str = None) -> Dict[str, Any]:
diff --git a/datacontract/schemas/datacontract-1.1.0.init.yaml b/datacontract/schemas/datacontract-1.1.0.init.yaml
index 2528401a2..29baf9bf8 100644
--- a/datacontract/schemas/datacontract-1.1.0.init.yaml
+++ b/datacontract/schemas/datacontract-1.1.0.init.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/datacontract/schemas/datacontract-1.2.0.init.yaml b/datacontract/schemas/datacontract-1.2.0.init.yaml
index 2528401a2..29baf9bf8 100644
--- a/datacontract/schemas/datacontract-1.2.0.init.yaml
+++ b/datacontract/schemas/datacontract-1.2.0.init.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/datacontract/schemas/download b/datacontract/schemas/download
index b152c7693..7b740e0fb 100644
--- a/datacontract/schemas/download
+++ b/datacontract/schemas/download
@@ -1,7 +1,7 @@
 #!/bin/bash
 set -e
 
-curl -o datacontract-1.2.0.init.yaml https://datacontract.com/datacontract.init.yaml
-curl -o datacontract-1.2.0.schema.json https://datacontract.com/datacontract.schema.json
-curl -o odcs-3.0.1.schema.json https://raw.githubusercontent.com/bitol-io/open-data-contract-standard/refs/heads/main/schema/odcs-json-schema-v3.0.1.json
+curl -o datacontract-1.2.1.init.yaml https://datacontract.com/datacontract.init.yaml
+curl -o datacontract-1.2.1.schema.json https://datacontract.com/datacontract.schema.json
+curl -o odcs-3.0.2.schema.json https://raw.githubusercontent.com/bitol-io/open-data-contract-standard/refs/heads/main/schema/odcs-json-schema-v3.0.2.json
diff --git a/tests/fixtures/avro/export/datacontract.yaml b/tests/fixtures/avro/export/datacontract.yaml
index 526b44e0d..47b6e7ffc 100644
--- a/tests/fixtures/avro/export/datacontract.yaml
+++ b/tests/fixtures/avro/export/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders
 info:
   title: Orders
diff --git a/tests/fixtures/avro/export/datacontract_decimal.yaml b/tests/fixtures/avro/export/datacontract_decimal.yaml
index 6630df9f5..e9e24fee5 100644
--- a/tests/fixtures/avro/export/datacontract_decimal.yaml
+++ b/tests/fixtures/avro/export/datacontract_decimal.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/avro/export/datacontract_enum.yaml b/tests/fixtures/avro/export/datacontract_enum.yaml
index e1a565009..7cd27e1c8 100644
--- a/tests/fixtures/avro/export/datacontract_enum.yaml
+++ b/tests/fixtures/avro/export/datacontract_enum.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/avro/export/datacontract_logicalType.yaml b/tests/fixtures/avro/export/datacontract_logicalType.yaml
index d3cdeadb4..953a97742 100644
--- a/tests/fixtures/avro/export/datacontract_logicalType.yaml
+++ b/tests/fixtures/avro/export/datacontract_logicalType.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/avro/export/datacontract_test_field_float.yaml b/tests/fixtures/avro/export/datacontract_test_field_float.yaml
index ea62cb2fe..8a5f84614 100644
--- a/tests/fixtures/avro/export/datacontract_test_field_float.yaml
+++ b/tests/fixtures/avro/export/datacontract_test_field_float.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: pageviews
 info:
   title: Pageviews
diff --git a/tests/fixtures/avro/export/datacontract_test_field_map.yaml b/tests/fixtures/avro/export/datacontract_test_field_map.yaml
index 9d7966b86..dffe8e79a 100644
--- a/tests/fixtures/avro/export/datacontract_test_field_map.yaml
+++ b/tests/fixtures/avro/export/datacontract_test_field_map.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: pageviews
 info:
   title: Pageviews
diff --git a/tests/fixtures/avro/export/datacontract_test_field_namespace.yaml b/tests/fixtures/avro/export/datacontract_test_field_namespace.yaml
index fbf5756bb..fe5513e08 100644
--- a/tests/fixtures/avro/export/datacontract_test_field_namespace.yaml
+++ b/tests/fixtures/avro/export/datacontract_test_field_namespace.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: pageviews
 info:
   title: Pageviews
diff --git a/tests/fixtures/avro/export/datacontract_test_logical_type.yaml b/tests/fixtures/avro/export/datacontract_test_logical_type.yaml
index 467e27e45..55c6896b6 100644
--- a/tests/fixtures/avro/export/datacontract_test_logical_type.yaml
+++ b/tests/fixtures/avro/export/datacontract_test_logical_type.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/avro/export/datacontract_test_required.yaml b/tests/fixtures/avro/export/datacontract_test_required.yaml
index 8dea1ecb6..f50062e5f 100644
--- a/tests/fixtures/avro/export/datacontract_test_required.yaml
+++ b/tests/fixtures/avro/export/datacontract_test_required.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: pageviews
 info:
   title: Pageviews
diff --git a/tests/fixtures/azure-delta-remote/datacontract.yaml b/tests/fixtures/azure-delta-remote/datacontract.yaml
index 91b88b2d9..4188770c3 100644
--- a/tests/fixtures/azure-delta-remote/datacontract.yaml
+++ b/tests/fixtures/azure-delta-remote/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/azure-json-remote/datacontract.yaml b/tests/fixtures/azure-json-remote/datacontract.yaml
index e72c41252..171e2fcd0 100644
--- a/tests/fixtures/azure-json-remote/datacontract.yaml
+++ b/tests/fixtures/azure-json-remote/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/azure-parquet-remote/datacontract.yaml b/tests/fixtures/azure-parquet-remote/datacontract.yaml
index 891a9f865..ae469bd1e 100644
--- a/tests/fixtures/azure-parquet-remote/datacontract.yaml
+++ b/tests/fixtures/azure-parquet-remote/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/bigquery/datacontract.yaml b/tests/fixtures/bigquery/datacontract.yaml
index 5fd8d0a23..3271ba4ab 100644
--- a/tests/fixtures/bigquery/datacontract.yaml
+++ b/tests/fixtures/bigquery/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: bigquery
 info:
   title: bigquery
diff --git a/tests/fixtures/bigquery/datacontract_complex.yaml b/tests/fixtures/bigquery/datacontract_complex.yaml
index ca7145e0c..2918e7aa5 100644
--- a/tests/fixtures/bigquery/datacontract_complex.yaml
+++ b/tests/fixtures/bigquery/datacontract_complex.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: bigquery
 info:
   title: bigquery
diff --git a/tests/fixtures/bigquery/export/datacontract.yaml b/tests/fixtures/bigquery/export/datacontract.yaml
index e4f26db1f..871d39c36 100644
--- a/tests/fixtures/bigquery/export/datacontract.yaml
+++ b/tests/fixtures/bigquery/export/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/bigquery/import/datacontract.yaml b/tests/fixtures/bigquery/import/datacontract.yaml
index 629210fef..d921bb416 100644
--- a/tests/fixtures/bigquery/import/datacontract.yaml
+++ b/tests/fixtures/bigquery/import/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/bigquery/import/datacontract_multi_import.yaml b/tests/fixtures/bigquery/import/datacontract_multi_import.yaml
index 364a6045e..b486ee23b 100644
--- a/tests/fixtures/bigquery/import/datacontract_multi_import.yaml
+++ b/tests/fixtures/bigquery/import/datacontract_multi_import.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/breaking/datacontract-definitions-v1.yaml b/tests/fixtures/breaking/datacontract-definitions-v1.yaml
index a11fc387f..dcf2470b7 100644
--- a/tests/fixtures/breaking/datacontract-definitions-v1.yaml
+++ b/tests/fixtures/breaking/datacontract-definitions-v1.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/breaking/datacontract-definitions-v2.yaml b/tests/fixtures/breaking/datacontract-definitions-v2.yaml
index e4207a9b6..06110a335 100644
--- a/tests/fixtures/breaking/datacontract-definitions-v2.yaml
+++ b/tests/fixtures/breaking/datacontract-definitions-v2.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/breaking/datacontract-definitions-v3.yaml b/tests/fixtures/breaking/datacontract-definitions-v3.yaml
index 98055680b..0f096113e 100644
--- a/tests/fixtures/breaking/datacontract-definitions-v3.yaml
+++ b/tests/fixtures/breaking/datacontract-definitions-v3.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/breaking/datacontract-fields-array-v1.yaml b/tests/fixtures/breaking/datacontract-fields-array-v1.yaml
index 37a1ad6bc..2334c5435 100644
--- a/tests/fixtures/breaking/datacontract-fields-array-v1.yaml
+++ b/tests/fixtures/breaking/datacontract-fields-array-v1.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: demo-contract-id
 info:
   title: Test for changes in datacontracts that includes arrays
diff --git a/tests/fixtures/breaking/datacontract-fields-array-v2.yaml b/tests/fixtures/breaking/datacontract-fields-array-v2.yaml
index 4e7877303..42f582b0c 100644
--- a/tests/fixtures/breaking/datacontract-fields-array-v2.yaml
+++ b/tests/fixtures/breaking/datacontract-fields-array-v2.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: demo-contract-id
 info:
   title: Test for changes in datacontracts that includes arrays
diff --git a/tests/fixtures/breaking/datacontract-fields-v1.yaml b/tests/fixtures/breaking/datacontract-fields-v1.yaml
index 03d69494e..eef3a512a 100644
--- a/tests/fixtures/breaking/datacontract-fields-v1.yaml
+++ b/tests/fixtures/breaking/datacontract-fields-v1.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/breaking/datacontract-fields-v2.yaml b/tests/fixtures/breaking/datacontract-fields-v2.yaml
index 08225d1c0..1ebf8fce6 100644
--- a/tests/fixtures/breaking/datacontract-fields-v2.yaml
+++ b/tests/fixtures/breaking/datacontract-fields-v2.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/breaking/datacontract-fields-v3.yaml b/tests/fixtures/breaking/datacontract-fields-v3.yaml
index 68fa8e883..36187ad08 100644
--- a/tests/fixtures/breaking/datacontract-fields-v3.yaml
+++ b/tests/fixtures/breaking/datacontract-fields-v3.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/breaking/datacontract-models-v1.yaml b/tests/fixtures/breaking/datacontract-models-v1.yaml
index cee0935ff..ac46a5c7c 100644
--- a/tests/fixtures/breaking/datacontract-models-v1.yaml
+++ b/tests/fixtures/breaking/datacontract-models-v1.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/breaking/datacontract-models-v2.yaml b/tests/fixtures/breaking/datacontract-models-v2.yaml
index 1a63f2514..11cf57e25 100644
--- a/tests/fixtures/breaking/datacontract-models-v2.yaml
+++ b/tests/fixtures/breaking/datacontract-models-v2.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/breaking/datacontract-models-v3.yaml b/tests/fixtures/breaking/datacontract-models-v3.yaml
index f23adf157..dfab651ae 100644
--- a/tests/fixtures/breaking/datacontract-models-v3.yaml
+++ b/tests/fixtures/breaking/datacontract-models-v3.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/breaking/datacontract-quality-v1.yaml b/tests/fixtures/breaking/datacontract-quality-v1.yaml
index 843adaca3..7baba622f 100644
--- a/tests/fixtures/breaking/datacontract-quality-v1.yaml
+++ b/tests/fixtures/breaking/datacontract-quality-v1.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/breaking/datacontract-quality-v2.yaml b/tests/fixtures/breaking/datacontract-quality-v2.yaml
index 30c561d16..9e8c33c4e 100644
--- a/tests/fixtures/breaking/datacontract-quality-v2.yaml
+++ b/tests/fixtures/breaking/datacontract-quality-v2.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/breaking/datacontract-quality-v3.yaml b/tests/fixtures/breaking/datacontract-quality-v3.yaml
index 5bea7f722..4832289a5 100644
--- a/tests/fixtures/breaking/datacontract-quality-v3.yaml
+++ b/tests/fixtures/breaking/datacontract-quality-v3.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/catalog/datacontract-1.yaml b/tests/fixtures/catalog/datacontract-1.yaml
index 1e87c2899..799dedff4 100644
--- a/tests/fixtures/catalog/datacontract-1.yaml
+++ b/tests/fixtures/catalog/datacontract-1.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/catalog/datacontract-2.yaml b/tests/fixtures/catalog/datacontract-2.yaml
index ad643c2dc..3a9d63207 100644
--- a/tests/fixtures/catalog/datacontract-2.yaml
+++ b/tests/fixtures/catalog/datacontract-2.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: urn:datacontract:test
 info:
   title: Test datacontract
diff --git a/tests/fixtures/custom/export/datacontract.yaml b/tests/fixtures/custom/export/datacontract.yaml
index 04a30213a..62c347ab8 100644
--- a/tests/fixtures/custom/export/datacontract.yaml
+++ b/tests/fixtures/custom/export/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:orders-latest
 info:
   title: Orders Latest
diff --git a/tests/fixtures/data-caterer/export/datacontract_nested.yaml b/tests/fixtures/data-caterer/export/datacontract_nested.yaml
index be59e9e3c..f8654004d 100644
--- a/tests/fixtures/data-caterer/export/datacontract_nested.yaml
+++ b/tests/fixtures/data-caterer/export/datacontract_nested.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/databricks-sql/datacontract.yaml b/tests/fixtures/databricks-sql/datacontract.yaml
index 338fcb67d..f86a19505 100644
--- a/tests/fixtures/databricks-sql/datacontract.yaml
+++ b/tests/fixtures/databricks-sql/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:orders-latest
 info:
   title: Orders Latest
diff --git a/tests/fixtures/databricks-unity/import/datacontract.yaml b/tests/fixtures/databricks-unity/import/datacontract.yaml
index 7c6049353..522ac8207 100644
--- a/tests/fixtures/databricks-unity/import/datacontract.yaml
+++ b/tests/fixtures/databricks-unity/import/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/databricks-unity/import/datacontract_complex_types.yaml b/tests/fixtures/databricks-unity/import/datacontract_complex_types.yaml
index 30ec5e337..6d5fa5d87 100644
--- a/tests/fixtures/databricks-unity/import/datacontract_complex_types.yaml
+++ b/tests/fixtures/databricks-unity/import/datacontract_complex_types.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/dataframe/datacontract.yaml b/tests/fixtures/dataframe/datacontract.yaml
index 80c084e9e..8eb4fa860 100644
--- a/tests/fixtures/dataframe/datacontract.yaml
+++ b/tests/fixtures/dataframe/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: dataframetest
 info:
   title: dataframetest
diff --git a/tests/fixtures/dbml/datacontract.yaml b/tests/fixtures/dbml/datacontract.yaml
index c254449cc..774963155 100644
--- a/tests/fixtures/dbml/datacontract.yaml
+++ b/tests/fixtures/dbml/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:orders-latest
 info:
   title: Orders Latest
diff --git a/tests/fixtures/dbml/import/datacontract.yaml b/tests/fixtures/dbml/import/datacontract.yaml
index 10a6a6845..efb7f596b 100644
--- a/tests/fixtures/dbml/import/datacontract.yaml
+++ b/tests/fixtures/dbml/import/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: Orders Latest
diff --git a/tests/fixtures/dbml/import/datacontract_schema_filtered.yaml b/tests/fixtures/dbml/import/datacontract_schema_filtered.yaml
index 47df16f0b..c3c6414ed 100644
--- a/tests/fixtures/dbml/import/datacontract_schema_filtered.yaml
+++ b/tests/fixtures/dbml/import/datacontract_schema_filtered.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: Orders Latest
diff --git a/tests/fixtures/dbml/import/datacontract_table_filtered.yaml b/tests/fixtures/dbml/import/datacontract_table_filtered.yaml
index a81d31705..062cfe998 100644
--- a/tests/fixtures/dbml/import/datacontract_table_filtered.yaml
+++ b/tests/fixtures/dbml/import/datacontract_table_filtered.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: Orders Latest
diff --git a/tests/fixtures/dbt/export/datacontract.yaml b/tests/fixtures/dbt/export/datacontract.yaml
index 98488e1f7..08cacb223 100644
--- a/tests/fixtures/dbt/export/datacontract.yaml
+++ b/tests/fixtures/dbt/export/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/export/datacontract.html b/tests/fixtures/export/datacontract.html
index e2ef66991..397b7330e 100644
--- a/tests/fixtures/export/datacontract.html
+++ b/tests/fixtures/export/datacontract.html
@@ -2350,7 +2350,7 @@

Definitions

-            dataContractSpecification: 1.2.0
+            dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/export/datacontract.yaml b/tests/fixtures/export/datacontract.yaml
index 54680bf60..f5517a6b5 100644
--- a/tests/fixtures/export/datacontract.yaml
+++ b/tests/fixtures/export/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/export/datacontract_nested.yaml b/tests/fixtures/export/datacontract_nested.yaml
index 9ad498552..18abf7c36 100644
--- a/tests/fixtures/export/datacontract_nested.yaml
+++ b/tests/fixtures/export/datacontract_nested.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/export/datacontract_no_model_type.yaml b/tests/fixtures/export/datacontract_no_model_type.yaml
index 8ca59b0d6..06c65c1b2 100644
--- a/tests/fixtures/export/datacontract_no_model_type.yaml
+++ b/tests/fixtures/export/datacontract_no_model_type.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/export/datacontract_s3.yaml b/tests/fixtures/export/datacontract_s3.yaml
index 4b547aab3..9cf484f99 100644
--- a/tests/fixtures/export/datacontract_s3.yaml
+++ b/tests/fixtures/export/datacontract_s3.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/export/rdf/datacontract-complex.yaml b/tests/fixtures/export/rdf/datacontract-complex.yaml
index c913dc585..8b96c4fa0 100644
--- a/tests/fixtures/export/rdf/datacontract-complex.yaml
+++ b/tests/fixtures/export/rdf/datacontract-complex.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-latest
 info:
   title: Orders Latest
diff --git a/tests/fixtures/export/rdf/datacontract.yaml b/tests/fixtures/export/rdf/datacontract.yaml
index 21ed4f130..8501f8682 100644
--- a/tests/fixtures/export/rdf/datacontract.yaml
+++ b/tests/fixtures/export/rdf/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/gcs-csv-remote/datacontract.yaml b/tests/fixtures/gcs-csv-remote/datacontract.yaml
index f0d217dd4..198e54dd5 100644
--- a/tests/fixtures/gcs-csv-remote/datacontract.yaml
+++ b/tests/fixtures/gcs-csv-remote/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: warehouses
 info:
   title: Warehouses
diff --git a/tests/fixtures/gcs-json-remote/datacontract.yaml b/tests/fixtures/gcs-json-remote/datacontract.yaml
index 9313b42f1..27fab51b5 100644
--- a/tests/fixtures/gcs-json-remote/datacontract.yaml
+++ b/tests/fixtures/gcs-json-remote/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: inventory-events
 info:
   title: Inventory Events
diff --git a/tests/fixtures/glue/datacontract-empty-model.yaml b/tests/fixtures/glue/datacontract-empty-model.yaml
index 153b5e147..20174950e 100644
--- a/tests/fixtures/glue/datacontract-empty-model.yaml
+++ b/tests/fixtures/glue/datacontract-empty-model.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
     title: My Data Contract
diff --git a/tests/fixtures/glue/datacontract.yaml b/tests/fixtures/glue/datacontract.yaml
index 8892a61f7..08ee7e814 100644
--- a/tests/fixtures/glue/datacontract.yaml
+++ b/tests/fixtures/glue/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
     title: My Data Contract
diff --git a/tests/fixtures/great-expectations/datacontract_quality_column.yaml b/tests/fixtures/great-expectations/datacontract_quality_column.yaml
index 5571e8f95..ad692d046 100644
--- a/tests/fixtures/great-expectations/datacontract_quality_column.yaml
+++ b/tests/fixtures/great-expectations/datacontract_quality_column.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/import/football-datacontract.yml b/tests/fixtures/import/football-datacontract.yml
index 72d58eba4..f1e081d3f 100644
--- a/tests/fixtures/import/football-datacontract.yml
+++ b/tests/fixtures/import/football-datacontract.yml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/import/football_deeply_nested_no_required_datacontract.yml b/tests/fixtures/import/football_deeply_nested_no_required_datacontract.yml
index ae3033b35..ead129718 100644
--- a/tests/fixtures/import/football_deeply_nested_no_required_datacontract.yml
+++ b/tests/fixtures/import/football_deeply_nested_no_required_datacontract.yml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/import/json/inventory_ndjson.datacontract.yaml b/tests/fixtures/import/json/inventory_ndjson.datacontract.yaml
index d57cef5f9..9566294c8 100644
--- a/tests/fixtures/import/json/inventory_ndjson.datacontract.yaml
+++ b/tests/fixtures/import/json/inventory_ndjson.datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/import/json/product_detail.datacontract.yaml b/tests/fixtures/import/json/product_detail.datacontract.yaml
index 622ae3e84..efa5fa8b5 100644
--- a/tests/fixtures/import/json/product_detail.datacontract.yaml
+++ b/tests/fixtures/import/json/product_detail.datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/import/json/product_simple.datacontract.yaml b/tests/fixtures/import/json/product_simple.datacontract.yaml
index f4b0c17c4..91bde3287 100644
--- a/tests/fixtures/import/json/product_simple.datacontract.yaml
+++ b/tests/fixtures/import/json/product_simple.datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/import/orders_union-types_datacontract.yml b/tests/fixtures/import/orders_union-types_datacontract.yml
index 6abf5a290..cd971677d 100644
--- a/tests/fixtures/import/orders_union-types_datacontract.yml
+++ b/tests/fixtures/import/orders_union-types_datacontract.yml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/junit/datacontract.yaml b/tests/fixtures/junit/datacontract.yaml
index 62cb30b34..a84afbc47 100644
--- a/tests/fixtures/junit/datacontract.yaml
+++ b/tests/fixtures/junit/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: junit_test
 info:
   title: Sample contract to demonstrate the JUnit output format
diff --git a/tests/fixtures/kafka-avro-remote/datacontract.yaml b/tests/fixtures/kafka-avro-remote/datacontract.yaml
index 027307f7e..a0c8164c0 100644
--- a/tests/fixtures/kafka-avro-remote/datacontract.yaml
+++ b/tests/fixtures/kafka-avro-remote/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders
 info:
   title: Orders
diff --git a/tests/fixtures/kafka-json-remote/datacontract.yaml b/tests/fixtures/kafka-json-remote/datacontract.yaml
index 4c5f1429e..0f3350bb7 100644
--- a/tests/fixtures/kafka-json-remote/datacontract.yaml
+++ b/tests/fixtures/kafka-json-remote/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders
 info:
   title: Orders
diff --git a/tests/fixtures/kafka/datacontract.yaml b/tests/fixtures/kafka/datacontract.yaml
index d7d67592f..d3ce8d24c 100644
--- a/tests/fixtures/kafka/datacontract.yaml
+++ b/tests/fixtures/kafka/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: inventory-events
 info:
   title: Inventory Events
diff --git a/tests/fixtures/lint/datacontract_csv_lint_base.yaml b/tests/fixtures/lint/datacontract_csv_lint_base.yaml
index efb36c3e0..17065a1ef 100644
--- a/tests/fixtures/lint/datacontract_csv_lint_base.yaml
+++ b/tests/fixtures/lint/datacontract_csv_lint_base.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/lint/datacontract_quality_schema.yaml b/tests/fixtures/lint/datacontract_quality_schema.yaml
index 3046d55e5..72c7751fa 100644
--- a/tests/fixtures/lint/datacontract_quality_schema.yaml
+++ b/tests/fixtures/lint/datacontract_quality_schema.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/lint/datacontract_unknown_model.yaml b/tests/fixtures/lint/datacontract_unknown_model.yaml
index 922e9d7e8..0ee1a3318 100644
--- a/tests/fixtures/lint/datacontract_unknown_model.yaml
+++ b/tests/fixtures/lint/datacontract_unknown_model.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/lint/invalid_datacontract.yaml b/tests/fixtures/lint/invalid_datacontract.yaml
index cfc846caf..7d698e839 100644
--- a/tests/fixtures/lint/invalid_datacontract.yaml
+++ b/tests/fixtures/lint/invalid_datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 #id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/lint/valid_datacontract.yaml b/tests/fixtures/lint/valid_datacontract.yaml
index 7e4a39ce6..4825bdb1a 100644
--- a/tests/fixtures/lint/valid_datacontract.yaml
+++ b/tests/fixtures/lint/valid_datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/lint/valid_datacontract_ref.yaml b/tests/fixtures/lint/valid_datacontract_ref.yaml
index e89561e7f..9fe9d95f3 100644
--- a/tests/fixtures/lint/valid_datacontract_ref.yaml
+++ b/tests/fixtures/lint/valid_datacontract_ref.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:orders-latest
 info:
   title: Orders Latest
diff --git a/tests/fixtures/lint/valid_datacontract_references.yaml b/tests/fixtures/lint/valid_datacontract_references.yaml
index e057bcc7e..f6711002e 100644
--- a/tests/fixtures/lint/valid_datacontract_references.yaml
+++ b/tests/fixtures/lint/valid_datacontract_references.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:orders-latest
 info:
   title: Orders Latest
diff --git a/tests/fixtures/local-delta/datacontract.yaml b/tests/fixtures/local-delta/datacontract.yaml
index dff6f7dca..e61069858 100644
--- a/tests/fixtures/local-delta/datacontract.yaml
+++ b/tests/fixtures/local-delta/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/local-json-complex/datacontract.yaml b/tests/fixtures/local-json-complex/datacontract.yaml
index 75339c457..aa628b6c9 100644
--- a/tests/fixtures/local-json-complex/datacontract.yaml
+++ b/tests/fixtures/local-json-complex/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: jsonschema-complex-data
 info:
   title: sts data
diff --git a/tests/fixtures/local-json-nd/datacontract.yaml b/tests/fixtures/local-json-nd/datacontract.yaml
index 15bad9df9..d25821d30 100644
--- a/tests/fixtures/local-json-nd/datacontract.yaml
+++ b/tests/fixtures/local-json-nd/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-local
 info:
   title: My Data Contract Local
diff --git a/tests/fixtures/local-json/datacontract.yaml b/tests/fixtures/local-json/datacontract.yaml
index 77992e3f9..35bc20af3 100644
--- a/tests/fixtures/local-json/datacontract.yaml
+++ b/tests/fixtures/local-json/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: "61111-0002"
 info:
   title: "Verbraucherpreisindex: Deutschland, Monate"
diff --git a/tests/fixtures/markdown/export/datacontract.yaml b/tests/fixtures/markdown/export/datacontract.yaml
index 81b477936..f2137479a 100644
--- a/tests/fixtures/markdown/export/datacontract.yaml
+++ b/tests/fixtures/markdown/export/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:orders-latest
 info:
   title: Orders Latest
diff --git a/tests/fixtures/odcs_v3/adventureworks.datacontract.yml b/tests/fixtures/odcs_v3/adventureworks.datacontract.yml
index c5d1dc0af..81ffb2aad 100644
--- a/tests/fixtures/odcs_v3/adventureworks.datacontract.yml
+++ b/tests/fixtures/odcs_v3/adventureworks.datacontract.yml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: 6aeafdc1-ed62-4c8f-bf0a-da1061c98cdb
 info:
   title: ''
diff --git a/tests/fixtures/odcs_v3/full-example.datacontract.yml b/tests/fixtures/odcs_v3/full-example.datacontract.yml
index b856c98d9..a96eb4cf7 100644
--- a/tests/fixtures/odcs_v3/full-example.datacontract.yml
+++ b/tests/fixtures/odcs_v3/full-example.datacontract.yml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: 53581432-6c55-4ba2-a65f-72344a91553a
 info:
   title: ''
@@ -61,7 +61,8 @@ models:
         classification: public
         quality:
           - type: library
-            rule: nullCheck
+            metric: nullValues
+            mustBe: 0
             description: column should not contain null values
             dimension: completeness
             severity: error
@@ -84,7 +85,8 @@ models:
     quality:
     - type: library
       description: Ensure row count is within expected volume range
-      rule: countCheck
+      metric: rowCount
+      mustBeGreaterThan: 1000
       dimension: completeness
       method: reconciliation
       severity: error
diff --git a/tests/fixtures/odcs_v3/full-example.odcs.yaml b/tests/fixtures/odcs_v3/full-example.odcs.yaml
index b0dcf5649..35d1a03bd 100644
--- a/tests/fixtures/odcs_v3/full-example.odcs.yaml
+++ b/tests/fixtures/odcs_v3/full-example.odcs.yaml
@@ -96,7 +96,8 @@ schema:
             type: implementation
         encryptedName: rcvr_cntry_code_encrypted
         quality:
-          - rule: nullCheck
+          - rule: nullValues
+            mustBe: 0
             description: column should not contain null values
             dimension: completeness # dropdown 7 values
             type: library
@@ -112,7 +113,8 @@ schema:
               - property: COMPARISON_TYPE
                 value: Greater than
     quality:
-      - rule: countCheck
+      - rule: rowCount
+        mustBeGreaterThan: 1000
         type: library
         description: Ensure row count is within expected volume range
         dimension: completeness
diff --git a/tests/fixtures/parquet/datacontract.yaml b/tests/fixtures/parquet/datacontract.yaml
index fbddb7b2b..dcad0970c 100644
--- a/tests/fixtures/parquet/datacontract.yaml
+++ b/tests/fixtures/parquet/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: combined-unit-test
 info:
   title: Combined Unit Test
diff --git a/tests/fixtures/parquet/datacontract_array.yaml b/tests/fixtures/parquet/datacontract_array.yaml
index 8e39f50e7..9e26aeb3f 100644
--- a/tests/fixtures/parquet/datacontract_array.yaml
+++ b/tests/fixtures/parquet/datacontract_array.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: array-unit-test
 info:
   title: Array Unit Test
diff --git a/tests/fixtures/parquet/datacontract_bigint.yaml b/tests/fixtures/parquet/datacontract_bigint.yaml
index 469e09931..090d5bbab 100644
--- a/tests/fixtures/parquet/datacontract_bigint.yaml
+++ b/tests/fixtures/parquet/datacontract_bigint.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: bigint-unit-test
 info:
   title: BigInt Unit Test
diff --git a/tests/fixtures/parquet/datacontract_binary.yaml b/tests/fixtures/parquet/datacontract_binary.yaml
index 0f8c995ba..a5db4b227 100644
--- a/tests/fixtures/parquet/datacontract_binary.yaml
+++ b/tests/fixtures/parquet/datacontract_binary.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: blob-unit-test
 info:
   title: Blob Unit Test
diff --git a/tests/fixtures/parquet/datacontract_boolean.yaml b/tests/fixtures/parquet/datacontract_boolean.yaml
index e0f1672f6..432c2f53a 100644
--- a/tests/fixtures/parquet/datacontract_boolean.yaml
+++ b/tests/fixtures/parquet/datacontract_boolean.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: boolean-unit-test
 info:
   title: Boolean Unit Test
diff --git a/tests/fixtures/parquet/datacontract_date.yaml b/tests/fixtures/parquet/datacontract_date.yaml
index 2ef52a1d7..ba7af6e56 100644
--- a/tests/fixtures/parquet/datacontract_date.yaml
+++ b/tests/fixtures/parquet/datacontract_date.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: date-unit-test
 info:
   title: Date Unit Test
diff --git a/tests/fixtures/parquet/datacontract_decimal.yaml b/tests/fixtures/parquet/datacontract_decimal.yaml
index 08d046297..409ef9001 100644
--- a/tests/fixtures/parquet/datacontract_decimal.yaml
+++ b/tests/fixtures/parquet/datacontract_decimal.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/parquet/datacontract_double.yaml b/tests/fixtures/parquet/datacontract_double.yaml
index 0df75d629..e8016c7cf 100644
--- a/tests/fixtures/parquet/datacontract_double.yaml
+++ b/tests/fixtures/parquet/datacontract_double.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: double-unit-test
 info:
   title: Double Unit Test
diff --git a/tests/fixtures/parquet/datacontract_float.yaml b/tests/fixtures/parquet/datacontract_float.yaml
index d9bee4c21..724ab6cf0 100644
--- a/tests/fixtures/parquet/datacontract_float.yaml
+++ b/tests/fixtures/parquet/datacontract_float.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: float-unit-test
 info:
   title: Float Unit Test
diff --git a/tests/fixtures/parquet/datacontract_integer.yaml b/tests/fixtures/parquet/datacontract_integer.yaml
index 347c9be8a..22a54562b 100644
--- a/tests/fixtures/parquet/datacontract_integer.yaml
+++ b/tests/fixtures/parquet/datacontract_integer.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: integer-unit-test
 info:
   title: Integer Unit Test
diff --git a/tests/fixtures/parquet/datacontract_invalid.yaml b/tests/fixtures/parquet/datacontract_invalid.yaml
index 1c2befc18..b4f514f6e 100644
--- a/tests/fixtures/parquet/datacontract_invalid.yaml
+++ b/tests/fixtures/parquet/datacontract_invalid.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders-unit-test
 info:
   title: Orders Unit Test
diff --git a/tests/fixtures/parquet/datacontract_map.yaml b/tests/fixtures/parquet/datacontract_map.yaml
index 54b5856db..89e5eb0d0 100644
--- a/tests/fixtures/parquet/datacontract_map.yaml
+++ b/tests/fixtures/parquet/datacontract_map.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: map-unit-test
 info:
   title: Map Unit Test
diff --git a/tests/fixtures/parquet/datacontract_string.yaml b/tests/fixtures/parquet/datacontract_string.yaml
index 78eea8721..082362d59 100644
--- a/tests/fixtures/parquet/datacontract_string.yaml
+++ b/tests/fixtures/parquet/datacontract_string.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: string-unit-test
 info:
   title: String Unit Test
diff --git a/tests/fixtures/parquet/datacontract_struct.yaml b/tests/fixtures/parquet/datacontract_struct.yaml
index ab3a91541..5d4dd7383 100644
--- a/tests/fixtures/parquet/datacontract_struct.yaml
+++ b/tests/fixtures/parquet/datacontract_struct.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: struct-unit-test
 info:
   title: Struct Unit Test
diff --git a/tests/fixtures/parquet/datacontract_timestamp.yaml b/tests/fixtures/parquet/datacontract_timestamp.yaml
index f4eeab915..b6f913802 100644
--- a/tests/fixtures/parquet/datacontract_timestamp.yaml
+++ b/tests/fixtures/parquet/datacontract_timestamp.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: iceberg-ingestion
 info:
   title: ingestion to s3/iceberg
diff --git a/tests/fixtures/parquet/datacontract_timestamp_ntz.yaml b/tests/fixtures/parquet/datacontract_timestamp_ntz.yaml
index a380b7a0b..17aae3c97 100644
--- a/tests/fixtures/parquet/datacontract_timestamp_ntz.yaml
+++ b/tests/fixtures/parquet/datacontract_timestamp_ntz.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: parquet-timestamp-ntz-test
 info:
   title: Parquet Timestamp w/o timezone test
diff --git a/tests/fixtures/postgres-export/datacontract.yaml b/tests/fixtures/postgres-export/datacontract.yaml
index 922b40a45..1fd717bd1 100644
--- a/tests/fixtures/postgres-export/datacontract.yaml
+++ b/tests/fixtures/postgres-export/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: postgres
 info:
   title: postgres
diff --git a/tests/fixtures/postgres/datacontract.yaml b/tests/fixtures/postgres/datacontract.yaml
index 2fb030a88..2154cddb8 100644
--- a/tests/fixtures/postgres/datacontract.yaml
+++ b/tests/fixtures/postgres/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: postgres
 info:
   title: postgres
diff --git a/tests/fixtures/postgres/datacontract_case_sensitive.yaml b/tests/fixtures/postgres/datacontract_case_sensitive.yaml
index 9aa0562a5..e2226340d 100644
--- a/tests/fixtures/postgres/datacontract_case_sensitive.yaml
+++ b/tests/fixtures/postgres/datacontract_case_sensitive.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: postgres
 info:
   title: postgres
diff --git a/tests/fixtures/postgres/datacontract_servicelevels.yaml b/tests/fixtures/postgres/datacontract_servicelevels.yaml
index a9fb52fe6..c2e001445 100644
--- a/tests/fixtures/postgres/datacontract_servicelevels.yaml
+++ b/tests/fixtures/postgres/datacontract_servicelevels.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: postgres
 info:
   title: postgres
diff --git a/tests/fixtures/protobuf/datacontract.yaml b/tests/fixtures/protobuf/datacontract.yaml
index 28e95b2fa..eb3f33106 100644
--- a/tests/fixtures/protobuf/datacontract.yaml
+++ b/tests/fixtures/protobuf/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: resolved_alerts
 info:
   contact:
diff --git a/tests/fixtures/quality/datacontract.yaml b/tests/fixtures/quality/datacontract.yaml
index a8e2e54f8..5a44830f8 100644
--- a/tests/fixtures/quality/datacontract.yaml
+++ b/tests/fixtures/quality/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: postgres
 info:
   title: postgres
diff --git a/tests/fixtures/s3-csv/datacontract.yaml b/tests/fixtures/s3-csv/datacontract.yaml
index ab33af6aa..4d6dfaebd 100644
--- a/tests/fixtures/s3-csv/datacontract.yaml
+++ b/tests/fixtures/s3-csv/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: s3-csv
 info:
   title: s30-csv
diff --git a/tests/fixtures/s3-delta/datacontract.yaml b/tests/fixtures/s3-delta/datacontract.yaml
index 89fd4f6f1..d0acef5e5 100644
--- a/tests/fixtures/s3-delta/datacontract.yaml
+++ b/tests/fixtures/s3-delta/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: s3-delta-orders
 info:
   title: S3 Delta Table Test
diff --git a/tests/fixtures/s3-json-complex/datacontract.yaml b/tests/fixtures/s3-json-complex/datacontract.yaml
index 36c6d1592..4b95a284c 100644
--- a/tests/fixtures/s3-json-complex/datacontract.yaml
+++ b/tests/fixtures/s3-json-complex/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: inventory-events
 info:
   title: Inventory Events Feed
diff --git a/tests/fixtures/s3-json-multiple-models/datacontract.yaml b/tests/fixtures/s3-json-multiple-models/datacontract.yaml
index 6f1878068..4347d00b8 100644
--- a/tests/fixtures/s3-json-multiple-models/datacontract.yaml
+++ b/tests/fixtures/s3-json-multiple-models/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:orders-latest
 info:
   title: Orders Latest
diff --git a/tests/fixtures/s3-json-remote/datacontract.yaml b/tests/fixtures/s3-json-remote/datacontract.yaml
index 6bd73100f..4b5e9309c 100644
--- a/tests/fixtures/s3-json-remote/datacontract.yaml
+++ b/tests/fixtures/s3-json-remote/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:orders-latest
 info:
   title: Orders Latest
diff --git a/tests/fixtures/s3-json/datacontract.yaml b/tests/fixtures/s3-json/datacontract.yaml
index 2a6353759..00a3e5781 100644
--- a/tests/fixtures/s3-json/datacontract.yaml
+++ b/tests/fixtures/s3-json/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: inventory-events
 info:
   title: Inventory Events
diff --git a/tests/fixtures/snowflake/datacontract.yaml b/tests/fixtures/snowflake/datacontract.yaml
index fea2ca414..e23bbe1b9 100644
--- a/tests/fixtures/snowflake/datacontract.yaml
+++ b/tests/fixtures/snowflake/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:snowflake_orders_pii_v2
 info:
   title: Orders
diff --git a/tests/fixtures/sodacl/datacontract.yaml b/tests/fixtures/sodacl/datacontract.yaml
index fa8c82305..9cae471a0 100644
--- a/tests/fixtures/sodacl/datacontract.yaml
+++ b/tests/fixtures/sodacl/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:orders-latest
 info:
   title: Orders Latest
diff --git a/tests/fixtures/spark/export/datacontract.yaml b/tests/fixtures/spark/export/datacontract.yaml
index 4bd87ff28..4f8b122c0 100644
--- a/tests/fixtures/spark/export/datacontract.yaml
+++ b/tests/fixtures/spark/export/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: orders
 info:
   title: Orders
diff --git a/tests/fixtures/spark/import/users_datacontract_desc.yml b/tests/fixtures/spark/import/users_datacontract_desc.yml
index 0ffb160b4..d42c8d693 100644
--- a/tests/fixtures/spark/import/users_datacontract_desc.yml
+++ b/tests/fixtures/spark/import/users_datacontract_desc.yml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/spark/import/users_datacontract_no_desc.yml b/tests/fixtures/spark/import/users_datacontract_no_desc.yml
index a79bcfd65..10428cc5f 100644
--- a/tests/fixtures/spark/import/users_datacontract_no_desc.yml
+++ b/tests/fixtures/spark/import/users_datacontract_no_desc.yml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/fixtures/spec/datacontract_aliases.yaml b/tests/fixtures/spec/datacontract_aliases.yaml
index 2935ba6f8..a4c9cbb47 100644
--- a/tests/fixtures/spec/datacontract_aliases.yaml
+++ b/tests/fixtures/spec/datacontract_aliases.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: "123"
 info:
   title: "Test"
diff --git a/tests/fixtures/spec/datacontract_fields_field.yaml b/tests/fixtures/spec/datacontract_fields_field.yaml
index 627f785b0..0a8f5aa32 100644
--- a/tests/fixtures/spec/datacontract_fields_field.yaml
+++ b/tests/fixtures/spec/datacontract_fields_field.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: "123"
 info:
   title: "Test"
diff --git a/tests/fixtures/sqlserver/datacontract.yaml b/tests/fixtures/sqlserver/datacontract.yaml
index 9713bb6c0..d8f7db72d 100644
--- a/tests/fixtures/sqlserver/datacontract.yaml
+++ b/tests/fixtures/sqlserver/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: sqlserver
 info:
   title: sqlserver
diff --git a/tests/fixtures/trino/datacontract.yaml b/tests/fixtures/trino/datacontract.yaml
index 551eebe44..9eeddbdfa 100644
--- a/tests/fixtures/trino/datacontract.yaml
+++ b/tests/fixtures/trino/datacontract.yaml
@@ -1,4 +1,4 @@
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: trino
 info:
   title: trino
diff --git a/tests/test_duckdb_json.py b/tests/test_duckdb_json.py
index f8bf4c182..01a07c943 100644
--- a/tests/test_duckdb_json.py
+++ b/tests/test_duckdb_json.py
@@ -5,7 +5,7 @@
 
 def test_nested_json():
     data_contract_str = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: "61111-0002"
 info:
   title: Sample data of nested types
diff --git a/tests/test_export_complex_data_contract.py b/tests/test_export_complex_data_contract.py
index c58a7ba41..1700ec889 100644
--- a/tests/test_export_complex_data_contract.py
+++ b/tests/test_export_complex_data_contract.py
@@ -9,7 +9,7 @@ def test_export_complex_data_contract():
     """
     data_contract = DataContract(
         data_contract_str="""
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: urn:datacontract:checkout:orders-latest
 info:
   title: Orders Latest
diff --git a/tests/test_export_rdf.py b/tests/test_export_rdf.py
index 12b3589b4..ea1cd03b4 100644
--- a/tests/test_export_rdf.py
+++ b/tests/test_export_rdf.py
@@ -31,12 +31,12 @@ def test_to_rdf():
     file_content = read_file(data_contract_file=data_contract_file)
     data_contract = DataContractSpecification.from_string(file_content)
     expected_rdf = """
-@prefix dc1:  .
-@prefix dcx:  .
+@prefix dc1:  .
+@prefix dcx:  .
 @prefix xsd:  .
 
  a dc1:DataContract ;
-    dc1:dataContractSpecification "1.2.0" ;
+    dc1:dataContractSpecification "1.2.1" ;
     dc1:id "orders-unit-test" ;
     dc1:info [ a dc1:Info ;
             dc1:contact [ a dc1:Contact ;
@@ -98,12 +98,12 @@ def test_to_rdf_complex():
     data_contract = DataContractSpecification.from_string(file_content)
     expected_rdf = """
 @base  .
-@prefix dc1:  .
-@prefix dcx:  .
+@prefix dc1:  .
+@prefix dcx:  .
 @prefix xsd:  .
 
  a dc1:DataContract ;
-    dc1:dataContractSpecification "1.2.0" ;
+    dc1:dataContractSpecification "1.2.1" ;
     dc1:example [ a dc1:Example ;
             dc1:data \"\"\"order_id,order_timestamp,order_total
 "1001","2023-09-09T08:30:00Z",2500
diff --git a/tests/test_export_sodacl.py b/tests/test_export_sodacl.py
index 9777fa602..c34fcf1ab 100644
--- a/tests/test_export_sodacl.py
+++ b/tests/test_export_sodacl.py
@@ -6,7 +6,7 @@
 
 def test_export_sodacl():
     data_contract_specification_str = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 models:
   orders:
     description: test
diff --git a/tests/test_import_avro.py b/tests/test_import_avro.py
index 6bab7a8a9..af68bfda2 100644
--- a/tests/test_import_avro.py
+++ b/tests/test_import_avro.py
@@ -26,7 +26,7 @@ def test_import_avro_schema():
     result = DataContract().import_from_source("avro", "fixtures/avro/data/orders.avsc")
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
@@ -123,7 +123,7 @@ def test_import_avro_arrays_of_records_and_nested_arrays():
     result = DataContract().import_from_source("avro", "fixtures/avro/data/arrays.avsc")
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
@@ -174,7 +174,7 @@ def test_import_avro_nested_records():
     result = DataContract().import_from_source("avro", "fixtures/avro/data/nested.avsc")
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
@@ -203,7 +203,7 @@ def test_import_avro_nested_records_with_arrays():
     result = DataContract().import_from_source("avro", "fixtures/avro/data/nested_with_arrays.avsc")
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
@@ -269,7 +269,7 @@ def test_import_avro_logical_types():
     result = DataContract().import_from_source("avro", "fixtures/avro/data/logical_types.avsc")
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
@@ -320,7 +320,7 @@ def test_import_avro_optional_enum():
     result = DataContract().import_from_source("avro", "fixtures/avro/data/optional_enum.avsc")
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/test_import_csv.py b/tests/test_import_csv.py
index 10a7ea22e..76138b941 100644
--- a/tests/test_import_csv.py
+++ b/tests/test_import_csv.py
@@ -37,7 +37,7 @@ def test_import_csv():
     for k in model.fields.keys():
         model.fields[k].examples = None
 
-    expected = f"""dataContractSpecification: 1.2.0
+    expected = f"""dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/test_import_dbt.py b/tests/test_import_dbt.py
index 69aad96d8..89cf8d9a6 100644
--- a/tests/test_import_dbt.py
+++ b/tests/test_import_dbt.py
@@ -70,7 +70,7 @@ def test_import_dbt_manifest():
     result = DataContract().import_from_source("dbt", dbt_manifest)
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: jaffle_shop
@@ -234,7 +234,7 @@ def test_import_dbt_manifest_bigquery():
     result = DataContract().import_from_source("dbt", dbt_manifest_bigquery)
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: jaffle_shop
@@ -398,7 +398,7 @@ def test_import_dbt_manifest_with_filter_and_empty_columns():
     result = DataContract().import_from_source("dbt", dbt_manifest_empty_columns, dbt_model=["customers"])
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: jaffle_shop
@@ -420,7 +420,7 @@ def test_import_dbt_manifest_with_filter():
     result = DataContract().import_from_source("dbt", dbt_manifest, dbt_model=["customers"])
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: jaffle_shop
diff --git a/tests/test_import_iceberg.py b/tests/test_import_iceberg.py
index 27b1d4336..5ee3adab6 100644
--- a/tests/test_import_iceberg.py
+++ b/tests/test_import_iceberg.py
@@ -8,7 +8,7 @@
 from datacontract.model.exceptions import DataContractException
 
 expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/test_import_parquet.py b/tests/test_import_parquet.py
index 59a735aa6..67de4aa1e 100644
--- a/tests/test_import_parquet.py
+++ b/tests/test_import_parquet.py
@@ -24,7 +24,7 @@ def test_cli():
 def test_import_parquet():
     result = DataContract().import_from_source(format="parquet", source=parquet_file_path)
 
-    expected = """dataContractSpecification: 1.2.0
+    expected = """dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/test_import_protobuf.py b/tests/test_import_protobuf.py
index 384f12253..7bdece2d3 100644
--- a/tests/test_import_protobuf.py
+++ b/tests/test_import_protobuf.py
@@ -27,7 +27,7 @@ def test_cli():
 def test_import_protobuf():
     result = DataContract().import_from_source("protobuf", protobuf_file_path)
 
-    expected = """dataContractSpecification: 1.2.0
+    expected = """dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/test_import_sql_postgres.py b/tests/test_import_sql_postgres.py
index 8f699fe74..8552648d2 100644
--- a/tests/test_import_sql_postgres.py
+++ b/tests/test_import_sql_postgres.py
@@ -29,7 +29,7 @@ def test_import_sql_postgres():
     result = DataContract().import_from_source("sql", sql_file_path, dialect="postgres")
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
@@ -67,7 +67,7 @@ def test_import_sql_constraints():
     result = DataContract().import_from_source("sql", "fixtures/postgres/data/data_constraints.sql", dialect="postgres")
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/test_import_sql_sqlserver.py b/tests/test_import_sql_sqlserver.py
index 8fe7b0a3d..8923153ac 100644
--- a/tests/test_import_sql_sqlserver.py
+++ b/tests/test_import_sql_sqlserver.py
@@ -9,7 +9,7 @@ def test_import_sql_sqlserver():
     result = DataContract().import_from_source("sql", sql_file_path, dialect="sqlserver")
 
     expected = """
-dataContractSpecification: 1.2.0
+dataContractSpecification: 1.2.1
 id: my-data-contract-id
 info:
   title: My Data Contract
diff --git a/tests/test_resolve.py b/tests/test_resolve.py
index 438fc36e9..7c3650da5 100644
--- a/tests/test_resolve.py
+++ b/tests/test_resolve.py
@@ -8,7 +8,7 @@
 def test_resolve_data_contract_simple_definition():
     datacontract = resolve_data_contract(
         data_contract_str="""
-    dataContractSpecification: 1.2.0
+    dataContractSpecification: 1.2.1
     id: my-id
     info:
       title: My Title
@@ -31,7 +31,7 @@ def test_resolve_data_contract_simple_definition():
 def test_resolve_data_contract_complex_definition():
     datacontract = resolve_data_contract(
         data_contract_str="""
-    dataContractSpecification: 1.2.0
+    dataContractSpecification: 1.2.1
     id: my-id
     info:
       title: My Title
@@ -57,7 +57,7 @@ def test_resolve_data_contract_complex_definition():
 def test_resolve_data_contract_array_definition():
     datacontract = resolve_data_contract(
         data_contract_str="""
-    dataContractSpecification: 1.2.0
+    dataContractSpecification: 1.2.1
     id: my-id
     info:
       title: My Title
@@ -87,7 +87,7 @@ def test_resolve_data_contract_array_definition():
 def test_resolve_data_contract_nested_definition():
     datacontract = resolve_data_contract(
         data_contract_str="""
-    dataContractSpecification: 1.2.0
+    dataContractSpecification: 1.2.1
     id: my-id
     info:
       title: My Title
@@ -124,7 +124,7 @@ def test_resolve_data_contract_simple_definition_file():
 
         datacontract = resolve_data_contract(
             data_contract_str=f"""
-        dataContractSpecification: 1.2.0
+        dataContractSpecification: 1.2.1
         id: my-id
         info:
           title: My Title
@@ -155,7 +155,7 @@ def test_resolve_data_contract_complex_definition_file():
 
         datacontract = resolve_data_contract(
             data_contract_str=f"""
-        dataContractSpecification: 1.2.0
+        dataContractSpecification: 1.2.1
         id: my-id
         info:
           title: My Title
@@ -192,7 +192,7 @@ def test_resolve_data_contract_relative_refrence():
 
         datacontract = resolve_data_contract(
             data_contract_str=f"""
-        dataContractSpecification: 1.2.0
+        dataContractSpecification: 1.2.1
         id: my-id
         info:
           title: My Title

From 2b2b473e52b30429e13315513ca206f9655d76cb Mon Sep 17 00:00:00 2001
From: jochen 
Date: Wed, 24 Sep 2025 19:41:39 +0200
Subject: [PATCH 007/150] chore: update dataContractSpecification to version
 1.2.1

---
 CHANGELOG.md                                  |    5 +
 .../schemas/datacontract-1.2.1.init.yaml      |   91 +
 .../schemas/datacontract-1.2.1.schema.json    | 2058 ++++++++++++++
 datacontract/schemas/odcs-3.0.2.schema.json   | 2382 +++++++++++++++++
 4 files changed, 4536 insertions(+)
 create mode 100644 datacontract/schemas/datacontract-1.2.1.init.yaml
 create mode 100644 datacontract/schemas/datacontract-1.2.1.schema.json
 create mode 100644 datacontract/schemas/odcs-3.0.2.schema.json
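
Besides the new schema files, this version enables the Data Quality
Metrics style that the updated fixtures migrate to: a library quality
check names an explicit metric and a threshold instead of a rule
shorthand. A minimal sketch of a field-level check, with field names
taken from the full-example fixture (thresholds are illustrative):

    quality:
      - type: library
        metric: nullValues
        mustBe: 0          # illustrative threshold
        description: column should not contain null values
        dimension: completeness
        severity: error

Model-level checks follow the same shape with row-based metrics, for
example metric: rowCount with mustBeGreaterThan: 1000.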

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3fdc2e7df..fb02a322c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+### Added
+
+- Support for Data Contract Specification v1.2.1 (Data Quality Metrics)
+
+
 ## [0.10.35] - 2025-08-25
 
 ### Added
diff --git a/datacontract/schemas/datacontract-1.2.1.init.yaml b/datacontract/schemas/datacontract-1.2.1.init.yaml
new file mode 100644
index 000000000..29baf9bf8
--- /dev/null
+++ b/datacontract/schemas/datacontract-1.2.1.init.yaml
@@ -0,0 +1,91 @@
+dataContractSpecification: 1.2.1
+id: my-data-contract-id
+info:
+  title: My Data Contract
+  version: 0.0.1
+#  description:
+#  owner:
+#  contact:
+#    name:
+#    url:
+#    email:
+
+
+### servers
+
+#servers:
+#  production:
+#    type: s3
+#    location: s3://
+#    format: parquet
+#    delimiter: new_line
+
+### terms
+
+#terms:
+#  usage:
+#  limitations:
+#  billing:
+#  noticePeriod:
+
+
+### models
+
+# models:
+#   my_model:
+#     description:
+#     type:
+#     fields:
+#       my_field:
+#         type:
+#         description:
+
+
+### definitions
+
+# definitions:
+#   my_field:
+#     domain:
+#     name:
+#     title:
+#     type:
+#     description:
+#     example:
+#     pii:
+#     classification:
+
+
+### servicelevels
+
+#servicelevels:
+#  availability:
+#    description: The server is available during support hours
+#    percentage: 99.9%
+#  retention:
+#    description: Data is retained for one year.
+#    period: P1Y
+#    unlimited: false
+#  latency:
+#    description: Data is available within 25 hours after the order was placed
+#    threshold: 25h
+#    sourceTimestampField: orders.order_timestamp
+#    processedTimestampField: orders.processed_timestamp
+#  freshness:
+#    description: The age of the youngest row in a table.
+#    threshold: 25h
+#    timestampField: orders.order_timestamp
+#  frequency:
+#    description: Data is delivered once a day
+#    type: batch # or streaming
+#    interval: daily # for batch, either interval or cron
+#    cron: 0 0 * * * # for batch, either cron or interval
+#  support:
+#    description: The data is available during typical business hours at headquarters
+#    time: 9am to 5pm in EST on business days
+#    responseTime: 1h
+#  backup:
+#    description: Data is backed up once a week, every Sunday at 0:00 UTC.
+#    interval: weekly
+#    cron: 0 0 * * 0
+#    recoveryTime: 24 hours
+#    recoveryPoint: 1 week
diff --git a/datacontract/schemas/datacontract-1.2.1.schema.json b/datacontract/schemas/datacontract-1.2.1.schema.json
new file mode 100644
index 000000000..b888b1e9c
--- /dev/null
+++ b/datacontract/schemas/datacontract-1.2.1.schema.json
@@ -0,0 +1,2058 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "title": "DataContractSpecification",
+  "properties": {
+    "dataContractSpecification": {
+      "type": "string",
+      "title": "DataContractSpecificationVersion",
+      "enum": [
+        "1.2.1",
+        "1.2.0",
+        "1.1.0",
+        "0.9.3",
+        "0.9.2",
+        "0.9.1",
+        "0.9.0"
+      ],
+      "description": "Specifies the Data Contract Specification being used."
+    },
+    "id": {
+      "type": "string",
+      "description": "Specifies the identifier of the data contract."
+    },
+    "info": {
+      "type": "object",
+      "properties": {
+        "title": {
+          "type": "string",
+          "description": "The title of the data contract."
+        },
+        "version": {
+          "type": "string",
+          "description": "The version of the data contract document (which is distinct from the Data Contract Specification version or the Data Product implementation version)."
+        },
+        "status": {
+          "type": "string",
+          "description": "The status of the data contract. Can be proposed, in development, active, retired.",
+          "examples": [
+            "proposed",
+            "in development",
+            "active",
+            "deprecated",
+            "retired"
+          ]
+        },
+        "description": {
+          "type": "string",
+          "description": "A description of the data contract."
+        },
+        "owner": {
+          "type": "string",
+          "description": "The owner or team responsible for managing the data contract and providing the data."
+        },
+        "contact": {
+          "type": "object",
+          "properties": {
+            "name": {
+              "type": "string",
+              "description": "The identifying name of the contact person/organization."
+            },
+            "url": {
+              "type": "string",
+              "format": "uri",
+              "description": "The URL pointing to the contact information. This MUST be in the form of a URL."
+            },
+            "email": {
+              "type": "string",
+              "format": "email",
+              "description": "The email address of the contact person/organization. This MUST be in the form of an email address."
+            }
+          },
+          "description": "Contact information for the data contract.",
+          "additionalProperties": true
+        }
+      },
+      "additionalProperties": true,
+      "required": [
+        "title",
+        "version"
+      ],
+      "description": "Metadata and life cycle information about the data contract."
+    },
+    "servers": {
+      "type": "object",
+      "description": "Information about the servers.",
+      "additionalProperties": {
+        "$ref": "#/$defs/BaseServer",
+        "allOf": [
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "bigquery"
+                }
+              }
+            },
+            "then": {
+              "$ref": "#/$defs/BigQueryServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "postgres"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/PostgresServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "s3"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/S3Server"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "sftp"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/SftpServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "redshift"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/RedshiftServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "azure"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/AzureServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "sqlserver"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/SqlserverServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "snowflake"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/SnowflakeServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "databricks"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/DatabricksServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "dataframe"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/DataframeServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "glue"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/GlueServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "oracle"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/OracleServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "kafka"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/KafkaServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "pubsub"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/PubSubServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "kinesis"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/KinesisDataStreamsServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "trino"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/TrinoServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "clickhouse"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/ClickhouseServer"
+            }
+          },
+          {
+            "if": {
+              "properties": {
+                "type": {
+                  "const": "local"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "then": {
+              "$ref": "#/$defs/LocalServer"
+            }
+          }
+        ]
+      }
+    },
+    "terms": {
+      "type": "object",
+      "description": "The terms and conditions of the data contract.",
+      "properties": {
+        "usage": {
+          "type": "string",
+          "description": "The usage describes the way the data is expected to be used. Can contain business and technical information."
+        },
+        "limitations": {
+          "type": "string",
+          "description": "The limitations describe the restrictions on how the data can be used, can be technical or restrictions on what the data may not be used for."
+        },
+        "policies": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "type": {
+                "type": "string",
+                "description": "The type of the policy.",
+                "examples": [
+                  "privacy",
+                  "security",
+                  "retention",
+                  "compliance"
+                ]
+              },
+              "description": {
+                "type": "string",
+                "description": "A description of the policy."
+              },
+              "url": {
+                "type": "string",
+                "format": "uri",
+                "description": "A URL to the policy document."
+              }
+            },
+            "additionalProperties": true
+          },
+          "description": "The limitations describe the restrictions on how the data can be used, can be technical or restrictions on what the data may not be used for."
+        },
+        "billing": {
+          "type": "string",
+          "description": "The billing describes the pricing model for using the data, such as whether it's free, having a monthly fee, or metered pay-per-use."
+        },
+        "noticePeriod": {
+          "type": "string",
+          "description": "The period of time that must be given by either party to terminate or modify a data usage agreement. Uses ISO-8601 period format, e.g., 'P3M' for a period of three months."
+        }
+      },
+      "additionalProperties": true
+    },
+    "models": {
+      "description": "Specifies the logical data model. Use the models name (e.g., the table name) as the key.",
+      "type": "object",
+      "minProperties": 1,
+      "propertyNames": {
+        "pattern": "^[a-zA-Z0-9_-]+$"
+      },
+      "additionalProperties": {
+        "type": "object",
+        "title": "Model",
+        "properties": {
+          "description": {
+            "type": "string"
+          },
+          "type": {
+            "description": "The type of the model. Examples: table, view, object. Default: table.",
+            "type": "string",
+            "title": "ModelType",
+            "default": "table",
+            "enum": [
+              "table",
+              "view",
+              "object"
+            ]
+          },
+          "title": {
+            "type": "string",
+            "description": "An optional string providing a human readable name for the model. Especially useful if the model name is cryptic or contains abbreviations.",
+            "examples": [
+              "Purchase Orders",
+              "Air Shipments"
+            ]
+          },
+          "fields": {
+            "description": "Specifies a field in the data model. Use the field name (e.g., the column name) as the key.",
+            "type": "object",
+            "additionalProperties": {
+              "type": "object",
+              "title": "Field",
+              "properties": {
+                "description": {
+                  "type": "string",
+                  "description": "An optional string describing the semantic of the data in this field."
+                },
+                "title": {
+                  "type": "string",
+                  "description": "An optional string providing a human readable name for the field. Especially useful if the field name is cryptic or contains abbreviations."
+                },
+                "type": {
+                  "$ref": "#/$defs/FieldType"
+                },
+                "required": {
+                  "type": "boolean",
+                  "default": false,
+                  "description": "An indication, if this field must contain a value and may not be null."
+                },
+                "fields": {
+                  "description": "The nested fields (e.g. columns) of the object, record, or struct.",
+                  "type": "object",
+                  "additionalProperties": {
+                    "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
+                  }
+                },
+                "items": {
+                  "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
+                },
+                "keys": {
+                  "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
+                },
+                "values": {
+                  "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
+                },
+                "primary": {
+                  "type": "boolean",
+                  "deprecationMessage": "Use the primaryKey field instead."
+                },
+                "primaryKey": {
+                  "type": "boolean",
+                  "default": false,
+                  "description": "If this field is a primary key."
+                },
+                "references": {
+                  "type": "string",
+                  "description": "The reference to a field in another model. E.g. use 'orders.order_id' to reference the order_id field of the model orders. Think of defining a foreign key relationship.",
+                  "examples": [
+                    "orders.order_id",
+                    "model.nested_field.field"
+                  ]
+                },
+                "unique": {
+                  "type": "boolean",
+                  "default": false,
+                  "description": "An indication, if the value must be unique within the model."
+                },
+                "enum": {
+                  "type": "array",
+                  "items": {
+                    "type": "string"
+                  },
+                  "uniqueItems": true,
+                  "description": "A value must be equal to one of the elements in this array value. Only evaluated if the value is not null."
+                },
+                "minLength": {
+                  "type": "integer",
+                  "description": "A value must greater than, or equal to, the value of this. Only applies to string types."
+                },
+                "maxLength": {
+                  "type": "integer",
+                  "description": "A value must less than, or equal to, the value of this. Only applies to string types."
+                },
+                "format": {
+                  "type": "string",
+                  "description": "A specific format the value must comply with (e.g., 'email', 'uri', 'uuid').",
+                  "examples": [
+                    "email",
+                    "uri",
+                    "uuid"
+                  ]
+                },
+                "precision": {
+                  "type": "number",
+                  "examples": [
+                    38
+                  ],
+                  "description": "The maximum number of digits in a number. Only applies to numeric values. Defaults to 38."
+                },
+                "scale": {
+                  "type": "number",
+                  "examples": [
+                    0
+                  ],
+                  "description": "The maximum number of decimal places in a number. Only applies to numeric values. Defaults to 0."
+                },
+                "pattern": {
+                  "type": "string",
+                  "description": "A regular expression the value must match. Only applies to string types.",
+                  "examples": [
+                    "^[a-zA-Z0-9_-]+$"
+                  ]
+                },
+                "minimum": {
+                  "type": "number",
+                  "description": "A value of a number must greater than, or equal to, the value of this. Only evaluated if the value is not null. Only applies to numeric values."
+                },
+                "exclusiveMinimum": {
+                  "type": "number",
+                  "description": "A value of a number must greater than the value of this. Only evaluated if the value is not null. Only applies to numeric values."
+                },
+                "maximum": {
+                  "type": "number",
+                  "description": "A value of a number must less than, or equal to, the value of this. Only evaluated if the value is not null. Only applies to numeric values."
+                },
+                "exclusiveMaximum": {
+                  "type": "number",
+                  "description": "A value of a number must less than the value of this. Only evaluated if the value is not null. Only applies to numeric values."
+                },
+                "example": {
+                  "type": "string",
+                  "description": "An example value for this field.",
+                  "deprecationMessage": "Use the examples field instead."
+                },
+                "examples": {
+                  "type": "array",
+                  "description": "A examples value for this field."
+                },
+                "pii": {
+                  "type": "boolean",
+                  "description": "An indication, if this field contains Personal Identifiable Information (PII)."
+                },
+                "classification": {
+                  "type": "string",
+                  "description": "The data class defining the sensitivity level for this field, according to the organization's classification scheme.",
+                  "examples": [
+                    "sensitive",
+                    "restricted",
+                    "internal",
+                    "public"
+                  ]
+                },
+                "tags": {
+                  "type": "array",
+                  "items": {
+                    "type": "string"
+                  },
+                  "description": "Custom metadata to provide additional context."
+                },
+                "links": {
+                  "type": "object",
+                  "description": "Links to external resources.",
+                  "minProperties": 1,
+                  "propertyNames": {
+                    "pattern": "^[a-zA-Z0-9_-]+$"
+                  },
+                  "additionalProperties": {
+                    "type": "string",
+                    "title": "Link",
+                    "description": "A URL to an external resource.",
+                    "format": "uri",
+                    "examples": [
+                      "https://example.com"
+                    ]
+                  }
+                },
+                "$ref": {
+                  "type": "string",
+                  "description": "A reference URI to a definition in the specification, internally or externally. Properties will be inherited from the definition."
+                },
+                "quality": {
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/$defs/Quality"
+                  }
+                },
+                "lineage": {
+                  "$ref": "#/$defs/Lineage"
+                },
+                "config": {
+                  "type": "object",
+                  "description": "Additional metadata for field configuration.",
+                  "additionalProperties": {
+                    "type": [
+                      "string",
+                      "number",
+                      "boolean",
+                      "object",
+                      "array",
+                      "null"
+                    ]
+                  },
+                  "properties": {
+                    "avroType": {
+                      "type": "string",
+                      "description": "Specify the field type to use when exporting the data model to Apache Avro."
+                    },
+                    "avroLogicalType": {
+                      "type": "string",
+                      "description": "Specify the logical field type to use when exporting the data model to Apache Avro."
+                    },
+                    "bigqueryType": {
+                      "type": "string",
+                      "description": "Specify the physical column type that is used in a BigQuery table, e.g., `NUMERIC(5, 2)`."
+                    },
+                    "snowflakeType": {
+                      "type": "string",
+                      "description": "Specify the physical column type that is used in a Snowflake table, e.g., `TIMESTAMP_LTZ`."
+                    },
+                    "redshiftType": {
+                      "type": "string",
+                      "description": "Specify the physical column type that is used in a Redshift table, e.g., `SMALLINT`."
+                    },
+                    "sqlserverType": {
+                      "type": "string",
+                      "description": "Specify the physical column type that is used in a SQL Server table, e.g., `DATETIME2`."
+                    },
+                    "databricksType": {
+                      "type": "string",
+                      "description": "Specify the physical column type that is used in a Databricks Unity Catalog table."
+                    },
+                    "glueType": {
+                      "type": "string",
+                      "description": "Specify the physical column type that is used in an AWS Glue Data Catalog table."
+                    }
+                  }
+                }
+              }
+            }
+          },
+          "primaryKey": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "The compound primary key of the model."
+          },
+          "quality": {
+            "type": "array",
+            "items": {
+              "$ref": "#/$defs/Quality"
+            }
+          },
+          "examples": {
+            "type": "array"
+          },
+          "additionalFields": {
+            "type": "boolean",
+            "description": " Specify, if the model can have additional fields that are not defined in the contract.                            ",
+            "default": false
+          },
+          "config": {
+            "type": "object",
+            "description": "Additional metadata for model configuration.",
+            "additionalProperties": {
+              "type": [
+                "string",
+                "number",
+                "boolean",
+                "object",
+                "array",
+                "null"
+              ]
+            },
+            "properties": {
+              "avroNamespace": {
+                "type": "string",
+                "description": "The namespace to use when importing and exporting the data model from / to Apache Avro."
+              }
+            }
+          }
+        }
+      }
+    },
+    "definitions": {
+      "description": "Clear and concise explanations of syntax, semantic, and classification of business objects in a given domain.",
+      "type": "object",
+      "propertyNames": {
+        "pattern": "^[a-zA-Z0-9/_-]+$"
+      },
+      "additionalProperties": {
+        "type": "object",
+        "title": "Definition",
+        "properties": {
+          "domain": {
+            "type": "string",
+            "description": "The domain in which this definition is valid.",
+            "default": "global",
+            "deprecationMessage": "This field is deprecated. Encode the domain into the ID using slashes."
+          },
+          "name": {
+            "type": "string",
+            "description": "The technical name of this definition.",
+            "deprecationMessage": "This field is deprecated. Encode the name into the ID using slashes."
+          },
+          "title": {
+            "type": "string",
+            "description": "The business name of this definition."
+          },
+          "description": {
+            "type": "string",
+            "description": "Clear and concise explanations related to the domain."
+          },
+          "type": {
+            "$ref": "#/$defs/FieldType"
+          },
+          "fields": {
+            "description": "The nested fields (e.g. columns) of the object, record, or struct.",
+            "type": "object",
+            "additionalProperties": {
+              "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
+            }
+          },
+          "items": {
+            "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
+          },
+          "keys": {
+            "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
+          },
+          "values": {
+            "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
+          },
+          "minLength": {
+            "type": "integer",
+            "description": "A value must be greater than or equal to this value. Applies only to string types."
+          },
+          "maxLength": {
+            "type": "integer",
+            "description": "A value must be less than or equal to this value. Applies only to string types."
+          },
+          "format": {
+            "type": "string",
+            "description": "Specific format requirements for the value (e.g., 'email', 'uri', 'uuid')."
+          },
+          "precision": {
+            "type": "integer",
+            "examples": [
+              38
+            ],
+            "description": "The maximum number of digits in a number. Only applies to numeric values. Defaults to 38."
+          },
+          "scale": {
+            "type": "integer",
+            "examples": [
+              0
+            ],
+            "description": "The maximum number of decimal places in a number. Only applies to numeric values. Defaults to 0."
+          },
+          "pattern": {
+            "type": "string",
+            "description": "A regular expression pattern the value must match. Applies only to string types."
+          },
+          "minimum": {
+            "type": "number",
+            "description": "A value of a number must greater than, or equal to, the value of this. Only evaluated if the value is not null. Only applies to numeric values."
+          },
+          "exclusiveMinimum": {
+            "type": "number",
+            "description": "A value of a number must greater than the value of this. Only evaluated if the value is not null. Only applies to numeric values."
+          },
+          "maximum": {
+            "type": "number",
+            "description": "A value of a number must less than, or equal to, the value of this. Only evaluated if the value is not null. Only applies to numeric values."
+          },
+          "exclusiveMaximum": {
+            "type": "number",
+            "description": "A value of a number must less than the value of this. Only evaluated if the value is not null. Only applies to numeric values."
+          },
+          "example": {
+            "type": "string",
+            "description": "An example value.",
+            "deprecationMessage": "Use the examples field instead."
+          },
+          "examples": {
+            "type": "array",
+            "description": "Example value."
+          },
+          "pii": {
+            "type": "boolean",
+            "description": "Indicates if the field contains Personal Identifiable Information (PII)."
+          },
+          "classification": {
+            "type": "string",
+            "description": "The data class defining the sensitivity level for this field."
+          },
+          "tags": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "Custom metadata to provide additional context."
+          },
+          "links": {
+            "type": "object",
+            "description": "Links to external resources.",
+            "minProperties": 1,
+            "propertyNames": {
+              "pattern": "^[a-zA-Z0-9_-]+$"
+            },
+            "additionalProperties": {
+              "type": "string",
+              "title": "Link",
+              "description": "A URL to an external resource.",
+              "format": "uri",
+              "examples": [
+                "https://example.com"
+              ]
+            }
+          }
+        },
+        "required": [
+          "type"
+        ]
+      }
+    },
+    "servicelevels": {
+      "type": "object",
+      "description": "Specifies the service level agreements for the provided data, including availability, data retention policies, latency requirements, data freshness, update frequency, support availability, and backup policies.",
+      "properties": {
+        "availability": {
+          "type": "object",
+          "description": "Availability refers to the promise or guarantee by the service provider about the uptime of the system that provides the data.",
+          "properties": {
+            "description": {
+              "type": "string",
+              "description": "An optional string describing the availability service level.",
+              "example": "The server is available during support hours"
+            },
+            "percentage": {
+              "type": "string",
+              "description": "An optional string describing the guaranteed uptime in percent (e.g., `99.9%`)",
+              "pattern": "^\\d+(\\.\\d+)?%$",
+              "example": "99.9%"
+            }
+          }
+        },
+        "retention": {
+          "type": "object",
+          "description": "Retention covers the period how long data will be available.",
+          "properties": {
+            "description": {
+              "type": "string",
+              "description": "An optional string describing the retention service level.",
+              "example": "Data is retained for one year."
+            },
+            "period": {
+              "type": "string",
+              "description": "An optional period of time, how long data is available. Supported formats: Simple duration (e.g., `1 year`, `30d`) and ISO 8601 duration (e.g, `P1Y`).",
+              "example": "P1Y"
+            },
+            "unlimited": {
+              "type": "boolean",
+              "description": "An optional indicator that data is kept forever.",
+              "example": false
+            },
+            "timestampField": {
+              "type": "string",
+              "description": "An optional reference to the field that contains the timestamp that the period refers to.",
+              "example": "orders.order_timestamp"
+            }
+          }
+        },
+        "latency": {
+          "type": "object",
+          "description": "Latency refers to the maximum amount of time from the source to its destination.",
+          "properties": {
+            "description": {
+              "type": "string",
+              "description": "An optional string describing the latency service level.",
+              "example": "Data is available within 25 hours after the order was placed."
+            },
+            "threshold": {
+              "type": "string",
+              "description": "An optional maximum duration between the source timestamp and the processed timestamp. Supported formats: Simple duration (e.g., `24 hours`, `5s`) and ISO 8601 duration (e.g, `PT24H`).",
+              "example": "25h"
+            },
+            "sourceTimestampField": {
+              "type": "string",
+              "description": "An optional reference to the field that contains the timestamp when the data was provided at the source.",
+              "example": "orders.order_timestamp"
+            },
+            "processedTimestampField": {
+              "type": "string",
+              "description": "An optional reference to the field that contains the processing timestamp, which denotes when the data is made available to consumers of this data contract.",
+              "example": "orders.processed_timestamp"
+            }
+          }
+        },
+        "freshness": {
+          "type": "object",
+          "description": "The maximum age of the youngest row in a table.",
+          "properties": {
+            "description": {
+              "type": "string",
+              "description": "An optional string describing the freshness service level.",
+              "example": "The age of the youngest row in a table is within 25 hours."
+            },
+            "threshold": {
+              "type": "string",
+              "description": "An optional maximum age of the youngest entry. Supported formats: Simple duration (e.g., `24 hours`, `5s`) and ISO 8601 duration (e.g., `PT24H`).",
+              "example": "25h"
+            },
+            "timestampField": {
+              "type": "string",
+              "description": "An optional reference to the field that contains the timestamp that the threshold refers to.",
+              "example": "orders.order_timestamp"
+            }
+          }
+        },
+        "frequency": {
+          "type": "object",
+          "description": "Frequency describes how often data is updated.",
+          "properties": {
+            "description": {
+              "type": "string",
+              "description": "An optional string describing the frequency service level.",
+              "example": "Data is delivered once a day."
+            },
+            "type": {
+              "type": "string",
+              "enum": [
+                "batch",
+                "micro-batching",
+                "streaming",
+                "manual"
+              ],
+              "description": "The method of data processing.",
+              "example": "batch"
+            },
+            "interval": {
+              "type": "string",
+              "description": "Optional. Only for batch: How often the pipeline is triggered, e.g., `daily`.",
+              "example": "daily"
+            },
+            "cron": {
+              "type": "string",
+              "description": "Optional. Only for batch: A cron expression when the pipelines is triggered. E.g., `0 0 * * *`.",
+              "example": "0 0 * * *"
+            }
+          }
+        },
+        "support": {
+          "type": "object",
+          "description": "Support describes the times when support will be available for contact.",
+          "properties": {
+            "description": {
+              "type": "string",
+              "description": "An optional string describing the support service level.",
+              "example": "The data is available during typical business hours at headquarters."
+            },
+            "time": {
+              "type": "string",
+              "description": "An optional string describing the times when support will be available for contact such as `24/7` or `business hours only`.",
+              "example": "9am to 5pm in EST on business days"
+            },
+            "responseTime": {
+              "type": "string",
+              "description": "An optional string describing the time it takes for the support team to acknowledge a request. This does not mean the issue will be resolved immediately, but it assures users that their request has been received and will be dealt with.",
+              "example": "24 hours"
+            }
+          }
+        },
+        "backup": {
+          "type": "object",
+          "description": "Backup specifies details about data backup procedures.",
+          "properties": {
+            "description": {
+              "type": "string",
+              "description": "An optional string describing the backup service level.",
+              "example": "Data is backed up once a week, every Sunday at 0:00 UTC."
+            },
+            "interval": {
+              "type": "string",
+              "description": "An optional interval that defines how often data will be backed up, e.g., `daily`.",
+              "example": "weekly"
+            },
+            "cron": {
+              "type": "string",
+              "description": "An optional cron expression when data will be backed up, e.g., `0 0 * * *`.",
+              "example": "0 0 * * 0"
+            },
+            "recoveryTime": {
+              "type": "string",
+              "description": "An optional Recovery Time Objective (RTO) specifies the maximum amount of time allowed to restore data from a backup after a failure or loss event (e.g., 4 hours, 24 hours).",
+              "example": "24 hours"
+            },
+            "recoveryPoint": {
+              "type": "string",
+              "description": "An optional Recovery Point Objective (RPO) defines the maximum acceptable age of files that must be recovered from backup storage for normal operations to resume after a disaster or data loss event. This essentially measures how much data you can afford to lose, measured in time (e.g., 4 hours, 24 hours).",
+              "example": "1 week"
+            }
+          }
+        }
+      }
+    },
+    "links": {
+      "type": "object",
+      "description": "Links to external resources.",
+      "minProperties": 1,
+      "propertyNames": {
+        "pattern": "^[a-zA-Z0-9_-]+$"
+      },
+      "additionalProperties": {
+        "type": "string",
+        "title": "Link",
+        "description": "A URL to an external resource.",
+        "format": "uri",
+        "examples": [
+          "https://example.com"
+        ]
+      }
+    },
+    "tags": {
+      "type": "array",
+      "items": {
+        "type": "string",
+        "description": "Tags to facilitate searching and filtering.",
+        "examples": [
+          "databricks",
+          "pii",
+          "sensitive"
+        ]
+      },
+      "description": "Tags to facilitate searching and filtering."
+    }
+  },
+  "required": [
+    "dataContractSpecification",
+    "id",
+    "info"
+  ],
+  "$defs": {
+    "FieldType": {
+      "type": "string",
+      "title": "FieldType",
+      "description": "The logical data type of the field.",
+      "enum": [
+        "number",
+        "decimal",
+        "numeric",
+        "int",
+        "integer",
+        "long",
+        "bigint",
+        "float",
+        "double",
+        "string",
+        "text",
+        "varchar",
+        "boolean",
+        "timestamp",
+        "timestamp_tz",
+        "timestamp_ntz",
+        "date",
+        "time",
+        "array",
+        "map",
+        "object",
+        "record",
+        "struct",
+        "bytes",
+        "variant",
+        "json",
+        "null"
+      ]
+    },
+    "BaseServer": {
+      "type": "object",
+      "properties": {
+        "description": {
+          "type": "string",
+          "description": "An optional string describing the servers."
+        },
+        "environment": {
+          "type": "string",
+          "description": "The environment in which the servers are running. Examples: prod, sit, stg."
+        },
+        "type": {
+          "type": "string",
+          "description": "The type of the data product technology that implements the data contract.",
+          "examples": [
+            "azure",
+            "bigquery",
+            "BigQuery",
+            "clickhouse",
+            "databricks",
+            "dataframe",
+            "glue",
+            "kafka",
+            "kinesis",
+            "local",
+            "oracle",
+            "postgres",
+            "pubsub",
+            "redshift",
+            "sftp",
+            "sqlserver",
+            "snowflake",
+            "s3",
+            "trino"
+          ]
+        },
+        "roles": {
+          "description": " An optional array of roles that are available and can be requested to access the server for role-based access control. E.g. separate roles for different regions or sensitive data.",
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "name": {
+                "type": "string",
+                "description": "The name of the role."
+              },
+              "description": {
+                "type": "string",
+                "description": "A description of the role and what access the role provides."
+              }
+            },
+            "required": [
+              "name"
+            ]
+          }
+        }
+      },
+      "additionalProperties": true,
+      "required": [
+        "type"
+      ]
+    },
+    "BigQueryServer": {
+      "type": "object",
+      "title": "BigQueryServer",
+      "properties": {
+        "project": {
+          "type": "string",
+          "description": "The GCP project name."
+        },
+        "dataset": {
+          "type": "string",
+          "description": "The GCP dataset name."
+        }
+      },
+      "required": [
+        "project",
+        "dataset"
+      ]
+    },
+    "S3Server": {
+      "type": "object",
+      "title": "S3Server",
+      "properties": {
+        "location": {
+          "type": "string",
+          "format": "uri",
+          "description": "S3 URL, starting with `s3://`",
+          "examples": [
+            "s3://datacontract-example-orders-latest/data/{model}/*.json"
+          ]
+        },
+        "endpointUrl": {
+          "type": "string",
+          "format": "uri",
+          "description": "The server endpoint for S3-compatible servers.",
+          "examples": [
+            "https://minio.example.com"
+          ]
+        },
+        "format": {
+          "type": "string",
+          "enum": [
+            "parquet",
+            "delta",
+            "json",
+            "csv"
+          ],
+          "description": "File format."
+        },
+        "delimiter": {
+          "type": "string",
+          "enum": [
+            "new_line",
+            "array"
+          ],
+          "description": "Only for format = json. How multiple json documents are delimited within one file"
+        }
+      },
+      "required": [
+        "location"
+      ]
+    },
+    "SftpServer": {
+      "type": "object",
+      "title": "SftpServer",
+      "properties": {
+        "location": {
+          "type": "string",
+          "format": "uri",
+          "pattern": "^sftp://.*",
+          "description": "SFTP URL, starting with `sftp://`",
+          "examples": [
+            "sftp://123.123.12.123/{model}/*.json"
+          ]
+        },
+        "format": {
+          "type": "string",
+          "enum": [
+            "parquet",
+            "delta",
+            "json",
+            "csv"
+          ],
+          "description": "File format."
+        },
+        "delimiter": {
+          "type": "string",
+          "enum": [
+            "new_line",
+            "array"
+          ],
+          "description": "Only for format = json. How multiple json documents are delimited within one file"
+        }
+      },
+      "required": [
+        "location"
+      ]
+    },
+    "RedshiftServer": {
+      "type": "object",
+      "title": "RedshiftServer",
+      "properties": {
+        "account": {
+          "type": "string",
+          "description": "An optional string describing the server."
+        },
+        "host": {
+          "type": "string",
+          "description": "An optional string describing the host name."
+        },
+        "database": {
+          "type": "string",
+          "description": "An optional string describing the server."
+        },
+        "schema": {
+          "type": "string",
+          "description": "An optional string describing the server."
+        },
+        "clusterIdentifier": {
+          "type": "string",
+          "description": "An optional string describing the cluster's identifier.",
+          "examples": [
+            "redshift-prod-eu",
+            "analytics-cluster"
+          ]
+        },
+        "port": {
+          "type": "integer",
+          "description": "An optional string describing the cluster's port.",
+          "examples": [
+            5439
+          ]
+        },
+        "endpoint": {
+          "type": "string",
+          "description": "An optional string describing the cluster's endpoint.",
+          "examples": [
+            "analytics-cluster.example.eu-west-1.redshift.amazonaws.com:5439/analytics"
+          ]
+        }
+      },
+      "additionalProperties": true,
+      "required": [
+        "account",
+        "database",
+        "schema"
+      ]
+    },
+    "AzureServer": {
+      "type": "object",
+      "title": "AzureServer",
+      "properties": {
+        "location": {
+          "type": "string",
+          "format": "uri",
+          "description": "Path to Azure Blob Storage or Azure Data Lake Storage (ADLS), supports globs. Recommended pattern is 'abfss:///'",
+          "examples": [
+            "abfss://my_container_name/path",
+            "abfss://my_container_name/path/*.json",
+            "az://my_storage_account_name.blob.core.windows.net/my_container/path/*.parquet",
+            "abfss://my_storage_account_name.dfs.core.windows.net/my_container_name/path/*.parquet"
+          ]
+        },
+        "format": {
+          "type": "string",
+          "enum": [
+            "parquet",
+            "delta",
+            "json",
+            "csv"
+          ],
+          "description": "File format."
+        },
+        "delimiter": {
+          "type": "string",
+          "enum": [
+            "new_line",
+            "array"
+          ],
+          "description": "Only for format = json. How multiple json documents are delimited within one file"
+        }
+      },
+      "required": [
+        "location",
+        "format"
+      ]
+    },
+    "SqlserverServer": {
+      "type": "object",
+      "title": "SqlserverServer",
+      "properties": {
+        "host": {
+          "type": "string",
+          "description": "The host to the database server",
+          "examples": [
+            "localhost"
+          ]
+        },
+        "port": {
+          "type": "integer",
+          "description": "The port to the database server.",
+          "default": 1433,
+          "examples": [
+            1433
+          ]
+        },
+        "database": {
+          "type": "string",
+          "description": "The name of the database.",
+          "examples": [
+            "database"
+          ]
+        },
+        "schema": {
+          "type": "string",
+          "description": "The name of the schema in the database.",
+          "examples": [
+            "dbo"
+          ]
+        }
+      },
+      "required": [
+        "host",
+        "database",
+        "schema"
+      ]
+    },
+    "SnowflakeServer": {
+      "type": "object",
+      "title": "SnowflakeServer",
+      "properties": {
+        "account": {
+          "type": "string",
+          "description": "An optional string describing the server."
+        },
+        "database": {
+          "type": "string",
+          "description": "An optional string describing the server."
+        },
+        "schema": {
+          "type": "string",
+          "description": "An optional string describing the server."
+        }
+      },
+      "required": [
+        "account",
+        "database",
+        "schema"
+      ]
+    },
+    "DatabricksServer": {
+      "type": "object",
+      "title": "DatabricksServer",
+      "properties": {
+        "host": {
+          "type": "string",
+          "description": "The Databricks host",
+          "examples": [
+            "dbc-abcdefgh-1234.cloud.databricks.com"
+          ]
+        },
+        "catalog": {
+          "type": "string",
+          "description": "The name of the Hive or Unity catalog"
+        },
+        "schema": {
+          "type": "string",
+          "description": "The schema name in the catalog"
+        }
+      },
+      "required": [
+        "catalog",
+        "schema"
+      ]
+    },
+    "DataframeServer": {
+      "type": "object",
+      "title": "DataframeServer",
+      "required": [
+        "type"
+      ]
+    },
+    "GlueServer": {
+      "type": "object",
+      "title": "GlueServer",
+      "properties": {
+        "account": {
+          "type": "string",
+          "description": "The AWS Glue account",
+          "examples": [
+            "1234-5678-9012"
+          ]
+        },
+        "database": {
+          "type": "string",
+          "description": "The AWS Glue database name",
+          "examples": [
+            "my_database"
+          ]
+        },
+        "location": {
+          "type": "string",
+          "format": "uri",
+          "description": "The AWS S3 path. Must be in the form of a URL.",
+          "examples": [
+            "s3://datacontract-example-orders-latest/data/{model}"
+          ]
+        },
+        "format": {
+          "type": "string",
+          "description": "The format of the files",
+          "examples": [
+            "parquet",
+            "csv",
+            "json",
+            "delta"
+          ]
+        }
+      },
+      "required": [
+        "account",
+        "database"
+      ]
+    },
+    "PostgresServer": {
+      "type": "object",
+      "title": "PostgresServer",
+      "properties": {
+        "host": {
+          "type": "string",
+          "description": "The host to the database server",
+          "examples": [
+            "localhost"
+          ]
+        },
+        "port": {
+          "type": "integer",
+          "description": "The port to the database server."
+        },
+        "database": {
+          "type": "string",
+          "description": "The name of the database.",
+          "examples": [
+            "postgres"
+          ]
+        },
+        "schema": {
+          "type": "string",
+          "description": "The name of the schema in the database.",
+          "examples": [
+            "public"
+          ]
+        }
+      },
+      "required": [
+        "host",
+        "port",
+        "database",
+        "schema"
+      ]
+    },
+    "OracleServer": {
+      "type": "object",
+      "title": "OracleServer",
+      "properties": {
+        "host": {
+          "type": "string",
+          "description": "The host to the oracle server",
+          "examples": [
+            "localhost"
+          ]
+        },
+        "port": {
+          "type": "integer",
+          "description": "The port to the oracle server.",
+          "examples": [
+            1523
+          ]
+        },
+        "serviceName": {
+          "type": "string",
+          "description": "The name of the service.",
+          "examples": [
+            "service"
+          ]
+        }
+      },
+      "required": [
+        "host",
+        "port",
+        "serviceName"
+      ]
+    },
+    "KafkaServer": {
+      "type": "object",
+      "title": "KafkaServer",
+      "description": "Kafka Server",
+      "properties": {
+        "host": {
+          "type": "string",
+          "description": "The bootstrap server of the kafka cluster."
+        },
+        "topic": {
+          "type": "string",
+          "description": "The topic name."
+        },
+        "format": {
+          "type": "string",
+          "description": "The format of the message. Examples: json, avro, protobuf.",
+          "default": "json"
+        }
+      },
+      "required": [
+        "host",
+        "topic"
+      ]
+    },
+    "PubSubServer": {
+      "type": "object",
+      "title": "PubSubServer",
+      "properties": {
+        "project": {
+          "type": "string",
+          "description": "The GCP project name."
+        },
+        "topic": {
+          "type": "string",
+          "description": "The topic name."
+        }
+      },
+      "required": [
+        "project",
+        "topic"
+      ]
+    },
+    "KinesisDataStreamsServer": {
+      "type": "object",
+      "title": "KinesisDataStreamsServer",
+      "description": "Kinesis Data Streams Server",
+      "properties": {
+        "stream": {
+          "type": "string",
+          "description": "The name of the Kinesis data stream."
+        },
+        "region": {
+          "type": "string",
+          "description": "AWS region.",
+          "examples": [
+            "eu-west-1"
+          ]
+        },
+        "format": {
+          "type": "string",
+          "description": "The format of the record",
+          "examples": [
+            "json",
+            "avro",
+            "protobuf"
+          ]
+        }
+      },
+      "required": [
+        "stream"
+      ]
+    },
+    "TrinoServer": {
+      "type": "object",
+      "title": "TrinoServer",
+      "properties": {
+        "host": {
+          "type": "string",
+          "description": "The Trino host URL.",
+          "examples": [
+            "localhost"
+          ]
+        },
+        "port": {
+          "type": "integer",
+          "description": "The Trino port."
+        },
+        "catalog": {
+          "type": "string",
+          "description": "The name of the catalog.",
+          "examples": [
+            "hive"
+          ]
+        },
+        "schema": {
+          "type": "string",
+          "description": "The name of the schema in the database.",
+          "examples": [
+            "my_schema"
+          ]
+        }
+      },
+      "required": [
+        "host",
+        "port",
+        "catalog",
+        "schema"
+      ]
+    },
+    "ClickhouseServer": {
+      "type": "object",
+      "title": "ClickhouseServer",
+      "properties": {
+        "host": {
+          "type": "string",
+          "description": "The host to the database server",
+          "examples": [
+            "localhost"
+          ]
+        },
+        "port": {
+          "type": "integer",
+          "description": "The port to the database server."
+        },
+        "database": {
+          "type": "string",
+          "description": "The name of the database.",
+          "examples": [
+            "postgres"
+          ]
+        }
+      },
+      "required": [
+        "host",
+        "port",
+        "database"
+      ]
+    },
+    "LocalServer": {
+      "type": "object",
+      "title": "LocalServer",
+      "properties": {
+        "path": {
+          "type": "string",
+          "description": "The relative or absolute path to the data file(s).",
+          "examples": [
+            "./folder/data.parquet",
+            "./folder/*.parquet"
+          ]
+        },
+        "format": {
+          "type": "string",
+          "description": "The format of the file(s)",
+          "examples": [
+            "json",
+            "parquet",
+            "delta",
+            "csv"
+          ]
+        }
+      },
+      "required": [
+        "path",
+        "format"
+      ]
+    },
+    "Quality": {
+      "allOf": [
+        {
+          "type": "object",
+          "properties": {
+            "type": {
+              "type": "string",
+              "description": "The type of quality check",
+              "enum": [
+                "text",
+                "library",
+                "sql",
+                "custom"
+              ]
+            },
+            "description": {
+              "type": "string",
+              "description": "A plain text describing the quality attribute in natural language."
+            }
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "text"
+              }
+            }
+          },
+          "then": {
+            "required": [
+              "description"
+            ]
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "sql"
+              }
+            }
+          },
+          "then": {
+            "properties": {
+              "query": {
+                "type": "string",
+                "description": "A SQL query that returns a single number to compare with the threshold."
+              },
+              "dialect": {
+                "type": "string",
+                "description": "The SQL dialect that is used for the query. Should be compatible to the server.type.",
+                "examples": [
+                  "athena",
+                  "bigquery",
+                  "redshift",
+                  "snowflake",
+                  "trino",
+                  "postgres",
+                  "oracle"
+                ]
+              },
+              "mustBe": {
+                "type": "number"
+              },
+              "mustNotBe": {
+                "type": "number"
+              },
+              "mustBeGreaterThan": {
+                "type": "number"
+              },
+              "mustBeGreaterOrEqualTo": {
+                "type": "number"
+              },
+              "mustBeGreaterThanOrEqualTo": {
+                "type": "number",
+                "deprecated": true
+              },
+              "mustBeLessThan": {
+                "type": "number"
+              },
+              "mustBeLessThanOrEqualTo": {
+                "type": "number",
+                "deprecated": true
+              },
+              "mustBeLessOrEqualTo": {
+                "type": "number"
+              },
+              "mustBeBetween": {
+                "type": "array",
+                "items": {
+                  "type": "number"
+                },
+                "minItems": 2,
+                "maxItems": 2
+              },
+              "mustNotBeBetween": {
+                "type": "array",
+                "items": {
+                  "type": "number"
+                },
+                "minItems": 2,
+                "maxItems": 2
+              }
+            },
+            "required": [
+              "query"
+            ]
+          }
+        },
+        {
+          "if": {
+            "anyOf": [
+              {
+                "properties": {
+                  "type": {
+                    "const": "library"
+                  }
+                }
+              },
+              {
+                "properties": {
+                  "metric": {
+                    "type": "string"
+                  }
+                },
+                "required": ["metric"]
+              }
+            ]
+          },
+          "then": {
+            "properties": {
+              "metric": {
+                "type": "string",
+                "description": "The DataQualityLibrary metric to use for the quality check.",
+                "enum": ["nullValues", "missingValues", "invalidValues", "duplicateValues", "rowCount"]
+              },
+              "rule": {
+                "type": "string",
+                "deprecated": true,
+                "description": "Deprecated. Use metric instead"
+              },
+              "arguments": {
+                "type": "object",
+                "description": "Additional metric-specific parameters for the quality check.",
+                "additionalProperties": {
+                  "type": ["string", "number", "boolean", "array", "object"]
+                }
+              },
+              "mustBe": {
+                "description": "Must be equal to the value to be valid. When using numbers, it is equivalent to '='."
+              },
+              "mustNotBe": {
+                "description": "Must not be equal to the value to be valid. When using numbers, it is equivalent to '!='."
+              },
+              "mustBeGreaterThan": {
+                "type": "number",
+                "description": "Must be greater than the value to be valid. It is equivalent to '>'."
+              },
+              "mustBeGreaterOrEqualTo": {
+                "type": "number",
+                "description": "Must be greater than or equal to the value to be valid. It is equivalent to '>='."
+              },
+              "mustBeLessThan": {
+                "type": "number",
+                "description": "Must be less than the value to be valid. It is equivalent to '<'."
+              },
+              "mustBeLessOrEqualTo": {
+                "type": "number",
+                "description": "Must be less than or equal to the value to be valid. It is equivalent to '<='."
+              },
+              "mustBeBetween": {
+                "type": "array",
+                "description": "Must be between the two numbers to be valid. Smallest number first in the array.",
+                "minItems": 2,
+                "maxItems": 2,
+                "uniqueItems": true,
+                "items": {
+                  "type": "number"
+                }
+              },
+              "mustNotBeBetween": {
+                "type": "array",
+                "description": "Must not be between the two numbers to be valid. Smallest number first in the array.",
+                "minItems": 2,
+                "maxItems": 2,
+                "uniqueItems": true,
+                "items": {
+                  "type": "number"
+                }
+              }
+            },
+            "required": [
+              "metric"
+            ]
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "custom"
+              }
+            }
+          },
+          "then": {
+            "properties": {
+              "description": {
+                "type": "string",
+                "description": "A plain text describing the quality attribute in natural language."
+              },
+              "engine": {
+                "type": "string",
+                "examples": [
+                  "soda",
+                  "great-expectations"
+                ],
+                "description": "The engine used for custom quality checks."
+              },
+              "implementation": {
+                "type": [
+                  "object",
+                  "array",
+                  "string"
+                ],
+                "description": "Engine-specific quality checks and expectations."
+              }
+            },
+            "required": [
+              "engine"
+            ]
+          }
+        }
+      ]
+    },
+    "Lineage": {
+      "type": "object",
+      "properties": {
+        "inputFields": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "namespace": {
+                "type": "string",
+                "description": "The input dataset namespace"
+              },
+              "name": {
+                "type": "string",
+                "description": "The input dataset name"
+              },
+              "field": {
+                "type": "string",
+                "description": "The input field"
+              },
+              "transformations": {
+                "type": "array",
+                "items": {
+                  "type": "object",
+                  "properties": {
+                    "type": {
+                      "description": "The type of the transformation. Allowed values are: DIRECT, INDIRECT",
+                      "type": "string"
+                    },
+                    "subtype": {
+                      "type": "string",
+                      "description": "The subtype of the transformation"
+                    },
+                    "description": {
+                      "type": "string",
+                      "description": "a string representation of the transformation applied"
+                    },
+                    "masking": {
+                      "type": "boolean",
+                      "description": "is transformation masking the data or not"
+                    }
+                  },
+                  "required": [
+                    "type"
+                  ],
+                  "additionalProperties": true
+                }
+              }
+            },
+            "additionalProperties": true,
+            "required": [
+              "namespace",
+              "name",
+              "field"
+            ]
+          }
+        },
+        "transformationDescription": {
+          "type": "string",
+          "description": "a string representation of the transformation applied",
+          "deprecated": true
+        },
+        "transformationType": {
+          "type": "string",
+          "description": "IDENTITY|MASKED reflects a clearly defined behavior. IDENTITY: exact same as input; MASKED: no original data available (like a hash of PII for example)",
+          "deprecated": true
+        }
+      },
+      "additionalProperties": true,
+      "required": [
+        "inputFields"
+      ]
+    }
+  }
+}
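
For orientation, a minimal, illustrative datacontract.yaml fragment of the kind this schema validates might look like the following sketch. It is a sketch only, not a normative example: the spec version, id, and title values are hypothetical placeholders, while the s3 location and the orders/order_id names are borrowed from the schema's own examples.

    # Minimal sketch of a data contract instance (hypothetical values).
    dataContractSpecification: 1.2.1              # assumed spec version
    id: urn:datacontract:checkout:orders-latest   # placeholder id
    info:
      title: Orders Latest                        # placeholder title
      version: 1.0.0
    servers:
      production:
        type: s3
        location: s3://datacontract-example-orders-latest/data/{model}/*.json  # from the S3Server example
        format: json
        delimiter: new_line
    models:
      orders:
        fields:
          order_id:
            type: string
            primaryKey: true
            unique: true
            quality:
              - type: sql
                description: The orders table must not be empty.
                query: SELECT COUNT(*) FROM orders   # returns a single number
                mustBeGreaterThan: 0                 # threshold for the query result

The quality entry exercises the sql branch of the Quality definition above: the query returns a single number, and mustBeGreaterThan supplies the threshold it is compared against.
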
diff --git a/datacontract/schemas/odcs-3.0.2.schema.json b/datacontract/schemas/odcs-3.0.2.schema.json
new file mode 100644
index 000000000..cb9bfb9d8
--- /dev/null
+++ b/datacontract/schemas/odcs-3.0.2.schema.json
@@ -0,0 +1,2382 @@
+{
+  "$schema": "https://json-schema.org/draft/2019-09/schema",
+  "title": "Open Data Contract Standard (ODCS)",
+  "description": "An open data contract specification to establish agreement between data producers and consumers.",
+  "type": "object",
+  "properties": {
+    "version": {
+      "type": "string",
+      "description": "Current version of the data contract."
+    },
+    "kind": {
+      "type": "string",
+      "default": "DataContract",
+      "description": "The kind of file this is. Valid value is `DataContract`.",
+      "enum": ["DataContract"]
+    },
+    "apiVersion": {
+      "type": "string",
+      "default": "v3.0.2",
+      "description": "Version of the standard used to build data contract. Default value is v3.0.2.",
+      "enum": ["v3.0.2","v3.0.1", "v3.0.0", "v2.2.2", "v2.2.1", "v2.2.0"]
+    },
+    "id": {
+      "type": "string",
+      "description": "A unique identifier used to reduce the risk of dataset name collisions, such as a UUID."
+    },
+    "name": {
+      "type": "string",
+      "description": "Name of the data contract."
+    },
+    "tenant": {
+      "type": "string",
+      "description": "Indicates the property the data is primarily associated with. Value is case insensitive."
+    },
+    "tags": {
+      "$ref": "#/$defs/Tags"
+    },
+    "status": {
+      "type": "string",
+      "description": "Current status of the dataset.",
+      "examples": [
+        "proposed", "draft", "active", "deprecated", "retired"
+      ]
+    },
+    "servers": {
+      "type": "array",
+      "description": "List of servers where the datasets reside.",
+      "items": {
+        "$ref": "#/$defs/Server"
+      }
+    },
+    "dataProduct": {
+      "type": "string",
+      "description": "The name of the data product."
+    },
+    "description": {
+      "type": "object",
+      "description": "High level description of the dataset.",
+      "properties": {
+        "usage": {
+          "type": "string",
+          "description": "Intended usage of the dataset."
+        },
+        "purpose": {
+          "type": "string",
+          "description": "Purpose of the dataset."
+        },
+        "limitations": {
+          "type": "string",
+          "description": "Limitations of the dataset."
+        },
+        "authoritativeDefinitions": {
+          "$ref": "#/$defs/AuthoritativeDefinitions"
+        },
+        "customProperties": {
+          "$ref": "#/$defs/CustomProperties"
+        }
+      }
+    },
+    "domain": {
+      "type": "string",
+      "description": "Name of the logical data domain.",
+      "examples": ["imdb_ds_aggregate", "receiver_profile_out", "transaction_profile_out"]
+    },
+    "schema": {
+      "type": "array",
+      "description": "A list of elements within the schema to be cataloged.",
+      "items": {
+        "$ref": "#/$defs/SchemaObject"
+      }
+    },
+    "support": {
+      "$ref": "#/$defs/Support"
+    },
+    "price": {
+      "$ref": "#/$defs/Pricing"
+    },
+    "team": {
+      "type": "array",
+      "items": {
+        "$ref": "#/$defs/Team"
+      }
+    },
+    "roles": {
+      "type": "array",
+      "description": "A list of roles that will provide user access to the dataset.",
+      "items": {
+        "$ref": "#/$defs/Role"
+      }
+    },
+    "slaDefaultElement": {
+      "type": "string",
+      "description": "Element (using the element path notation) to do the checks on."
+    },
+    "slaProperties": {
+      "type": "array",
+      "description": "A list of key/value pairs for SLA specific properties. There is no limit on the type of properties (more details to come).",
+      "items": {
+        "$ref": "#/$defs/ServiceLevelAgreementProperty"
+      }
+    },
+    "authoritativeDefinitions": {
+      "$ref": "#/$defs/AuthoritativeDefinitions"
+    },
+    "customProperties": {
+      "$ref": "#/$defs/CustomProperties"
+    },
+    "contractCreatedTs": {
+      "type": "string",
+      "format": "date-time",
+      "description": "Timestamp in UTC of when the data contract was created."
+    }
+  },
+  "required": ["version", "apiVersion", "kind", "id", "status"],
+  "additionalProperties": false,
+  "$defs": {
+    "Server": {
+      "type": "object",
+      "description": "Data source details of where data is physically stored.",
+      "properties": {
+        "server": {
+          "type": "string",
+          "description": "Identifier of the server."
+        },
+        "type": {
+          "type": "string",
+          "description": "Type of the server.",
+          "enum": [
+            "api", "athena", "azure", "bigquery", "clickhouse", "databricks", "denodo", "dremio",
+            "duckdb", "glue", "cloudsql", "db2", "informix", "kafka", "kinesis", "local",
+            "mysql", "oracle", "postgresql", "postgres", "presto", "pubsub",
+            "redshift", "s3", "sftp", "snowflake", "sqlserver", "synapse", "trino", "vertica", "custom"
+          ]
+        },
+        "description": {
+          "type": "string",
+          "description": "Description of the server."
+        },
+        "environment": {
+          "type": "string",
+          "description": "Environment of the server.",
+          "examples": ["prod", "preprod", "dev", "uat"]
+        },
+        "roles": {
+          "type": "array",
+          "description": "List of roles that have access to the server.",
+          "items": {
+            "$ref": "#/$defs/Role"
+          }
+        },
+        "customProperties": {
+          "$ref": "#/$defs/CustomProperties"
+        }
+      },
+      "allOf": [
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "api"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/ApiServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "athena"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/AthenaServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "azure"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/AzureServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "bigquery"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/BigQueryServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "clickhouse"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/ClickHouseServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "databricks"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/DatabricksServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "denodo"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/DenodoServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "dremio"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/DremioServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "duckdb"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/DuckdbServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "glue"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/GlueServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "cloudsql"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/GoogleCloudSqlServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "db2"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/IBMDB2Server"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "informix"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/InformixServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "custom"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/CustomServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "kafka"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/KafkaServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "kinesis"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/KinesisServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "local"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/LocalServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "mysql"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/MySqlServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "oracle"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/OracleServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "postgresql"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/PostgresServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "postgres"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/PostgresServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "presto"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/PrestoServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "pubsub"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/PubSubServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "redshift"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/RedshiftServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "s3"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/S3Server"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "sftp"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/SftpServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "snowflake"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/SnowflakeServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "sqlserver"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/SqlserverServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "synapse"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/SynapseServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "trino"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/TrinoServer"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "vertica"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/ServerSource/VerticaServer"
+          }
+        }
+      ],
+      "required": ["server", "type"]
+    },
+    "ServerSource": {
+      "ApiServer": {
+        "type": "object",
+        "title": "AthenaServer",
+        "properties": {
+          "location": {
+            "type": "string",
+            "format": "uri",
+            "description": "The url to the API.",
+            "examples": [
+              "https://api.example.com/v1"
+            ]
+          }
+        },
+        "required": [
+          "location"
+        ]
+      },
+      "AthenaServer": {
+        "type": "object",
+        "title": "AthenaServer",
+        "properties": {
+          "stagingDir": {
+            "type": "string",
+            "format": "uri",
+            "description": "Amazon Athena automatically stores query results and metadata information for each query that runs in a query result location that you can specify in Amazon S3.",
+            "examples": [
+              "s3://my_storage_account_name/my_container/path"
+            ]
+          },
+          "schema": {
+            "type": "string",
+            "description": "Identify the schema in the data source in which your tables exist."
+          },
+          "catalog": {
+            "type": "string",
+            "description": "Identify the name of the Data Source, also referred to as a Catalog.",
+            "default": "awsdatacatalog"
+          },
+          "regionName": {
+            "type": "string",
+            "description": "The region your AWS account uses.",
+            "examples": ["eu-west-1"]
+          }
+        },
+        "required": [
+          "stagingDir",
+          "schema"
+        ]
+      },
+      "AzureServer": {
+        "type": "object",
+        "title": "AzureServer",
+        "properties": {
+          "location": {
+            "type": "string",
+            "format": "uri",
+            "description": "Fully qualified path to Azure Blob Storage or Azure Data Lake Storage (ADLS), supports globs.",
+            "examples": [
+              "az://my_storage_account_name.blob.core.windows.net/my_container/path/*.parquet",
+              "abfss://my_storage_account_name.dfs.core.windows.net/my_container_name/path/*.parquet"
+            ]
+          },
+          "format": {
+            "type": "string",
+            "enum": [
+              "parquet",
+              "delta",
+              "json",
+              "csv"
+            ],
+            "description": "File format."
+          },
+          "delimiter": {
+            "type": "string",
+            "enum": [
+              "new_line",
+              "array"
+            ],
+            "description": "Only for format = json. How multiple json documents are delimited within one file"
+          }
+        },
+        "required": [
+          "location",
+          "format"
+        ]
+      },
+      "BigQueryServer": {
+        "type": "object",
+        "title": "BigQueryServer",
+        "properties": {
+          "project": {
+            "type": "string",
+            "description": "The GCP project name."
+          },
+          "dataset": {
+            "type": "string",
+            "description": "The GCP dataset name."
+          }
+        },
+        "required": [
+          "project",
+          "dataset"
+        ]
+      },
+      "ClickHouseServer": {
+        "type": "object",
+        "title": "ClickHouseServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host of the ClickHouse server."
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port to the ClickHouse server."
+          },
+          "database": {
+            "type": "string",
+            "description": "The name of the database."
+          }
+        },
+        "required": [
+          "host",
+          "port",
+          "database"
+        ]
+      },
+      "DatabricksServer": {
+        "type": "object",
+        "title": "DatabricksServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The Databricks host",
+            "examples": [
+              "dbc-abcdefgh-1234.cloud.databricks.com"
+            ]
+          },
+          "catalog": {
+            "type": "string",
+            "description": "The name of the Hive or Unity catalog"
+          },
+          "schema": {
+            "type": "string",
+            "description": "The schema name in the catalog"
+          }
+        },
+        "required": [
+          "catalog",
+          "schema"
+        ]
+      },
+      "DenodoServer": {
+        "type": "object",
+        "title": "DenodoServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host of the Denodo server."
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port of the Denodo server."
+          },
+          "database": {
+            "type": "string",
+            "description": "The name of the database."
+          }
+        },
+        "required": [
+          "host",
+          "port"
+        ]
+      },
+      "DremioServer": {
+        "type": "object",
+        "title": "DremioServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host of the Dremio server."
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port of the Dremio server."
+          },
+          "schema": {
+            "type": "string",
+            "description": "The name of the schema."
+          }
+        },
+        "required": [
+          "host",
+          "port"
+        ]
+      },
+      "DuckdbServer": {
+        "type": "object",
+        "title": "DuckdbServer",
+        "properties": {
+          "database": {
+            "type": "string",
+            "description": "Path to duckdb database file."
+          },
+          "schema": {
+            "type": "integer",
+            "description": "The name of the schema."
+          }
+        },
+        "required": [
+          "database"
+        ]
+      },
+      "GlueServer": {
+        "type": "object",
+        "title": "GlueServer",
+        "properties": {
+          "account": {
+            "type": "string",
+            "description": "The AWS Glue account",
+            "examples": [
+              "1234-5678-9012"
+            ]
+          },
+          "database": {
+            "type": "string",
+            "description": "The AWS Glue database name",
+            "examples": [
+              "my_database"
+            ]
+          },
+          "location": {
+            "type": "string",
+            "format": "uri",
+            "description": "The AWS S3 path. Must be in the form of a URL.",
+            "examples": [
+              "s3://datacontract-example-orders-latest/data/{model}"
+            ]
+          },
+          "format": {
+            "type": "string",
+            "description": "The format of the files",
+            "examples": [
+              "parquet",
+              "csv",
+              "json",
+              "delta"
+            ]
+          }
+        },
+        "required": [
+          "account",
+          "database"
+        ]
+      },
+      "GoogleCloudSqlServer": {
+        "type": "object",
+        "title": "GoogleCloudSqlServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host of the Google Cloud Sql server."
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port of the Google Cloud Sql server."
+          },
+          "database": {
+            "type": "string",
+            "description": "The name of the database."
+          },
+          "schema": {
+            "type": "string",
+            "description": "The name of the schema."
+          }
+        },
+        "required": [
+          "host",
+          "port",
+          "database",
+          "schema"
+        ]
+      },
+      "IBMDB2Server": {
+        "type": "object",
+        "title": "IBMDB2Server",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host of the IBM DB2 server."
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port of the IBM DB2 server."
+          },
+          "database": {
+            "type": "string",
+            "description": "The name of the database."
+          },
+          "schema": {
+            "type": "string",
+            "description": "The name of the schema."
+          }
+        },
+        "required": [
+          "host",
+          "port",
+          "database"
+        ]
+      },
+      "InformixServer": {
+        "type": "object",
+        "title": "InformixServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host to the Informix server. "
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port to the Informix server. Defaults to 9088."
+          },
+          "database": {
+            "type": "string",
+            "description": "The name of the database."
+          }
+        },
+        "required": [
+          "host",
+          "database"
+        ]
+      },
+      "CustomServer": {
+        "type": "object",
+        "title": "CustomServer",
+        "properties": {
+          "account": {
+            "type": "string",
+            "description": "Account used by the server."
+          },
+          "catalog": {
+            "type": "string",
+            "description": "Name of the catalog."
+          },
+          "database": {
+            "type": "string",
+            "description": "Name of the database."
+          },
+          "dataset": {
+            "type": "string",
+            "description": "Name of the dataset."
+          },
+          "delimiter": {
+            "type": "string",
+            "description": "Delimiter."
+          },
+          "endpointUrl": {
+            "type": "string",
+            "description": "Server endpoint.",
+            "format": "uri"
+          },
+          "format": {
+            "type": "string",
+            "description": "File format."
+          },
+          "host": {
+            "type": "string",
+            "description": "Host name or IP address."
+          },
+          "location": {
+            "type": "string",
+            "description": "A URL to a location.",
+            "format": "uri"
+          },
+          "path": {
+            "type": "string",
+            "description": "Relative or absolute path to the data file(s)."
+          },
+          "port": {
+            "type": "integer",
+            "description": "Port to the server. No default value is assumed for custom servers."
+          },
+          "project": {
+            "type": "string",
+            "description": "Project name."
+          },
+          "region": {
+            "type": "string",
+            "description": "Cloud region."
+          },
+          "regionName": {
+            "type": "string",
+            "description": "Region name."
+          },
+          "schema": {
+            "type": "string",
+            "description": "Name of the schema."
+          },
+          "serviceName": {
+            "type": "string",
+            "description": "Name of the service."
+          },
+          "stagingDir": {
+            "type": "string",
+            "description": "Staging directory."
+          },
+          "warehouse": {
+            "type": "string",
+            "description": "Name of the cluster or warehouse."
+          }
+        }
+      },
+      "KafkaServer": {
+        "type": "object",
+        "title": "KafkaServer",
+        "description": "Kafka Server",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The bootstrap server of the kafka cluster."
+          },
+          "format": {
+            "type": "string",
+            "description": "The format of the messages.",
+            "examples": ["json", "avro", "protobuf", "xml"],
+            "default": "json"
+          }
+        },
+        "required": [
+          "host"
+        ]
+      },
+      "KinesisServer": {
+        "type": "object",
+        "title": "KinesisDataStreamsServer",
+        "description": "Kinesis Data Streams Server",
+        "properties": {
+          "region": {
+            "type": "string",
+            "description": "AWS region.",
+            "examples": [
+              "eu-west-1"
+            ]
+          },
+          "format": {
+            "type": "string",
+            "description": "The format of the record",
+            "examples": [
+              "json",
+              "avro",
+              "protobuf"
+            ]
+          }
+        }
+      },
+      "LocalServer": {
+        "type": "object",
+        "title": "LocalServer",
+        "properties": {
+          "path": {
+            "type": "string",
+            "description": "The relative or absolute path to the data file(s).",
+            "examples": [
+              "./folder/data.parquet",
+              "./folder/*.parquet"
+            ]
+          },
+          "format": {
+            "type": "string",
+            "description": "The format of the file(s)",
+            "examples": [
+              "json",
+              "parquet",
+              "delta",
+              "csv"
+            ]
+          }
+        },
+        "required": [
+          "path",
+          "format"
+        ]
+      },
+      "MySqlServer": {
+        "type": "object",
+        "title": "MySqlServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host of the MySql server."
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port of the MySql server."
+          },
+          "database": {
+            "type": "string",
+            "description": "The name of the database."
+          }
+        },
+        "required": [
+          "host",
+          "port",
+          "database"
+        ]
+      },
+      "OracleServer": {
+        "type": "object",
+        "title": "OracleServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host to the oracle server",
+            "examples": [
+              "localhost"
+            ]
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port to the oracle server.",
+            "examples": [
+              1523
+            ]
+          },
+          "serviceName": {
+            "type": "string",
+            "description": "The name of the service.",
+            "examples": [
+              "service"
+            ]
+          }
+        },
+        "required": [
+          "host",
+          "port",
+          "serviceName"
+        ]
+      },
+      "PostgresServer": {
+        "type": "object",
+        "title": "PostgresServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host to the Postgres server"
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port to the Postgres server."
+          },
+          "database": {
+            "type": "string",
+            "description": "The name of the database."
+          },
+          "schema": {
+            "type": "string",
+            "description": "The name of the schema in the database."
+          }
+        },
+        "required": [
+          "host",
+          "port",
+          "database",
+          "schema"
+        ]
+      },
+      "PrestoServer": {
+        "type": "object",
+        "title": "PrestoServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host to the Presto server",
+            "examples": [
+              "localhost:8080"
+            ]
+          },
+          "catalog": {
+            "type": "string",
+            "description": "The name of the catalog.",
+            "examples": [
+              "postgres"
+            ]
+          },
+          "schema": {
+            "type": "string",
+            "description": "The name of the schema.",
+            "examples": [
+              "public"
+            ]
+          }
+        },
+        "required": [
+          "host"
+        ]
+      },
+      "PubSubServer": {
+        "type": "object",
+        "title": "PubSubServer",
+        "properties": {
+          "project": {
+            "type": "string",
+            "description": "The GCP project name."
+          }
+        },
+        "required": [
+          "project"
+        ]
+      },
+      "RedshiftServer": {
+        "type": "object",
+        "title": "RedshiftServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "An optional string describing the server."
+          },
+          "database": {
+            "type": "string",
+            "description": "The name of the database."
+          },
+          "schema": {
+            "type": "string",
+            "description": "The name of the schema."
+          },
+          "region": {
+            "type": "string",
+            "description": "AWS region of Redshift server.",
+            "examples": ["us-east-1"]
+          },
+          "account": {
+            "type": "string",
+            "description": "The account used by the server."
+          }
+        },
+        "required": [
+          "database",
+          "schema"
+        ]
+      },
+      "S3Server": {
+        "type": "object",
+        "title": "S3Server",
+        "properties": {
+          "location": {
+            "type": "string",
+            "format": "uri",
+            "description": "S3 URL, starting with `s3://`",
+            "examples": [
+              "s3://datacontract-example-orders-latest/data/{model}/*.json"
+            ]
+          },
+          "endpointUrl": {
+            "type": "string",
+            "format": "uri",
+            "description": "The server endpoint for S3-compatible servers.",
+            "examples": ["https://minio.example.com"]
+          },
+          "format": {
+            "type": "string",
+            "enum": [
+              "parquet",
+              "delta",
+              "json",
+              "csv"
+            ],
+            "description": "File format."
+          },
+          "delimiter": {
+            "type": "string",
+            "enum": [
+              "new_line",
+              "array"
+            ],
+            "description": "Only for format = json. How multiple json documents are delimited within one file"
+          }
+        },
+        "required": [
+          "location"
+        ]
+      },
+      "SftpServer": {
+        "type": "object",
+        "title": "SftpServer",
+        "properties": {
+          "location": {
+            "type": "string",
+            "format": "uri",
+            "pattern": "^sftp://.*",
+            "description": "SFTP URL, starting with `sftp://`",
+            "examples": [
+              "sftp://123.123.12.123/{model}/*.json"
+            ]
+          },
+          "format": {
+            "type": "string",
+            "enum": [
+              "parquet",
+              "delta",
+              "json",
+              "csv"
+            ],
+            "description": "File format."
+          },
+          "delimiter": {
+            "type": "string",
+            "enum": [
+              "new_line",
+              "array"
+            ],
+            "description": "Only for format = json. How multiple json documents are delimited within one file"
+          }
+        },
+        "required": [
+          "location"
+        ]
+      },
+      "SnowflakeServer": {
+        "type": "object",
+        "title": "SnowflakeServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host to the Snowflake server"
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port to the Snowflake server."
+          },
+          "account": {
+            "type": "string",
+            "description": "The Snowflake account used by the server."
+          },
+          "database": {
+            "type": "string",
+            "description": "The name of the database."
+          },
+          "schema": {
+            "type": "string",
+            "description": "The name of the schema."
+          },
+          "warehouse": {
+            "type": "string",
+            "description": "The name of the cluster of resources that is a Snowflake virtual warehouse."
+          }
+        },
+        "required": [
+          "account",
+          "database",
+          "schema"
+        ]
+      },
+      "SqlserverServer": {
+        "type": "object",
+        "title": "SqlserverServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host to the database server",
+            "examples": [
+              "localhost"
+            ]
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port to the database server.",
+            "default": 1433,
+            "examples": [
+              1433
+            ]
+          },
+          "database": {
+            "type": "string",
+            "description": "The name of the database.",
+            "examples": [
+              "database"
+            ]
+          },
+          "schema": {
+            "type": "string",
+            "description": "The name of the schema in the database.",
+            "examples": [
+              "dbo"
+            ]
+          }
+        },
+        "required": [
+          "host",
+          "database",
+          "schema"
+        ]
+      },
+      "SynapseServer": {
+        "type": "object",
+        "title": "SynapseServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host of the Synapse server."
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port of the Synapse server."
+          },
+          "database": {
+            "type": "string",
+            "description": "The name of the database."
+          }
+        },
+        "required": [
+          "host",
+          "port",
+          "database"
+        ]
+      },
+      "TrinoServer": {
+        "type": "object",
+        "title": "TrinoServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The Trino host URL.",
+            "examples": [
+              "localhost"
+            ]
+          },
+          "port": {
+            "type": "integer",
+            "description": "The Trino port."
+          },
+          "catalog": {
+            "type": "string",
+            "description": "The name of the catalog.",
+            "examples": [
+              "hive"
+            ]
+          },
+          "schema": {
+            "type": "string",
+            "description": "The name of the schema in the database.",
+            "examples": [
+              "my_schema"
+            ]
+          }
+        },
+        "required": [
+          "host",
+          "port",
+          "catalog",
+          "schema"
+        ]
+      },
+      "VerticaServer": {
+        "type": "object",
+        "title": "VerticaServer",
+        "properties": {
+          "host": {
+            "type": "string",
+            "description": "The host of the Vertica server."
+          },
+          "port": {
+            "type": "integer",
+            "description": "The port of the Vertica server."
+          },
+          "database": {
+            "type": "string",
+            "description": "The name of the database."
+          },
+          "schema": {
+            "type": "string",
+            "description": "The name of the schema."
+          }
+        },
+        "required": [
+          "host",
+          "port",
+          "database",
+          "schema"
+        ]
+      }
+    },
+    "SchemaElement": {
+      "type": "object",
+      "properties": {
+        "name": {
+          "type": "string",
+          "description": "Name of the element."
+        },
+        "physicalType": {
+          "type": "string",
+          "description": "The physical element data type in the data source.",
+          "examples": ["table", "view", "topic", "file"]
+        },
+        "description": {
+          "type": "string",
+          "description": "Description of the element."
+        },
+        "businessName": {
+          "type": "string",
+          "description": "The business name of the element."
+        },
+        "authoritativeDefinitions": {
+          "$ref": "#/$defs/AuthoritativeDefinitions"
+        },
+        "tags": {
+          "$ref": "#/$defs/Tags"
+        },
+        "customProperties": {
+          "$ref": "#/$defs/CustomProperties"
+        }
+      }
+    },
+    "SchemaObject": {
+      "type": "object",
+      "properties": {
+        "logicalType": {
+          "type": "string",
+          "description": "The logical element data type.",
+          "enum": ["object"]
+        },
+        "physicalName": {
+          "type": "string",
+          "description": "Physical name.",
+          "examples": ["table_1_2_0"]
+        },
+        "dataGranularityDescription": {
+          "type": "string",
+          "description": "Granular level of the data in the object.",
+          "examples": ["Aggregation by country"]
+        },
+        "properties": {
+          "type": "array",
+          "description": "A list of properties for the object.",
+          "items": {
+            "$ref": "#/$defs/SchemaProperty"
+          }
+        },
+        "quality": {
+          "$ref": "#/$defs/DataQualityChecks"
+        }
+      },
+      "allOf": [
+        {
+          "$ref": "#/$defs/SchemaElement"
+        }
+      ],
+      "required": ["name"],
+      "unevaluatedProperties": false
+    },
+    "SchemaBaseProperty": {
+      "type": "object",
+      "properties": {
+        "primaryKey": {
+          "type": "boolean",
+          "description": "Boolean value specifying whether the element is primary or not. Default is false."
+        },
+        "primaryKeyPosition": {
+          "type": "integer",
+          "default": -1,
+          "description": "If element is a primary key, the position of the primary key element. Starts from 1. Example of `account_id, name` being primary key columns, `account_id` has primaryKeyPosition 1 and `name` primaryKeyPosition 2. Default to -1."
+        },
+        "logicalType": {
+          "type": "string",
+          "description": "The logical element data type.",
+          "enum": ["string", "date", "number", "integer", "object", "array", "boolean"]
+        },
+        "logicalTypeOptions": {
+          "type": "object",
+          "description": "Additional optional metadata to describe the logical type."
+        },
+        "physicalType": {
+          "type": "string",
+          "description": "The physical element data type in the data source. For example, VARCHAR(2), DOUBLE, INT."
+        },
+        "physicalName": {
+          "type": "string",
+          "description": "Physical name.",
+          "examples": ["col_str_a"]
+        },
+        "required": {
+          "type": "boolean",
+          "default": false,
+          "description": "Indicates if the element may contain Null values; possible values are true and false. Default is false."
+        },
+        "unique": {
+          "type": "boolean",
+          "default": false,
+          "description": "Indicates if the element contains unique values; possible values are true and false. Default is false."
+        },
+        "partitioned": {
+          "type": "boolean",
+          "default": false,
+          "description": "Indicates if the element is partitioned; possible values are true and false."
+        },
+        "partitionKeyPosition": {
+          "type": "integer",
+          "default": -1,
+          "description": "If element is used for partitioning, the position of the partition element. Starts from 1. Example of `country, year` being partition columns, `country` has partitionKeyPosition 1 and `year` partitionKeyPosition 2. Default to -1."
+        },
+        "classification": {
+          "type": "string",
+          "description": "Can be anything, like confidential, restricted, and public to more advanced categorization. Some companies like PayPal, use data classification indicating the class of data in the element; expected values are 1, 2, 3, 4, or 5.",
+          "examples": ["confidential", "restricted", "public"]
+        },
+        "encryptedName": {
+          "type": "string",
+          "description": "The element name within the dataset that contains the encrypted element value. For example, unencrypted element `email_address` might have an encryptedName of `email_address_encrypt`."
+        },
+        "transformSourceObjects": {
+          "type": "array",
+          "description": "List of objects in the data source used in the transformation.",
+          "items": {
+            "type": "string"
+          }
+        },
+        "transformLogic": {
+          "type": "string",
+          "description": "Logic used in the element transformation."
+        },
+        "transformDescription": {
+          "type": "string",
+          "description": "Describes the transform logic in very simple terms."
+        },
+        "examples": {
+          "type": "array",
+          "description": "List of sample element values.",
+          "items": {
+            "$ref": "#/$defs/AnyType"
+          }
+        },
+        "criticalDataElement": {
+          "type": "boolean",
+          "default": false,
+          "description": "True or false indicator; If element is considered a critical data element (CDE) then true else false."
+        },
+        "quality": {
+          "$ref": "#/$defs/DataQualityChecks"
+        }
+      },
+      "allOf": [
+        {
+          "$ref": "#/$defs/SchemaElement"
+        },
+        {
+          "if": {
+            "properties": {
+              "logicalType": {
+                "const": "string"
+              }
+            }
+          },
+          "then": {
+            "properties": {
+              "logicalTypeOptions": {
+                "type": "object",
+                "properties": {
+                  "minLength": {
+                    "type": "integer",
+                    "minimum": 0,
+                    "description": "Minimum length of the string."
+                  },
+                  "maxLength": {
+                    "type": "integer",
+                    "minimum": 0,
+                    "description": "Maximum length of the string."
+                  },
+                  "pattern": {
+                    "type": "string",
+                    "description": "Regular expression pattern to define valid value. Follows regular expression syntax from ECMA-262 (https://262.ecma-international.org/5.1/#sec-15.10.1)."
+                  },
+                  "format": {
+                    "type": "string",
+                    "examples": ["password", "byte", "binary", "email", "uuid", "uri", "hostname", "ipv4", "ipv6"],
+                    "description": "Provides extra context about what format the string follows."
+                  }
+                },
+                "additionalProperties": false
+              }
+            }
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "logicalType": {
+                "const": "date"
+              }
+            }
+          },
+          "then": {
+            "properties": {
+              "logicalTypeOptions": {
+                "type": "object",
+                "properties": {
+                  "format": {
+                    "type": "string",
+                    "examples": ["yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "HH:mm:ss"],
+                    "description": "Format of the date. Follows the format as prescribed by [JDK DateTimeFormatter](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html). For example, format 'yyyy-MM-dd'."
+                  },
+                  "exclusiveMaximum": {
+                    "type": "boolean",
+                    "default": false,
+                    "description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
+                  },
+                  "maximum": {
+                    "type": "string",
+                    "description": "All date values are less than or equal to this value (values <= maximum)."
+                  },
+                  "exclusiveMinimum": {
+                    "type": "boolean",
+                    "default": false,
+                    "description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
+                  },
+                  "minimum": {
+                    "type": "string",
+                    "description": "All date values are greater than or equal to this value (values >= minimum)."
+                  }
+                },
+                "additionalProperties": false
+              }
+            }
+          }
+        },
+        {
+          "if": {
+            "anyOf": [
+              {
+                "properties": {
+                  "logicalType": {
+                    "const": "integer"
+                  }
+                }
+              }
+            ]
+          },
+          "then": {
+            "properties": {
+              "logicalTypeOptions": {
+                "type": "object",
+                "properties": {
+                  "multipleOf": {
+                    "type": "number",
+                    "exclusiveMinimum": 0,
+                    "description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5."
+                  },
+                  "maximum": {
+                    "type": "number",
+                    "description": "All values are less than or equal to this value (values <= maximum)."
+                  },
+                  "exclusiveMaximum": {
+                    "type": "boolean",
+                    "default": false,
+                    "description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
+                  },
+                  "minimum": {
+                    "type": "number",
+                    "description": "All values are greater than or equal to this value (values >= minimum)."
+                  },
+                  "exclusiveMinimum": {
+                    "type": "boolean",
+                    "default": false,
+                    "description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
+                  },
+                  "format": {
+                    "type": "string",
+                    "default": "i32",
+                    "description": "Format of the value in terms of how many bits of space it can use and whether it is signed or unsigned (follows the Rust integer types).",
+                    "enum": ["i8", "i16", "i32", "i64", "i128", "u8", "u16", "u32", "u64", "u128"]
+                  }
+                },
+                "additionalProperties": false
+              }
+            }
+          }
+        },
+        {
+          "if": {
+            "anyOf": [
+              {
+                "properties": {
+                  "logicalType": {
+                    "const": "number"
+                  }
+                }
+              }
+            ]
+          },
+          "then": {
+            "properties": {
+              "logicalTypeOptions": {
+                "type": "object",
+                "properties": {
+                  "multipleOf": {
+                    "type": "number",
+                    "exclusiveMinimum": 0,
+                    "description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5."
+                  },
+                  "maximum": {
+                    "type": "number",
+                    "description": "All values are less than or equal to this value (values <= maximum)."
+                  },
+                  "exclusiveMaximum": {
+                    "type": "boolean",
+                    "default": false,
+                    "description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
+                  },
+                  "minimum": {
+                    "type": "number",
+                    "description": "All values are greater than or equal to this value (values >= minimum)."
+                  },
+                  "exclusiveMinimum": {
+                    "type": "boolean",
+                    "default": false,
+                    "description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
+                  },
+                  "format": {
+                    "type": "string",
+                    "default": "i32",
+                    "description": "Format of the value in terms of how many bits of space it can use (follows the Rust float types).",
+                    "enum": ["f32", "f64"]
+                  }
+                },
+                "additionalProperties": false
+              }
+            }
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "logicalType": {
+                "const": "object"
+              }
+            }
+          },
+          "then": {
+            "properties": {
+              "logicalTypeOptions": {
+                "type": "object",
+                "properties": {
+                  "maxProperties": {
+                    "type": "integer",
+                    "minimum": 0,
+                    "description": "Maximum number of properties."
+                  },
+                  "minProperties": {
+                    "type": "integer",
+                    "minimum": 0,
+                    "default": 0,
+                    "description": "Minimum number of properties."
+                  },
+                  "required": {
+                    "type": "array",
+                    "items": {
+                      "type": "string"
+                    },
+                    "minItems": 1,
+                    "uniqueItems": true,
+                    "description": "Property names that are required to exist in the object."
+                  }
+                },
+                "additionalProperties": false
+              },
+              "properties": {
+                "type": "array",
+                "description": "A list of properties for the object.",
+                "items": {
+                  "$ref": "#/$defs/SchemaProperty"
+                }
+              }
+            }
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "logicalType": {
+                "const": "array"
+              }
+            }
+          },
+          "then": {
+            "properties": {
+              "logicalTypeOptions": {
+                "type": "object",
+                "properties": {
+                  "maxItems": {
+                    "type": "integer",
+                    "minimum": 0,
+                    "description": "Maximum number of items."
+                  },
+                  "minItems": {
+                    "type": "integer",
+                    "minimum": 0,
+                    "default": 0,
+                    "description": "Minimum number of items"
+                  },
+                  "uniqueItems": {
+                    "type": "boolean",
+                    "default": false,
+                    "description": "If set to true, all items in the array are unique."
+                  }
+                },
+                "additionalProperties": false
+              },
+              "items": {
+                "$ref": "#/$defs/SchemaItemProperty",
+                "description": "List of items in an array (only applicable when `logicalType: array`)."
+              }
+            }
+          }
+        }
+      ]
+    },
+    "SchemaProperty": {
+      "type": "object",
+      "$ref": "#/$defs/SchemaBaseProperty",
+      "required": ["name"],
+      "unevaluatedProperties": false
+    },
+    "SchemaItemProperty": {
+      "type": "object",
+      "$ref": "#/$defs/SchemaBaseProperty",
+      "properties": {
+        "properties": {
+          "type": "array",
+          "description": "A list of properties for the object.",
+          "items": {
+            "$ref": "#/$defs/SchemaProperty"
+          }
+        }
+      },
+      "unevaluatedProperties": false
+    },
+    "Tags": {
+      "type": "array",
+      "description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. Tags may be used to better categorize an element. For example, `finance`, `sensitive`, `employee_record`.",
+      "examples": ["finance", "sensitive", "employee_record"],
+      "items": {
+        "type": "string"
+      }
+    },
+    "DataQuality": {
+      "type": "object",
+      "properties": {
+        "authoritativeDefinitions": {
+          "$ref": "#/$defs/AuthoritativeDefinitions"
+        },
+        "businessImpact": {
+          "type": "string",
+          "description": "Consequences of the rule failure.",
+          "examples": ["operational", "regulatory"]
+        },
+        "customProperties": {
+          "type": "array",
+          "description": "Additional properties required for rule execution.",
+          "items": {
+            "$ref": "#/$defs/CustomProperty"
+          }
+        },
+        "description": {
+          "type": "string",
+          "description": "Describe the quality check to be completed."
+        },
+        "dimension": {
+          "type": "string",
+          "description": "The key performance indicator (KPI) or dimension for data quality.",
+          "enum": ["accuracy", "completeness", "conformity", "consistency", "coverage", "timeliness", "uniqueness"]
+        },
+        "method": {
+          "type": "string",
+          "examples": ["reconciliation"]
+        },
+        "name": {
+          "type": "string",
+          "description": "Name of the data quality check."
+        },
+        "schedule": {
+          "type": "string",
+          "description": "Rule execution schedule details.",
+          "examples": ["0 20 * * *"]
+        },
+        "scheduler": {
+          "type": "string",
+          "description": "The name or type of scheduler used to start the data quality check.",
+          "examples": ["cron"]
+        },
+        "severity": {
+          "type": "string",
+          "description": "The severance of the quality rule.",
+          "examples": ["info", "warning", "error"]
+        },
+        "tags": {
+          "$ref": "#/$defs/Tags"
+        },
+        "type": {
+          "type": "string",
+          "description": "The type of quality check. 'text' is human-readable text that describes the quality of the data. 'library' is a set of maintained predefined quality attributes such as row count or unique. 'sql' is an individual SQL query that returns a value that can be compared. 'custom' is quality attributes that are vendor-specific, such as Soda or Great Expectations.",
+          "enum": ["text", "library", "sql", "custom"],
+          "default": "library"
+        },
+        "unit": {
+          "type": "string",
+          "description": "Unit the rule is using, popular values are `rows` or `percent`, but any value is allowed.",
+          "examples": ["rows", "percent"]
+        }
+      },
+      "allOf": [
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "library"
+              }
+            }
+          },
+          "then": {
+            "$ref": "#/$defs/DataQualityLibrary"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "sql"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/DataQualitySql"
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "custom"
+              }
+            },
+            "required": ["type"]
+          },
+          "then": {
+            "$ref": "#/$defs/DataQualityCustom"
+          }
+        }
+      ]
+    },
+    "DataQualityChecks": {
+      "type": "array",
+      "description": "Data quality rules with all the relevant information for rule setup and execution.",
+      "items": {
+        "$ref": "#/$defs/DataQuality"
+      }
+    },
+    "DataQualityLibrary": {
+      "type": "object",
+      "properties": {
+        "rule": {
+          "type": "string",
+          "description": "Define a data quality check based on the predefined rules as per ODCS.",
+          "examples": ["duplicateCount", "validValues", "rowCount"]
+        },
+        "mustBe": {
+          "description": "Must be equal to the value to be valid. When using numbers, it is equivalent to '='."
+        },
+        "mustNotBe": {
+          "description": "Must not be equal to the value to be valid. When using numbers, it is equivalent to '!='."
+        },
+        "mustBeGreaterThan": {
+          "type": "number",
+          "description": "Must be greater than the value to be valid. It is equivalent to '>'."
+        },
+        "mustBeGreaterOrEqualTo": {
+          "type": "number",
+          "description": "Must be greater than or equal to the value to be valid. It is equivalent to '>='."
+        },
+        "mustBeLessThan": {
+          "type": "number",
+          "description": "Must be less than the value to be valid. It is equivalent to '<'."
+        },
+        "mustBeLessOrEqualTo": {
+          "type": "number",
+          "description": "Must be less than or equal to the value to be valid. It is equivalent to '<='."
+        },
+        "mustBeBetween": {
+          "type": "array",
+          "description": "Must be between the two numbers to be valid. Smallest number first in the array.",
+          "minItems": 2,
+          "maxItems": 2,
+          "uniqueItems": true,
+          "items": {
+            "type": "number"
+          }
+        },
+        "mustNotBeBetween": {
+          "type": "array",
+          "description": "Must not be between the two numbers to be valid. Smallest number first in the array.",
+          "minItems": 2,
+          "maxItems": 2,
+          "uniqueItems": true,
+          "items": {
+            "type": "number"
+          }
+        }
+      },
+      "required": ["rule"]
+    },
+    "DataQualitySql": {
+      "type": "object",
+      "properties": {
+        "query": {
+          "type": "string",
+          "description": "Query string that adheres to the dialect of the provided server.",
+          "examples": ["SELECT COUNT(*) FROM ${table} WHERE ${column} IS NOT NULL"]
+        }
+      },
+      "required": ["query"]
+    },
+    "DataQualityCustom": {
+      "type": "object",
+      "properties": {
+        "engine": {
+          "type": "string",
+          "description": "Name of the engine which executes the data quality checks.",
+          "examples": ["soda", "great-expectations", "monte-carlo", "dbt"]
+        },
+        "implementation": {
+          "oneOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "object"
+            }
+          ]
+        }
+      },
+      "required": ["engine", "implementation"]
+    },
+    "AuthoritativeDefinitions": {
+      "type": "array",
+      "description": "List of links to sources that provide more details on the dataset; examples would be a link to an external definition, a training video, a git repo, data catalog, or another tool. Authoritative definitions follow the same structure in the standard.",
+      "items": {
+        "type": "object",
+        "properties": {
+          "url": {
+            "type": "string",
+            "description": "URL to the authority."
+          },
+          "type": {
+            "type": "string",
+            "description": "Type of definition for authority: v2.3 adds standard values: `businessDefinition`, `transformationImplementation`, `videoTutorial`, `tutorial`, and `implementation`.",
+            "examples": ["businessDefinition", "transformationImplementation", "videoTutorial", "tutorial", "implementation"]
+          }
+        },
+        "required": ["url", "type"]
+      }
+    },
+    "Support": {
+      "type": "array",
+      "description": "Top level for support channels.",
+      "items": {
+        "$ref": "#/$defs/SupportItem"
+      }
+    },
+    "SupportItem": {
+      "type": "object",
+      "properties": {
+        "channel": {
+          "type": "string",
+          "description": "Channel name or identifier."
+        },
+        "url": {
+          "type": "string",
+          "description": "Access URL using normal [URL scheme](https://en.wikipedia.org/wiki/URL#Syntax) (https, mailto, etc.)."
+        },
+        "description": {
+          "type": "string",
+          "description": "Description of the channel, free text."
+        },
+        "tool": {
+          "type": "string",
+          "description": "Name of the tool, value can be `email`, `slack`, `teams`, `discord`, `ticket`, or `other`.",
+          "examples": ["email", "slack", "teams", "discord", "ticket", "other"]
+        },
+        "scope": {
+          "type": "string",
+          "description": "Scope can be: `interactive`, `announcements`, `issues`.",
+          "examples": ["interactive", "announcements", "issues"]
+        },
+        "invitationUrl": {
+          "type": "string",
+          "description": "Some tools uses invitation URL for requesting or subscribing. Follows the [URL scheme](https://en.wikipedia.org/wiki/URL#Syntax)."
+        }
+      },
+      "required": ["channel", "url"]
+    },
+    "Pricing": {
+      "type": "object",
+      "properties": {
+        "priceAmount": {
+          "type": "number",
+          "description": "Subscription price per unit of measure in `priceUnit`."
+        },
+        "priceCurrency": {
+          "type": "string",
+          "description": "Currency of the subscription price in `price.priceAmount`."
+        },
+        "priceUnit": {
+          "type": "string",
+          "description": "The unit of measure for calculating cost. Examples megabyte, gigabyte."
+        }
+      }
+    },
+    "Team": {
+      "type": "object",
+      "properties": {
+        "username": {
+          "type": "string",
+          "description": "The user's username or email.",
+          "examples": [
+            "mail@example.com",
+            "uid12345678"
+          ]
+        },
+        "name": {
+          "type": "string",
+          "description": "The user's name.",
+          "examples": [
+            "Jane Doe"
+          ]
+        },
+        "description": {
+          "type": "string",
+          "description": "The user's description."
+        },
+        "role": {
+          "type": "string",
+          "description": "The user's job role; Examples might be owner, data steward. There is no limit on the role."
+        },
+        "dateIn": {
+          "type": "string",
+          "format": "date",
+          "description": "The date when the user joined the team."
+        },
+        "dateOut": {
+          "type": "string",
+          "format": "date",
+          "description": "The date when the user ceased to be part of the team."
+        },
+        "replacedByUsername": {
+          "type": "string",
+          "description": "The username of the user who replaced the previous user."
+        }
+      }
+    },
+    "Role": {
+      "type": "object",
+      "properties": {
+        "role": {
+          "type": "string",
+          "description": "Name of the IAM role that provides access to the dataset."
+        },
+        "description": {
+          "type": "string",
+          "description": "Description of the IAM role and its permissions."
+        },
+        "access": {
+          "type": "string",
+          "description": "The type of access provided by the IAM role."
+        },
+        "firstLevelApprovers": {
+          "type": "string",
+          "description": "The name(s) of the first-level approver(s) of the role."
+        },
+        "secondLevelApprovers": {
+          "type": "string",
+          "description": "The name(s) of the second-level approver(s) of the role."
+        },
+        "customProperties": {
+          "$ref": "#/$defs/CustomProperties"
+        }
+      },
+      "required": ["role"]
+    },
+    "ServiceLevelAgreementProperty": {
+      "type": "object",
+      "properties": {
+        "property": {
+          "type": "string",
+          "description": "Specific property in SLA, check the periodic table. May requires units (more details to come)."
+        },
+        "value": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "number"
+            },
+            {
+              "type": "integer"
+            },
+            {
+              "type": "boolean"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "description": "Agreement value. The label will change based on the property itself."
+        },
+        "valueExt": {
+          "$ref": "#/$defs/AnyNonCollectionType",
+          "description": "Extended agreement value. The label will change based on the property itself."
+        },
+        "unit": {
+          "type": "string",
+          "description": "**d**, day, days for days; **y**, yr, years for years, etc. Units use the ISO standard."
+        },
+        "element": {
+          "type": "string",
+          "description": "Element(s) to check on. Multiple elements should be extremely rare and, if so, separated by commas."
+        },
+        "driver": {
+          "type": "string",
+          "description": "Describes the importance of the SLA from the list of: `regulatory`, `analytics`, or `operational`.",
+          "examples": ["regulatory", "analytics", "operational"]
+        }
+      },
+      "required": ["property", "value"]
+    },
+    "CustomProperties": {
+      "type": "array",
+      "description": "A list of key/value pairs for custom properties.",
+      "items": {
+        "$ref": "#/$defs/CustomProperty"
+      }
+    },
+    "CustomProperty": {
+      "type": "object",
+      "properties": {
+        "property": {
+          "type": "string",
+          "description": "The name of the key. Names should be in camel case–the same as if they were permanent properties in the contract."
+        },
+        "value": {
+          "$ref": "#/$defs/AnyType",
+          "description": "The value of the key."
+        }
+      }
+    },
+    "AnyType": {
+      "anyOf": [
+        {
+          "type": "string"
+        },
+        {
+          "type": "number"
+        },
+        {
+          "type": "integer"
+        },
+        {
+          "type": "boolean"
+        },
+        {
+          "type": "null"
+        },
+        {
+          "type": "array"
+        },
+        {
+          "type": "object"
+        }
+      ]
+    },
+    "AnyNonCollectionType": {
+      "anyOf": [
+        {
+          "type": "string"
+        },
+        {
+          "type": "number"
+        },
+        {
+          "type": "integer"
+        },
+        {
+          "type": "boolean"
+        },
+        {
+          "type": "null"
+        }
+      ]
+    }
+  }
+}
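
For orientation, here is a hypothetical `quality` list that instantiates the definitions above: the `sql` and `custom` entries follow the two conditionals shown, and the `library` entry assumes the corresponding branch earlier in the schema dispatches to `DataQualityLibrary`. Rule names and values are illustrative only, not taken from the fixtures:

```yaml
quality:
  - type: library              # validated against DataQualityLibrary
    rule: duplicateCount       # "rule" is the only required key
    mustBeBetween: [0, 10]     # exactly two numbers, smallest first
  - type: sql                  # validated against DataQualitySql
    query: SELECT COUNT(*) FROM ${table} WHERE ${column} IS NOT NULL
  - type: custom               # validated against DataQualityCustom
    engine: soda               # "engine" and "implementation" are required
    implementation:
      type: duplicate_percent  # illustrative engine-specific payload
```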

From b946d739caf4b3150a730db36c3dc85f718b2c89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20R=C3=BCnz?=
 <32196397+MaxRuenz@users.noreply.github.com>
Date: Wed, 24 Sep 2025 20:20:20 +0200
Subject: [PATCH 008/150] Fix: Import table tags from an ODCS v3 (#895)

* Fix: Import table tags from an ODCS v3

* Update CHANGELOG.md

---------

Co-authored-by: jochenchrist 
---
 CHANGELOG.md                                         | 4 ++++
 datacontract/imports/odcs_v3_importer.py             | 6 +++++-
 tests/fixtures/odcs_v3/full-example.datacontract.yml | 2 ++
 tests/fixtures/odcs_v3/full-example.odcs.yaml        | 4 ++--
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fb02a322c..c785d8ac3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Support for Data Contract Specification v1.2.1 (Data Quality Metrics)
 
+### Fixed
+
+- Import the table tags from an open data contract spec v3 (#895)
+
 
 ## [0.10.35] - 2025-08-25
 
diff --git a/datacontract/imports/odcs_v3_importer.py b/datacontract/imports/odcs_v3_importer.py
index 9054dab8a..255593ef0 100644
--- a/datacontract/imports/odcs_v3_importer.py
+++ b/datacontract/imports/odcs_v3_importer.py
@@ -207,7 +207,11 @@ def import_models(odcs: Any) -> Dict[str, Model]:
         schema_physical_name = odcs_schema.physicalName
         schema_description = odcs_schema.description if odcs_schema.description is not None else ""
         model_name = schema_physical_name if schema_physical_name is not None else schema_name
-        model = Model(description=" ".join(schema_description.splitlines()) if schema_description else "", type="table")
+        model = Model(
+            description=" ".join(schema_description.splitlines()) if schema_description else "",
+            type="table",
+            tags=odcs_schema.tags if odcs_schema.tags is not None else None,
+        )
         model.fields = import_fields(odcs_schema.properties, custom_type_mappings, server_type=get_server_type(odcs))
         if odcs_schema.quality is not None:
             model.quality = convert_quality_list(odcs_schema.quality)
diff --git a/tests/fixtures/odcs_v3/full-example.datacontract.yml b/tests/fixtures/odcs_v3/full-example.datacontract.yml
index a96eb4cf7..8d6f79d40 100644
--- a/tests/fixtures/odcs_v3/full-example.datacontract.yml
+++ b/tests/fixtures/odcs_v3/full-example.datacontract.yml
@@ -23,6 +23,7 @@ models:
     title: tbl
     description: Provides core payment metrics
     type: table
+    tags: [ "table" ]
     fields:
       txn_ref_dt:
         title: transaction reference date
@@ -30,6 +31,7 @@ models:
         required: false
         primaryKey: false
         description: Reference date for transaction
+        tags: [ "column" ]
         examples:
           - "2022-10-03"
           - "2020-01-28"
diff --git a/tests/fixtures/odcs_v3/full-example.odcs.yaml b/tests/fixtures/odcs_v3/full-example.odcs.yaml
index 35d1a03bd..a606ce0be 100644
--- a/tests/fixtures/odcs_v3/full-example.odcs.yaml
+++ b/tests/fixtures/odcs_v3/full-example.odcs.yaml
@@ -33,7 +33,7 @@ schema:
         type: businessDefinition
       - url: https://youtu.be/jbY1BKFj9ec
         type: videoTutorial
-    tags: [ ]
+    tags: [ "table" ]
     dataGranularityDescription: Aggregation on columns txn_ref_dt, pmt_txn_id
     properties:
       - name: txn_ref_dt
@@ -47,7 +47,7 @@ schema:
         partitioned: true
         partitionKeyPosition: 1
         criticalDataElement: false
-        tags: [ ]
+        tags: [ "column" ]
         classification: public
         transformSourceObjects:
           - table_name_1

From 97719efc9f906e3678cb07dc55f34323fb27f094 Mon Sep 17 00:00:00 2001
From: jochen 
Date: Thu, 25 Sep 2025 16:51:48 +0200
Subject: [PATCH 009/150] chore: update dataContractSpecification to version
 1.2.1

---
 datacontract/engines/data_contract_checks.py |  4 ++++
 pyproject.toml                               |  2 +-
 tests/fixtures/quality/datacontract.yaml     | 18 ++++++++++++++++++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/datacontract/engines/data_contract_checks.py b/datacontract/engines/data_contract_checks.py
index c98dcc1b7..cd1b93a77 100644
--- a/datacontract/engines/data_contract_checks.py
+++ b/datacontract/engines/data_contract_checks.py
@@ -563,10 +563,14 @@ def to_sodacl_threshold(quality: Quality) -> str | None:
         return f"!= {quality.mustNotBe}"
     if quality.mustBeGreaterThan is not None:
         return f"> {quality.mustBeGreaterThan}"
+    if quality.mustBeGreaterOrEqualTo is not None:
+        return f">= {quality.mustBeGreaterOrEqualTo}"
     if quality.mustBeGreaterThanOrEqualTo is not None:
         return f">= {quality.mustBeGreaterThanOrEqualTo}"
     if quality.mustBeLessThan is not None:
         return f"< {quality.mustBeLessThan}"
+    if quality.mustBeLessOrEqualTo is not None:
+        return f"<= {quality.mustBeLessOrEqualTo}"
     if quality.mustBeLessThanOrEqualTo is not None:
         return f"<= {quality.mustBeLessThanOrEqualTo}"
     if quality.mustBeBetween is not None:
diff --git a/pyproject.toml b/pyproject.toml
index c4ac8846a..35315e295 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,7 @@ dependencies = [
   "boto3>=1.34.41,<2.0.0",
   "Jinja2>=3.1.5,<4.0.0",
   "jinja_partials>=0.2.1,<1.0.0",
-  "datacontract-specification>=1.2.0,<2.0.0",
+  "datacontract-specification>=1.2.3,<2.0.0",
   "open-data-contract-standard>=3.0.4,<4.0.0",
 ]
 
diff --git a/tests/fixtures/quality/datacontract.yaml b/tests/fixtures/quality/datacontract.yaml
index 5a44830f8..fcfc2d276 100644
--- a/tests/fixtures/quality/datacontract.yaml
+++ b/tests/fixtures/quality/datacontract.yaml
@@ -20,6 +20,17 @@ models:
         required: true
         unique: true
         pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
+        quality:
+          - type: library
+            metric: missingValues
+            arguments:
+              missingValues: [null, '', 'N/A']
+            mustBe: 0
+          - type: library
+            metric: invalidValues
+            arguments:
+              invalidValues: ['1234567890']
+            mustBe: 0
       field_two:
         type: integer
         minimum: 10
@@ -30,6 +41,9 @@ models:
             dialect: postgres
             query: SELECT percentile_cont(0.95) WITHIN GROUP (ORDER BY field_two) AS percentile_95 FROM my_table
             mustBeBetween: [ 1000, 49900 ]
+          - type: library
+            metric: nullValues
+            mustBe: 0
       field_three:
         type: timestamp
     # model level quality checks
@@ -50,3 +64,7 @@ models:
           SELECT count(*) as row_count
           FROM {model}
         mustBeGreaterThan: 5
+      - type: library
+        metric: rowCount
+        description: Row Count 2
+        mustBeGreaterThan: 5
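
The two new branches in `to_sodacl_threshold` above accept the shorter ODCS spellings alongside the existing long forms; a minimal sketch of the resulting mapping (values illustrative):

```yaml
# threshold key in the quality definition -> SodaCL operator string
mustBeGreaterThan: 5            # "> 5"
mustBeGreaterOrEqualTo: 5       # ">= 5" (new alias)
mustBeGreaterThanOrEqualTo: 5   # ">= 5"
mustBeLessOrEqualTo: 5          # "<= 5" (new alias)
mustBeLessThanOrEqualTo: 5      # "<= 5"
```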

From bc33be33cad4d34a6c1ec422e447c846b85d35ed Mon Sep 17 00:00:00 2001
From: Ababacar Sy BADIANE <169177304+Z01ASYBA@users.noreply.github.com>
Date: Fri, 26 Sep 2025 14:46:04 +0200
Subject: [PATCH 010/150] fix: fix dqx export format for table-level (#886)

Co-authored-by: jochenchrist 
---
 CHANGELOG.md                         |   2 +-
 datacontract/export/dqx_converter.py |  13 +-
 tests/fixtures/dqx/datacontract.yaml |  17 +-
 tests/test_export_dqx.py             | 421 +++++++++------------------
 4 files changed, 146 insertions(+), 307 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c785d8ac3..6831ee3f8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,9 +13,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- Export to DQX: Correct the DQX format for global-level quality checks in the data contract export. (#877)
 - Import the table tags from an open data contract spec v3 (#895)
 
-
 ## [0.10.35] - 2025-08-25
 
 ### Added
diff --git a/datacontract/export/dqx_converter.py b/datacontract/export/dqx_converter.py
index 8ec395fbf..b68168473 100644
--- a/datacontract/export/dqx_converter.py
+++ b/datacontract/export/dqx_converter.py
@@ -61,13 +61,14 @@ def process_quality_rule(rule: Quality, column_name: str) -> Dict[str, Any]:
     specification = rule_data[DqxKeys.SPECIFICATION]
     check = specification[DqxKeys.CHECK]
 
-    arguments = check.setdefault(DqxKeys.ARGUMENTS, {})
+    if column_name:
+        arguments = check.setdefault(DqxKeys.ARGUMENTS, {})
 
-    if DqxKeys.COL_NAME not in arguments and DqxKeys.COL_NAMES not in arguments and DqxKeys.COLUMNS not in arguments:
-        if check[DqxKeys.FUNCTION] not in ("is_unique", "foreign_key"):
-            arguments[DqxKeys.COL_NAME] = column_name
-        else:
-            arguments[DqxKeys.COLUMNS] = [column_name]
+        if DqxKeys.COL_NAME not in arguments and DqxKeys.COL_NAMES not in arguments and DqxKeys.COLUMNS not in arguments:
+            if check[DqxKeys.FUNCTION] not in ("is_unique", "foreign_key"):
+                arguments[DqxKeys.COL_NAME] = column_name
+            else:
+                arguments[DqxKeys.COLUMNS] = [column_name]
 
     return specification
 
diff --git a/tests/fixtures/dqx/datacontract.yaml b/tests/fixtures/dqx/datacontract.yaml
index 67be1a8a7..8e51d9e0e 100644
--- a/tests/fixtures/dqx/datacontract.yaml
+++ b/tests/fixtures/dqx/datacontract.yaml
@@ -339,12 +339,11 @@ models:
           filter: "interaction_type IN ('click', 'view', 'purchase')"  # Example Filter
           check:
             function: "is_not_null"
-            arguments:
-              for_each_column:
-                - user_id
-                - interaction_id
-                - interaction_type
-                - interaction_timestamp
+            for_each_column:
+              - user_id
+              - interaction_id
+              - interaction_type
+              - interaction_timestamp
       - type: custom
         description: Item value present and within range for purchases.
         engine: dqx
@@ -390,9 +389,9 @@ models:
           check:
             function: is_unique
             arguments:
-              for_each_column:
-                  - [user_id, interaction_date]
-                  - [interaction_id]
+              columns:
+                - user_id
+                - interaction_date
       - type: custom
         description: Interaction value has to be under 1000
         engine: dqx
diff --git a/tests/test_export_dqx.py b/tests/test_export_dqx.py
index e77c2f1e0..cfebb3e6d 100644
--- a/tests/test_export_dqx.py
+++ b/tests/test_export_dqx.py
@@ -16,297 +16,136 @@ def test_cli():
 def test_to_dqx():
     actual = DataContract(data_contract_file="fixtures/dqx/datacontract.yaml").export("dqx")
     # Expected quality rules (based on the data contract)
-    expected_rules = [
-        {
-            "check": {
-                "arguments": {"column": "interaction_id"},
-                "function": "is_not_null",
-            },
-            "criticality": "error",
-        },
-        {
-            "check": {"arguments": {"column": "user_id"}, "function": "is_not_null"},
-            "criticality": "error",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "columns": ["user_id"],
-                    "ref_columns": ["id"],
-                    "ref_table": "catalog1.schema1.user",
-                },
-                "function": "foreign_key",
-            },
-            "criticality": "error",
-        },
-        {
-            "check": {"arguments": {"columns": ["user_id"]}, "function": "is_unique"},
-            "criticality": "error",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "allowed": ["click", "view", "purchase", "like", "share"],
-                    "column": "interaction_type",
-                },
-                "function": "is_in_list",
-            },
-            "criticality": "error",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "column": "interaction_timestamp",
-                    "timestamp_format": "yyyy-MM-dd HH:mm:ss",
-                },
-                "function": "is_valid_timestamp",
-            },
-            "criticality": "error",
-        },
-        {
-            "check": {
-                "arguments": {"column": "interaction_timestamp", "offset": "1h"},
-                "function": "not_in_future",
-            },
-            "criticality": "warning",
-        },
-        {
-            "check": {"arguments": {"column": "item_id"}, "function": "is_not_null"},
-            "criticality": "minor",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "column": "interaction_value",
-                    "max_limit": 1000,
-                    "min_limit": 0,
-                },
-                "function": "is_in_range",
-            },
-            "criticality": "minor",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "column": "location",
-                    "regex": "^[A-Za-z]+(?:[\\s-][A-Za-z]+)*$",
-                },
-                "function": "regex_match",
-            },
-            "criticality": "minor",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "allowed": ["mobile", "desktop", "tablet"],
-                    "column": "device",
-                },
-                "function": "is_in_list",
-            },
-            "criticality": "minor",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "column": "interaction_date",
-                    "date_format": "yyyy-MM-dd",
-                },
-                "function": "is_valid_date",
-            },
-            "criticality": "error",
-        },
-        {
-            "check": {
-                "arguments": {"column": "time_since_last_interaction", "days": 30},
-                "function": "is_older_than_n_days",
-            },
-            "criticality": "minor",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "column": "is_active",
-                    "expression": "is_active IN ('true', 'false')",
-                },
-                "function": "sql_expression",
-            },
-            "criticality": "minor",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "column": "user_profile.age",
-                    "max_limit": 120,
-                    "min_limit": 13,
-                },
-                "function": "is_in_range",
-            },
-            "criticality": "minor",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "allowed": ["male", "female", "other"],
-                    "column": "user_profile.gender",
-                },
-                "function": "is_in_list",
-            },
-            "criticality": "minor",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "column": "user_profile.location_details.country",
-                    "regex": "^[A-Z]{2}$",
-                },
-                "function": "regex_match",
-            },
-            "criticality": "minor",
-        },
-        {
-            "check": {
-                "arguments": {"column": "related_items"},
-                "function": "is_not_null_and_not_empty_array",
-            },
-            "criticality": "minor",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "column": "interaction_context.page_url",
-                    "regex": "^https?://.+$",
-                },
-                "function": "regex_match",
-            },
-            "criticality": "minor",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "allowed": ["mobile", "desktop", "tablet", "tv"],
-                    "column": "interaction_context.device_type",
-                },
-                "function": "is_in_list",
-            },
-            "criticality": "minor",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "for_each_column": [
-                        "user_id",
-                        "interaction_id",
-                        "interaction_type",
-                        "interaction_timestamp",
-                    ]
-                },
-                "function": "is_not_null",
-            },
-            "criticality": "error",
-            "filter": "interaction_type IN ('click', 'view', 'purchase')",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "column": "interaction_value",
-                    "max_limit": 1000,
-                    "min_limit": 0,
-                },
-                "function": "is_in_range",
-            },
-            "criticality": "warning",
-            "filter": "interaction_type = 'purchase'",
-        },
-        {
-            "check": {
-                "arguments": {"allowed": ["mobile", "tablet"], "column": "device"},
-                "function": "is_in_list",
-            },
-            "criticality": "minor",
-            "filter": "device = 'mobile'",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "column": "user_profile.age",
-                    "max_limit": 120,
-                    "min_limit": 13,
-                },
-                "function": "is_in_range",
-            },
-            "criticality": "minor",
-            "filter": "user_profile.age IS NOT NULL",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "for_each_column": [
-                        ["user_id", "interaction_date"],
-                        ["interaction_id"],
-                    ]
-                },
-                "function": "is_unique",
-            },
-            "criticality": "error",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "aggr_type": "max",
-                    "column": "interaction_value",
-                    "limit": 1000,
-                },
-                "function": "is_aggr_not_greater_than",
-            },
-            "criticality": "error",
-            "filter": "interaction_type = 'purchase'",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "aggr_type": "min",
-                    "column": "user_profile.age",
-                    "group_by": ["user_id"],
-                    "limit": 21,
-                },
-                "function": "is_aggr_not_less_than",
-            },
-            "criticality": "error",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "aggr_type": "count",
-                    "column": "interaction_date",
-                    "limit": 24,
-                },
-                "function": "is_aggr_equal",
-            },
-            "criticality": "error",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "columns": ["user_id"],
-                    "ref_columns": ["id"],
-                    "ref_df_name": "df_user",
-                },
-                "function": "foreign_key",
-            },
-            "criticality": "error",
-        },
-        {
-            "check": {
-                "arguments": {
-                    "columns": ["user_id"],
-                    "ref_columns": ["id"],
-                    "ref_table": "catalog1.schema1.user",
-                },
-                "function": "foreign_key",
-            },
-            "criticality": "error",
-        },
-    ]
+    expected_rules = [{'check': {'arguments': {'column': 'interaction_id'},
+                                 'function': 'is_not_null'},
+                       'criticality': 'error'},
+                      {'check': {'arguments': {'column': 'user_id'}, 'function': 'is_not_null'},
+                       'criticality': 'error'},
+                      {'check': {'arguments': {'columns': ['user_id'],
+                                               'ref_columns': ['id'],
+                                               'ref_table': 'catalog1.schema1.user'},
+                                 'function': 'foreign_key'},
+                       'criticality': 'error'},
+                      {'check': {'arguments': {'columns': ['user_id']}, 'function': 'is_unique'},
+                       'criticality': 'error'},
+                      {'check': {'arguments': {'allowed': ['click',
+                                                           'view',
+                                                           'purchase',
+                                                           'like',
+                                                           'share'],
+                                               'column': 'interaction_type'},
+                                 'function': 'is_in_list'},
+                       'criticality': 'error'},
+                      {'check': {'arguments': {'column': 'interaction_timestamp',
+                                               'timestamp_format': 'yyyy-MM-dd HH:mm:ss'},
+                                 'function': 'is_valid_timestamp'},
+                       'criticality': 'error'},
+                      {'check': {'arguments': {'column': 'interaction_timestamp', 'offset': '1h'},
+                                 'function': 'not_in_future'},
+                       'criticality': 'warning'},
+                      {'check': {'arguments': {'column': 'item_id'}, 'function': 'is_not_null'},
+                       'criticality': 'minor'},
+                      {'check': {'arguments': {'column': 'interaction_value',
+                                               'max_limit': 1000,
+                                               'min_limit': 0},
+                                 'function': 'is_in_range'},
+                       'criticality': 'minor'},
+                      {'check': {'arguments': {'column': 'location',
+                                               'regex': '^[A-Za-z]+(?:[\\s-][A-Za-z]+)*$'},
+                                 'function': 'regex_match'},
+                       'criticality': 'minor'},
+                      {'check': {'arguments': {'allowed': ['mobile', 'desktop', 'tablet'],
+                                               'column': 'device'},
+                                 'function': 'is_in_list'},
+                       'criticality': 'minor'},
+                      {'check': {'arguments': {'column': 'interaction_date',
+                                               'date_format': 'yyyy-MM-dd'},
+                                 'function': 'is_valid_date'},
+                       'criticality': 'error'},
+                      {'check': {'arguments': {'column': 'time_since_last_interaction', 'days': 30},
+                                 'function': 'is_older_than_n_days'},
+                       'criticality': 'minor'},
+                      {'check': {'arguments': {'column': 'is_active',
+                                               'expression': "is_active IN ('true', 'false')"},
+                                 'function': 'sql_expression'},
+                       'criticality': 'minor'},
+                      {'check': {'arguments': {'column': 'user_profile.age',
+                                               'max_limit': 120,
+                                               'min_limit': 13},
+                                 'function': 'is_in_range'},
+                       'criticality': 'minor'},
+                      {'check': {'arguments': {'allowed': ['male', 'female', 'other'],
+                                               'column': 'user_profile.gender'},
+                                 'function': 'is_in_list'},
+                       'criticality': 'minor'},
+                      {'check': {'arguments': {'column': 'user_profile.location_details.country',
+                                               'regex': '^[A-Z]{2}$'},
+                                 'function': 'regex_match'},
+                       'criticality': 'minor'},
+                      {'check': {'arguments': {'column': 'related_items'},
+                                 'function': 'is_not_null_and_not_empty_array'},
+                       'criticality': 'minor'},
+                      {'check': {'arguments': {'column': 'interaction_context.page_url',
+                                               'regex': '^https?://.+$'},
+                                 'function': 'regex_match'},
+                       'criticality': 'minor'},
+                      {'check': {'arguments': {'allowed': ['mobile', 'desktop', 'tablet', 'tv'],
+                                               'column': 'interaction_context.device_type'},
+                                 'function': 'is_in_list'},
+                       'criticality': 'minor'},
+                      {'check': {'for_each_column': ['user_id',
+                                                     'interaction_id',
+                                                     'interaction_type',
+                                                     'interaction_timestamp'],
+                                 'function': 'is_not_null'},
+                       'criticality': 'error',
+                       'filter': "interaction_type IN ('click', 'view', 'purchase')"},
+                      {'check': {'arguments': {'column': 'interaction_value',
+                                               'max_limit': 1000,
+                                               'min_limit': 0},
+                                 'function': 'is_in_range'},
+                       'criticality': 'warning',
+                       'filter': "interaction_type = 'purchase'"},
+                      {'check': {'arguments': {'allowed': ['mobile', 'tablet'], 'column': 'device'},
+                                 'function': 'is_in_list'},
+                       'criticality': 'minor',
+                       'filter': "device = 'mobile'"},
+                      {'check': {'arguments': {'column': 'user_profile.age',
+                                               'max_limit': 120,
+                                               'min_limit': 13},
+                                 'function': 'is_in_range'},
+                       'criticality': 'minor',
+                       'filter': 'user_profile.age IS NOT NULL'},
+                      {'check': {'arguments': {'columns': ['user_id', 'interaction_date']},
+                                 'function': 'is_unique'},
+                       'criticality': 'error'},
+                      {'check': {'arguments': {'aggr_type': 'max',
+                                               'column': 'interaction_value',
+                                               'limit': 1000},
+                                 'function': 'is_aggr_not_greater_than'},
+                       'criticality': 'error',
+                       'filter': "interaction_type = 'purchase'"},
+                      {'check': {'arguments': {'aggr_type': 'min',
+                                               'column': 'user_profile.age',
+                                               'group_by': ['user_id'],
+                                               'limit': 21},
+                                 'function': 'is_aggr_not_less_than'},
+                       'criticality': 'error'},
+                      {'check': {'arguments': {'aggr_type': 'count',
+                                               'column': 'interaction_date',
+                                               'limit': 24},
+                                 'function': 'is_aggr_equal'},
+                       'criticality': 'error'},
+                      {'check': {'arguments': {'columns': ['user_id'],
+                                               'ref_columns': ['id'],
+                                               'ref_df_name': 'df_user'},
+                                 'function': 'foreign_key'},
+                       'criticality': 'error'},
+                      {'check': {'arguments': {'columns': ['user_id'],
+                                               'ref_columns': ['id'],
+                                               'ref_table': 'catalog1.schema1.user'},
+                                 'function': 'foreign_key'},
+                       'criticality': 'error'}]
     assert yaml.safe_load(actual) == expected_rules
 
 

From 2defd9a2142200b7ec16e4de053425cd057598a7 Mon Sep 17 00:00:00 2001
From: jochen 
Date: Sun, 28 Sep 2025 13:20:50 +0200
Subject: [PATCH 011/150] feat: add quality checks for row count and duplicate
 values in data contracts

---
 datacontract/engines/data_contract_checks.py | 249 +++++++++++++++++++
 tests/fixtures/quality/data/data.invalid.sql |   3 +-
 tests/fixtures/quality/datacontract.yaml     |   7 +-
 tests/test_test_quality.py                   |   4 +
 4 files changed, 258 insertions(+), 5 deletions(-)

diff --git a/datacontract/engines/data_contract_checks.py b/datacontract/engines/data_contract_checks.py
index cd1b93a77..affbb947a 100644
--- a/datacontract/engines/data_contract_checks.py
+++ b/datacontract/engines/data_contract_checks.py
@@ -468,6 +468,212 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quoting_co
     )
 
 
+def check_row_count(model_name: str, threshold: str, quoting_config: QuotingConfig = QuotingConfig()):
+    check_type = "row_count"
+    check_key = f"{model_name}__{check_type}"
+    sodacl_check_dict = {
+        checks_for(model_name, quoting_config.quote_model_name): [
+            {
+                f"row_count {threshold}": {"name": check_key},
+            }
+        ],
+    }
+    return Check(
+        id=str(uuid.uuid4()),
+        key=check_key,
+        category="schema",
+        type=check_type,
+        name=f"Check that model {model_name} has row_count {threshold}",
+        model=model_name,
+        field=None,
+        engine="soda",
+        language="sodacl",
+        implementation=yaml.dump(sodacl_check_dict),
+    )
+
+
+def check_model_duplicate_values(
+    model_name: str, cols: list[str], threshold: str, quoting_config: QuotingConfig = QuotingConfig()
+):
+    check_type = "model_duplicate_values"
+    check_key = f"{model_name}__{check_type}"
+    col_joined = ", ".join(cols)
+    sodacl_check_dict = {
+        checks_for(model_name, quoting_config.quote_model_name): [
+            {
+                f"duplicate_count({col_joined}) {threshold}": {"name": check_key},
+            }
+        ],
+    }
+    return Check(
+        id=str(uuid.uuid4()),
+        key=check_key,
+        category="quality",
+        type=check_type,
+        name=f"Check that model {model_name} has duplicate_count {threshold} for columns {col_joined}",
+        model=model_name,
+        field=None,
+        engine="soda",
+        language="sodacl",
+        implementation=yaml.dump(sodacl_check_dict),
+    )
+
+
+def check_field_duplicate_values(
+    model_name: str, field_name: str, threshold: str, quoting_config: QuotingConfig = QuotingConfig()
+):
+    if quoting_config.quote_field_name:
+        field_name_for_soda = f'"{field_name}"'
+    else:
+        field_name_for_soda = field_name
+
+    check_type = "field_duplicate_values"
+    check_key = f"{model_name}__{field_name}__{check_type}"
+    sodacl_check_dict = {
+        checks_for(model_name, quoting_config.quote_model_name): [
+            {
+                f"duplicate_count({field_name_for_soda}) {threshold}": {
+                    "name": check_key,
+                },
+            }
+        ],
+    }
+    return Check(
+        id=str(uuid.uuid4()),
+        key=check_key,
+        category="quality",
+        type=check_type,
+        name=f"Check that field {field_name} has duplicate_count {threshold}",
+        model=model_name,
+        field=field_name,
+        engine="soda",
+        language="sodacl",
+        implementation=yaml.dump(sodacl_check_dict),
+    )
+
+
+def check_field_null_values(
+    model_name: str, field_name: str, threshold: str, quoting_config: QuotingConfig = QuotingConfig()
+):
+    if quoting_config.quote_field_name:
+        field_name_for_soda = f'"{field_name}"'
+    else:
+        field_name_for_soda = field_name
+
+    check_type = "field_null_values"
+    check_key = f"{model_name}__{field_name}__{check_type}"
+    sodacl_check_dict = {
+        checks_for(model_name, quoting_config.quote_model_name): [
+            {
+                f"missing_count({field_name_for_soda}) {threshold}": {
+                    "name": check_key,
+                },
+            }
+        ],
+    }
+    return Check(
+        id=str(uuid.uuid4()),
+        key=check_key,
+        category="quality",
+        type=check_type,
+        name=f"Check that field {field_name} has missing_count {threshold}",
+        model=model_name,
+        field=field_name,
+        engine="soda",
+        language="sodacl",
+        implementation=yaml.dump(sodacl_check_dict),
+    )
+
+
+def check_field_invalid_values(
+    model_name: str,
+    field_name: str,
+    threshold: str,
+    valid_values: list = None,
+    quoting_config: QuotingConfig = QuotingConfig(),
+):
+    if quoting_config.quote_field_name:
+        field_name_for_soda = f'"{field_name}"'
+    else:
+        field_name_for_soda = field_name
+
+    check_type = "field_invalid_values"
+    check_key = f"{model_name}__{field_name}__{check_type}"
+
+    sodacl_check_config = {
+        "name": check_key,
+    }
+
+    if valid_values is not None:
+        sodacl_check_config["valid values"] = valid_values
+
+    sodacl_check_dict = {
+        checks_for(model_name, quoting_config.quote_model_name): [
+            {
+                f"invalid_count({field_name_for_soda}) {threshold}": sodacl_check_config,
+            }
+        ],
+    }
+    return Check(
+        id=str(uuid.uuid4()),
+        key=check_key,
+        category="quality",
+        type=check_type,
+        name=f"Check that field {field_name} has invalid_count {threshold}",
+        model=model_name,
+        field=field_name,
+        engine="soda",
+        language="sodacl",
+        implementation=yaml.dump(sodacl_check_dict),
+    )
+
+
+def check_field_missing_values(
+    model_name: str,
+    field_name: str,
+    threshold: str,
+    missing_values: list = None,
+    quoting_config: QuotingConfig = QuotingConfig(),
+):
+    if quoting_config.quote_field_name:
+        field_name_for_soda = f'"{field_name}"'
+    else:
+        field_name_for_soda = field_name
+
+    check_type = "field_missing_values"
+    check_key = f"{model_name}__{field_name}__{check_type}"
+
+    sodacl_check_config = {
+        "name": check_key,
+    }
+
+    if missing_values is not None:
+        # Filter out null/None values as SodaCL handles these automatically
+        filtered_missing_values = [v for v in missing_values if v is not None]
+        if filtered_missing_values:
+            sodacl_check_config["missing values"] = filtered_missing_values
+
+    sodacl_check_dict = {
+        checks_for(model_name, quoting_config.quote_model_name): [
+            {
+                f"missing_count({field_name_for_soda}) {threshold}": sodacl_check_config,
+            }
+        ],
+    }
+    return Check(
+        id=str(uuid.uuid4()),
+        key=check_key,
+        category="quality",
+        type=check_type,
+        name=f"Check that field {field_name} has missing_count {threshold}",
+        model=model_name,
+        field=field_name,
+        engine="soda",
+        language="sodacl",
+        implementation=yaml.dump(sodacl_check_dict),
+    )
+
+
 def check_quality_list(
     model_name, field_name, quality_list: List[Quality], quoting_config: QuotingConfig = QuotingConfig()
 ) -> List[Check]:
@@ -519,6 +725,49 @@ def check_quality_list(
                     implementation=yaml.dump(sodacl_check_dict),
                 )
             )
+        elif quality.metric is not None:
+            threshold = to_sodacl_threshold(quality)
+
+            if threshold is None:
+                logger.warning(f"Quality metric {quality.metric} has no valid threshold")
+                continue
+
+            if quality.metric == "rowCount":
+                checks.append(check_row_count(model_name, threshold, quoting_config))
+            elif quality.metric == "duplicateValues":
+                if field_name is None:
+                    # TODO: check that quality.arguments.get("properties") is a list of strings and contains at least one property
+                    checks.append(
+                        check_model_duplicate_values(
+                            model_name, quality.arguments.get("properties"), threshold, quoting_config
+                        )
+                    )
+                else:
+                    checks.append(check_field_duplicate_values(model_name, field_name, threshold, quoting_config))
+            elif quality.metric == "nullValues":
+                if field_name is not None:
+                    checks.append(check_field_null_values(model_name, field_name, threshold, quoting_config))
+                else:
+                    logger.warning("Quality check nullValues is only supported at field level")
+            elif quality.metric == "invalidValues":
+                if field_name is not None:
+                    valid_values = quality.arguments.get("validValues") if quality.arguments else None
+                    checks.append(
+                        check_field_invalid_values(model_name, field_name, threshold, valid_values, quoting_config)
+                    )
+                else:
+                    logger.warning("Quality check invalidValues is only supported at field level")
+            elif quality.metric == "missingValues":
+                if field_name is not None:
+                    missing_values = quality.arguments.get("missingValues") if quality.arguments else None
+                    checks.append(
+                        check_field_missing_values(model_name, field_name, threshold, missing_values, quoting_config)
+                    )
+                else:
+                    logger.warning("Quality check missingValues is only supported at field level")
+            else:
+                logger.warning(f"Quality check {quality.metric} is not yet supported")
+
         count += 1
 
     return checks
diff --git a/tests/fixtures/quality/data/data.invalid.sql b/tests/fixtures/quality/data/data.invalid.sql
index 742cb3d9d..5a3fdcb23 100644
--- a/tests/fixtures/quality/data/data.invalid.sql
+++ b/tests/fixtures/quality/data/data.invalid.sql
@@ -15,5 +15,4 @@ INSERT INTO public.my_table (field_one, field_two, field_three) VALUES
                                                            ('VS-079-OH', 85, '2023-04-15 00:50:32'),
                                                            ('DN-297-XY', 79, '2023-11-08 12:55:42'),
                                                            ('ZE-172-FP', 14, '2023-12-03 18:38:38'),
-                                                           ('ID-840-EG', 89, '2023-10-02 17:17:58'),
-                                                           ('FK-230-KZ', 64, '2023-11-27 15:21:48');
+                                                           ('ID-840-EG', 89, '2023-10-02 17:17:58');
\ No newline at end of file
diff --git a/tests/fixtures/quality/datacontract.yaml b/tests/fixtures/quality/datacontract.yaml
index fcfc2d276..9ff801fee 100644
--- a/tests/fixtures/quality/datacontract.yaml
+++ b/tests/fixtures/quality/datacontract.yaml
@@ -29,7 +29,7 @@ models:
           - type: library
             metric: invalidValues
             arguments:
-              invalidValues: ['1234567890']
+              validValues: ['CX-263-DU', 'IK-894-MN', 'ER-399-JY', 'MT-939-FH', 'LV-849-MI', 'VS-079-OH', 'DN-297-XY', 'ZE-172-FP', 'ID-840-EG', 'FK-230-KZ']
             mustBe: 0
       field_two:
         type: integer
@@ -64,7 +64,8 @@ models:
           SELECT count(*) as row_count
           FROM {model}
         mustBeGreaterThan: 5
-      - type: library
+      - name: rowCount must be greater than 0
+        type: library
         metric: rowCount
         description: Row Count 2
-        mustBeGreaterThan: 5
+        mustBeGreaterThan: 9
diff --git a/tests/test_test_quality.py b/tests/test_test_quality.py
index 8260be1eb..3fac59c18 100644
--- a/tests/test_test_quality.py
+++ b/tests/test_test_quality.py
@@ -55,6 +55,10 @@ def test_test_quality_invalid(postgres_container, monkeypatch):
         and check.result == ResultEnum.failed
         for check in run.checks
     )
+    assert any(
+        check.name == "Check that model my_table has row_count > 9" and check.result == ResultEnum.failed
+        for check in run.checks
+    )
 
 
 def _setup_datacontract(file):
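
To make the new metric wiring concrete: a `library` check such as the fixture's `missingValues` entry is turned by `check_field_missing_values` into a SodaCL `missing_count` check. A sketch of the generated SodaCL, assuming `mustBe: 0` renders as the threshold `= 0` (model and field names taken from the fixture; the exact YAML layout may differ):

```yaml
checks for my_table:
  - missing_count(field_one) = 0:
      name: my_table__field_one__field_missing_values
      missing values: ['', 'N/A']   # None entries are filtered out; SodaCL counts NULL as missing anyway
```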

From 1a50660b7c4b143b8919ba951166bdd3400244ac Mon Sep 17 00:00:00 2001
From: jochenchrist 
Date: Sun, 28 Sep 2025 16:40:26 +0200
Subject: [PATCH 012/150] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 03e55e0e7..ba3974f37 100644
--- a/README.md
+++ b/README.md
@@ -2015,6 +2015,7 @@ models:
 ```bash
 # make sure uv is installed
 uv python pin 3.11
+uv venv
 uv pip install -e '.[dev]'
 uv run ruff check
 uv run pytest

From e327e092ce0e09442762e19beb624c5f35be90c3 Mon Sep 17 00:00:00 2001
From: jochen 
Date: Sun, 28 Sep 2025 21:08:57 +0200
Subject: [PATCH 013/150] Disable failing test

---
 tests/test_test_s3_delta.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/test_test_s3_delta.py b/tests/test_test_s3_delta.py
index 3319214e4..0b7cf3bb5 100644
--- a/tests/test_test_s3_delta.py
+++ b/tests/test_test_s3_delta.py
@@ -25,13 +25,11 @@ def minio_container():
         yield minio_container
 
 
-@pytest.mark.skipif(
-    os.getenv("CI") == "true",
+@pytest.mark.skip(
     reason="""
-Runs locally on mac, but fails on CI with
 InvalidTokenId: The security token included in the request is invalid (fixtures/s3-delta/data/orders.delta/_delta_log/_last_checkpoint, test-bucket/test-bucket/fixtures/s3-delta/data/orders.delta/_delta_log/_last_checkpoint)
 
-Need to investigate why the token is invalid on CI.
+Need to investigate why the token is invalid, or to deprecate Delta support
 """,
 )
 def test_test_s3_delta(minio_container, monkeypatch):

From 92a19d34b413c38338d11faf080ac9cdff942156 Mon Sep 17 00:00:00 2001
From: Simon Harrer 
Date: Wed, 8 Oct 2025 14:29:48 +0200
Subject: [PATCH 014/150] docs: update installation instructions in README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index ba3974f37..5da87811b 100644
--- a/README.md
+++ b/README.md
@@ -23,9 +23,9 @@ We have a _servers_ section with endpoint details to the S3 bucket, _models_ for
 
 This data contract contains all information to connect to S3 and check that the actual data meets the defined schema and quality requirements. We can use this information to test if the actual data product in S3 is compliant to the data contract.
 
-Let's use [pip](https://pip.pypa.io/en/stable/getting-started/) to install the CLI (or use the [Docker image](#docker)),
+Let's use [uv](https://docs.astral.sh/uv/) to install the CLI (or use the [Docker image](#docker)),
 ```bash
-$ python3 -m pip install 'datacontract-cli[all]'
+$ uv tool install 'datacontract-cli[all]'
 ```
 
 

From ffdc2e6b05d977f1c083e2aac29a89d8ece6582b Mon Sep 17 00:00:00 2001
From: Pete <64353771+PeteZ238@users.noreply.github.com>
Date: Fri, 17 Oct 2025 08:39:47 +0100
Subject: [PATCH 015/150] feat(BIG QUERY): Add support for BQ flexible schema
 in Data Contract Checks (#910)

* test(BIGQUERY): Test CI with altered fixture

Signed-off-by: Pete Zoumpoulis 

* fix(BIG QUERY): Add support for BQ flexible schema in Data Contract Checks

* docs(CHANGELOG): Updated the changelog

* chore(COMMENT): Removed inline comment

* fix(COMMENTS): Addressed PR comments

* fix(TEST CODE): Reverse code for testing

* Refactor QuotingConfig and add BigQuery table name test

---------

Signed-off-by: Pete Zoumpoulis 
Co-authored-by: jochen 
---
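A note on the quoting behavior this patch introduces: on BigQuery, `checks_for` wraps the model name in backticks for data checks but leaves the schema-level checks (`field_is_present`, `field_type`) unquoted. A sketch with a hypothetical table name that starts with a digit:

```yaml
# data checks (e.g. field_required) quote the BigQuery model name with backticks
checks for `2024_orders`:
  - missing_count(order_id) = 0:
      name: 2024_orders__order_id__field_required

# schema checks keep the plain name
checks for 2024_orders:
  - schema:
      name: 2024_orders__order_id__field_is_present
```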
 .pre-commit-config.yaml                       |  2 +-
 CHANGELOG.md                                  |  5 +-
 datacontract/engines/data_contract_checks.py  | 58 +++++++++++--------
 ...t_tablename_starting_with_number.odcs.yaml | 54 +++++++++++++++++
 tests/test_test_bigquery.py                   | 14 +++++
 5 files changed, 105 insertions(+), 28 deletions(-)
 create mode 100644 tests/fixtures/bigquery/datacontract_tablename_starting_with_number.odcs.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a2251d7d7..8939841a2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 repos:
 - repo: https://github.com/astral-sh/ruff-pre-commit
   # Ruff version.
-  rev: v0.4.7
+  rev: v0.13.3
   hooks:
     # Run the linter.
     - id: ruff
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6831ee3f8..26bf661ee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - Support for Data Contract Specification v1.2.1 (Data Quality Metrics)
+- Support for BigQuery Flexible Schema in Data Contract Checks (#909)
 
 ### Fixed
 
@@ -83,7 +84,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 - `pytest tests\test_api.py`: Fixed an issue where special characters were not read correctly from file.
-- `datacontract export --format mermaid`: Fixed an issue where the `mermaid` export did not handle references correctly 
+- `datacontract export --format mermaid`: Fixed an issue where the `mermaid` export did not handle references correctly
 
 ## [0.10.28] - 2025-06-05
 
@@ -296,7 +297,7 @@ Code for proto to datacontract (#696)
 
 ### Fixed
 - SQL Server: cannot escape reserved word on model (#557)
-- Export dbt-staging-sql error on multi models contracts (#587) 
+- Export dbt-staging-sql error on multi models contracts (#587)
 
 ### Removed
 - OpenTelemetry publisher, as it was hardly used
diff --git a/datacontract/engines/data_contract_checks.py b/datacontract/engines/data_contract_checks.py
index affbb947a..84bd52820 100644
--- a/datacontract/engines/data_contract_checks.py
+++ b/datacontract/engines/data_contract_checks.py
@@ -15,6 +15,7 @@
 class QuotingConfig:
     quote_field_name: bool = False
     quote_model_name: bool = False
+    quote_model_name_with_backticks: bool = False
 
 
 def create_checks(data_contract_spec: DataContractSpecification, server: Server) -> List[Check]:
@@ -35,15 +36,18 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
 
     check_types = is_check_types(server)
 
-    quoting_config = QuotingConfig(
-        quote_field_name=server_type in ["postgres", "sqlserver"],
-        quote_model_name=server_type in ["postgres", "sqlserver"],
+    server_type_name = server.type if server and server.type else None
+    quoting_config = QuotingConfig(
+        quote_field_name=server_type_name in ["postgres", "sqlserver"],
+        quote_model_name=server_type_name in ["postgres", "sqlserver"],
+        # BigQuery model names are quoted with backticks (except for schema checks)
+        quote_model_name_with_backticks=server_type_name == "bigquery",
+    )
 
     for field_name, field in fields.items():
         checks.append(check_field_is_present(model_name, field_name, quoting_config))
         if check_types and field.type is not None:
-            sql_type = convert_to_sql_type(field, server_type)
+            sql_type: str = convert_to_sql_type(field, server_type)
             checks.append(check_field_type(model_name, field_name, sql_type, quoting_config))
         if field.required:
             checks.append(check_field_required(model_name, field_name, quoting_config))
@@ -82,9 +86,11 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
     return checks
 
 
-def checks_for(model_name, quote_model_name: bool):
-    if quote_model_name:
+def checks_for(model_name: str, quoting_config: QuotingConfig, check_type: str) -> str:
+    if quoting_config.quote_model_name:
         return f'checks for "{model_name}"'
+    elif quoting_config.quote_model_name_with_backticks and check_type not in ["field_is_present", "field_type"]:
+        return f"checks for `{model_name}`"
     return f"checks for {model_name}"
 
 
@@ -114,7 +120,7 @@ def check_field_is_present(model_name, field_name, quoting_config: QuotingConfig
     check_type = "field_is_present"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 "schema": {
                     "name": check_key,
@@ -145,7 +151,7 @@ def check_field_type(
     check_type = "field_type"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 "schema": {
                     "name": check_key,
@@ -181,7 +187,7 @@ def check_field_required(model_name: str, field_name: str, quoting_config: Quoti
     check_type = "field_required"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"missing_count({field_name_for_soda}) = 0": {
                     "name": check_key,
@@ -212,7 +218,7 @@ def check_field_unique(model_name: str, field_name: str, quoting_config: Quoting
     check_type = "field_unique"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"duplicate_count({field_name_for_soda}) = 0": {
                     "name": check_key,
@@ -245,7 +251,7 @@ def check_field_min_length(
     check_type = "field_min_length"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,
@@ -279,7 +285,7 @@ def check_field_max_length(
     check_type = "field_max_length"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,
@@ -313,7 +319,7 @@ def check_field_minimum(
     check_type = "field_minimum"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,
@@ -347,7 +353,7 @@ def check_field_maximum(
     check_type = "field_maximum"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,
@@ -381,7 +387,7 @@ def check_field_not_equal(
     check_type = "field_not_equal"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,
@@ -413,7 +419,7 @@ def check_field_enum(model_name: str, field_name: str, enum: list, quoting_confi
     check_type = "field_enum"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,
@@ -445,7 +451,7 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quoting_co
     check_type = "field_regex"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,
@@ -472,7 +478,7 @@ def check_row_count(model_name: str, threshold: str, quoting_config: QuotingConf
     check_type = "row_count"
     check_key = f"{model_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"row_count {threshold}": {"name": check_key},
             }
@@ -499,7 +505,7 @@ def check_model_duplicate_values(
     check_key = f"{model_name}__{check_type}"
     col_joined = ", ".join(cols)
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"duplicate_count({col_joined}) {threshold}": {"name": check_key},
             }
@@ -530,7 +536,7 @@ def check_field_duplicate_values(
     check_type = "field_duplicate_values"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"duplicate_count({field_name_for_soda}) {threshold}": {
                     "name": check_key,
@@ -563,7 +569,7 @@ def check_field_null_values(
     check_type = "field_null_values"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"missing_count({field_name_for_soda}) {threshold}": {
                     "name": check_key,
@@ -608,7 +614,7 @@ def check_field_invalid_values(
         sodacl_check_config["valid values"] = valid_values
 
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"invalid_count({field_name_for_soda}) {threshold}": sodacl_check_config,
             }
@@ -654,7 +660,7 @@ def check_field_missing_values(
             sodacl_check_config["missing values"] = filtered_missing_values
 
     sodacl_check_dict = {
-        checks_for(model_name, quoting_config.quote_model_name): [
+        checks_for(model_name, quoting_config, check_type): [
             {
                 f"missing_count({field_name_for_soda}) {threshold}": sodacl_check_config,
             }
@@ -790,6 +796,8 @@ def prepare_query(
 
     if quoting_config.quote_model_name:
         model_name_for_soda = f'"{model_name}"'
+    elif quoting_config.quote_model_name_with_backticks:
+        model_name_for_soda = f"`{model_name}`"
     else:
         model_name_for_soda = model_name
 
@@ -892,7 +900,7 @@ def to_servicelevel_freshness_check(data_contract_spec: DataContractSpecificatio
     check_key = "servicelevel_freshness"
 
     sodacl_check_dict = {
-        checks_for(model_name, False): [
+        checks_for(model_name, QuotingConfig(), check_type): [
             {
                 f"freshness({field_name}) < {threshold}": {
                     "name": check_key,
@@ -944,7 +952,7 @@ def to_servicelevel_retention_check(data_contract_spec) -> Check | None:
     check_type = "servicelevel_retention"
     check_key = "servicelevel_retention"
     sodacl_check_dict = {
-        checks_for(model_name, False): [
+        checks_for(model_name, QuotingConfig(), check_type): [
             {
                 f"orders_servicelevel_retention < {period_in_seconds}": {
                     "orders_servicelevel_retention expression": f"TIMESTAMPDIFF(SECOND, MIN({field_name}), CURRENT_TIMESTAMP)",
diff --git a/tests/fixtures/bigquery/datacontract_tablename_starting_with_number.odcs.yaml b/tests/fixtures/bigquery/datacontract_tablename_starting_with_number.odcs.yaml
new file mode 100644
index 000000000..3f6ffb8e5
--- /dev/null
+++ b/tests/fixtures/bigquery/datacontract_tablename_starting_with_number.odcs.yaml
@@ -0,0 +1,54 @@
+kind: DataContract
+apiVersion: v3.0.1
+id: bigquery_tablename_starting_with_number
+version: 0.0.1
+status: draft
+servers:
+- server: my-dataproduct/bigquery
+  type: bigquery
+  dataset: datacontract_cli
+  project: datameshexample-product
+schema:
+- name: 100_tablename_starting_with_number
+  physicalType: table
+  logicalType: object
+  properties:
+  - name: some_string
+    physicalType: string
+    logicalType: string
+    unique: true
+  - name: some_record
+    physicalType: STRUCT
+    logicalType: object
+    properties:
+    - name: some_field_1
+      physicalType: string
+      logicalType: string
+    - name: some_field_2
+      physicalType: string
+      logicalType: string
+  - name: some_array_of_strings
+    physicalType: ARRAY
+    logicalType: array
+    items:
+      physicalType: string
+      logicalType: string
+  - name: some_array_of_records
+    physicalType: ARRAY<STRUCT<STRING, STRING>>
+    logicalType: array
+    items:
+      physicalType: record
+      logicalType: object
+      properties:
+      - name: some_other_field_1
+        physicalType: string
+        logicalType: string
+      - name: some_other_field_2
+        physicalType: string
+        logicalType: string
+  - name: some_json
+    physicalType: json
+    logicalType: string
+  - name: some_range_of_timestamp
+    physicalType: RANGE
+    logicalType: object
diff --git a/tests/test_test_bigquery.py b/tests/test_test_bigquery.py
index d62a5a291..6c7eae40d 100644
--- a/tests/test_test_bigquery.py
+++ b/tests/test_test_bigquery.py
@@ -41,3 +41,17 @@ def test_test_bigquery_complex_tables():
     print(run.pretty())
     assert run.result == "passed"
     assert all(check.result == "passed" for check in run.checks)
+
+
+@pytest.mark.skipif(
+    os.environ.get("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH") is None,
+    reason="Requires DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH to be set",
+)
+def test_test_bigquery_tablename_starting_with_number():
+    data_contract = DataContract(data_contract_file="fixtures/bigquery/datacontract_tablename_starting_with_number.odcs.yaml")
+
+    run = data_contract.test()
+
+    print(run.pretty())
+    assert run.result == "passed"
+    assert all(check.result == "passed" for check in run.checks)
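
For illustration, a minimal sketch (not part of the patch) of the quoting behavior this change introduces: BigQuery model references are wrapped in backticks so that table names starting with a digit remain valid in the generated SQL, except for schema-level checks, which Soda resolves without quoting. The `100_orders` model name is a hypothetical example; QuotingConfig and checks_for mirror the shapes in datacontract/engines/data_contract_checks.py.

from dataclasses import dataclass


@dataclass
class QuotingConfig:
    quote_field_name: bool = False
    quote_model_name: bool = False
    quote_model_name_with_backticks: bool = False


def checks_for(model_name: str, quoting_config: QuotingConfig, check_type: str) -> str:
    # Postgres/SQL Server use double quotes; BigQuery uses backticks, but only
    # for checks that end up in a SQL query (not the schema-level checks).
    if quoting_config.quote_model_name:
        return f'checks for "{model_name}"'
    elif quoting_config.quote_model_name_with_backticks and check_type not in ["field_is_present", "field_type"]:
        return f"checks for `{model_name}`"
    return f"checks for {model_name}"


bigquery = QuotingConfig(quote_model_name_with_backticks=True)
print(checks_for("100_orders", bigquery, "field_required"))    # checks for `100_orders`
print(checks_for("100_orders", bigquery, "field_is_present"))  # checks for 100_orders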

From 42356641e5529392c7a088f21552bb3220e5eb1b Mon Sep 17 00:00:00 2001
From: Maxime Bourgeois <64260627+7MAXIME7-dev@users.noreply.github.com>
Date: Fri, 17 Oct 2025 09:48:31 +0200
Subject: [PATCH 016/150] hotfix: import_from_source() as class method +
 deprecation warning on old func (#907)

* hotfix: import_from_source() as class method + deprecation warning on old func

* hotfix: changelog updated for DataContract().import_from_source() deprecation

* hotfix: tests passing + removed the deprecated method and its warning, since a class method can be called from both an instance and the class itself

---------

Co-authored-by: maximebourgeois 
---
 CHANGELOG.md                              |   4 +
 README.md                                 |  16 +-
 datacontract/cli.py                       |   2 +-
 datacontract/data_contract.py             |  29 +-
 datacontract/export/dbt_converter.py      |  24 +-
 datacontract/export/dqx_converter.py      |   6 +-
 datacontract/export/markdown_converter.py |   6 +-
 tests/test_export_dbt_models.py           |  24 +-
 tests/test_export_dqx.py                  | 314 +++++++++++++---------
 tests/test_import_avro.py                 |  12 +-
 tests/test_import_bigquery.py             |   4 +-
 tests/test_import_csv.py                  |   2 +-
 tests/test_import_dbml.py                 |   6 +-
 tests/test_import_dbt.py                  |   8 +-
 tests/test_import_glue.py                 |   6 +-
 tests/test_import_json.py                 |   6 +-
 tests/test_import_jsonschema.py           |   6 +-
 tests/test_import_odcs_v3.py              |   4 +-
 tests/test_import_parquet.py              |   2 +-
 tests/test_import_protobuf.py             |   2 +-
 tests/test_import_spark.py                |   8 +-
 tests/test_import_sql_postgres.py         |   4 +-
 tests/test_import_sql_sqlserver.py        |   2 +-
 tests/test_import_unity_file.py           |   6 +-
 tests/test_roundtrip_jsonschema.py        |   2 +-
 25 files changed, 283 insertions(+), 222 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 26bf661ee..4c16a045d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Support for Data Contract Specification v1.2.1 (Data Quality Metrics)
 - Support for BigQuery Flexible Schema in Data Contract Checks (#909)
 
+### Changed
+
+- `DataContract().import_from_source()` as an instance method is now deprecated. Use `DataContract.import_from_source()` as a class method instead.
+
 ### Fixed
 
 - Export to DQX: Correct DQX format for global-level quality check of data contract export. (#877)
diff --git a/README.md b/README.md
index 5da87811b..bea8defd7 100644
--- a/README.md
+++ b/README.md
@@ -1514,20 +1514,20 @@ datacontract import --format spark --source "users,orders"
 
 ```bash
 # Example: Import Spark table
-DataContract().import_from_source("spark", "users")
-DataContract().import_from_source(format = "spark", source = "users")
+DataContract.import_from_source("spark", "users")
+DataContract.import_from_source(format = "spark", source = "users")
 
 # Example: Import Spark dataframe
-DataContract().import_from_source("spark", "users", dataframe = df_user)
-DataContract().import_from_source(format = "spark", source = "users", dataframe = df_user)
+DataContract.import_from_source("spark", "users", dataframe = df_user)
+DataContract.import_from_source(format = "spark", source = "users", dataframe = df_user)
 
 # Example: Import Spark table + table description
-DataContract().import_from_source("spark", "users", description = "description") 
-DataContract().import_from_source(format = "spark", source = "users", description = "description")
+DataContract.import_from_source("spark", "users", description = "description") 
+DataContract.import_from_source(format = "spark", source = "users", description = "description")
 
 # Example: Import Spark dataframe + table description
-DataContract().import_from_source("spark", "users", dataframe = df_user, description = "description")
-DataContract().import_from_source(format = "spark", source = "users", dataframe = df_user, description = "description")
+DataContract.import_from_source("spark", "users", dataframe = df_user, description = "description")
+DataContract.import_from_source(format = "spark", source = "users", dataframe = df_user, description = "description")
 ```
 
 #### DBML
diff --git a/datacontract/cli.py b/datacontract/cli.py
index 475493bb0..8e27e335c 100644
--- a/datacontract/cli.py
+++ b/datacontract/cli.py
@@ -331,7 +331,7 @@ def import_(
     """
     Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise prints to stdout.
     """
-    result = DataContract().import_from_source(
+    result = DataContract.import_from_source(
         format=format,
         source=source,
         spec=spec,
diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py
index 43f11ff0c..e06cac386 100644
--- a/datacontract/data_contract.py
+++ b/datacontract/data_contract.py
@@ -292,10 +292,9 @@ def export(
                 export_args=kwargs,
             )
 
-    # REFACTOR THIS
-    # could be a class method, not using anything from the instance
+    @classmethod
     def import_from_source(
-        self,
+        cls,
         format: str,
         source: typing.Optional[str] = None,
         template: typing.Optional[str] = None,
@@ -307,7 +306,7 @@ def import_from_source(
         owner = kwargs.get("owner")
 
         if spec == Spec.odcs or format == ImportFormat.excel:
-            data_contract_specification_initial = DataContract.init(template=template, schema=schema)
+            data_contract_specification_initial = cls.init(template=template, schema=schema)
 
             odcs_imported = importer_factory.create(format).import_source(
                 data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
@@ -317,12 +316,12 @@ def import_from_source(
                 # convert automatically
                 odcs_imported = to_odcs_v3(odcs_imported)
 
-            self._overwrite_id_in_odcs(odcs_imported, id)
-            self._overwrite_owner_in_odcs(odcs_imported, owner)
+            cls._overwrite_id_in_odcs(odcs_imported, id)
+            cls._overwrite_owner_in_odcs(odcs_imported, owner)
 
             return odcs_imported
         elif spec == Spec.datacontract_specification:
-            data_contract_specification_initial = DataContract.init(template=template, schema=schema)
+            data_contract_specification_initial = cls.init(template=template, schema=schema)
 
             data_contract_specification_imported = importer_factory.create(format).import_source(
                 data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
@@ -334,8 +333,8 @@ def import_from_source(
                     data_contract_specification_initial, data_contract_specification_imported
                 )
 
-            self._overwrite_id_in_data_contract_specification(data_contract_specification_imported, id)
-            self._overwrite_owner_in_data_contract_specification(data_contract_specification_imported, owner)
+            cls._overwrite_id_in_data_contract_specification(data_contract_specification_imported, id)
+            cls._overwrite_owner_in_data_contract_specification(data_contract_specification_imported, owner)
 
             return data_contract_specification_imported
         else:
@@ -347,16 +346,18 @@ def import_from_source(
                 engine="datacontract",
             )
 
+    @staticmethod
     def _overwrite_id_in_data_contract_specification(
-        self, data_contract_specification: DataContractSpecification, id: str | None
+        data_contract_specification: DataContractSpecification, id: str | None
     ):
         if not id:
             return
 
         data_contract_specification.id = id
 
+    @staticmethod
     def _overwrite_owner_in_data_contract_specification(
-        self, data_contract_specification: DataContractSpecification, owner: str | None
+        data_contract_specification: DataContractSpecification, owner: str | None
     ):
         if not owner:
             return
@@ -365,7 +366,8 @@ def _overwrite_owner_in_data_contract_specification(
             data_contract_specification.info = Info()
         data_contract_specification.info.owner = owner
 
-    def _overwrite_owner_in_odcs(self, odcs: OpenDataContractStandard, owner: str | None):
+    @staticmethod
+    def _overwrite_owner_in_odcs(odcs: OpenDataContractStandard, owner: str | None):
         if not owner:
             return
 
@@ -377,7 +379,8 @@ def _overwrite_owner_in_odcs(self, odcs: OpenDataContractStandard, owner: str |
                 return
         odcs.customProperties.append(CustomProperty(property="owner", value=owner))
 
-    def _overwrite_id_in_odcs(self, odcs: OpenDataContractStandard, id: str | None):
+    @staticmethod
+    def _overwrite_id_in_odcs(odcs: OpenDataContractStandard, id: str | None):
         if not id:
             return
 
diff --git a/datacontract/export/dbt_converter.py b/datacontract/export/dbt_converter.py
index fef30373c..1bb6baf66 100644
--- a/datacontract/export/dbt_converter.py
+++ b/datacontract/export/dbt_converter.py
@@ -115,28 +115,28 @@ def _to_dbt_model(
         dbt_model["config"]["contract"] = {"enforced": True}
     if model_value.description is not None:
         dbt_model["description"] = model_value.description.strip().replace("\n", " ")
-        
+
     # Handle model-level primaryKey (before columns for better YAML ordering)
     primary_key_columns = []
-    if hasattr(model_value, 'primaryKey') and model_value.primaryKey:
+    if hasattr(model_value, "primaryKey") and model_value.primaryKey:
         if isinstance(model_value.primaryKey, list) and len(model_value.primaryKey) > 1:
             # Multiple columns: use dbt_utils.unique_combination_of_columns
-            dbt_model["data_tests"] = [{
-                "dbt_utils.unique_combination_of_columns": {
-                    "combination_of_columns": model_value.primaryKey
-                }
-            }]
+            dbt_model["data_tests"] = [
+                {"dbt_utils.unique_combination_of_columns": {"combination_of_columns": model_value.primaryKey}}
+            ]
         elif isinstance(model_value.primaryKey, list) and len(model_value.primaryKey) == 1:
             # Single column: handle at column level (pass to _to_columns)
             primary_key_columns = model_value.primaryKey
         elif isinstance(model_value.primaryKey, str):
             # Single column as string: handle at column level
             primary_key_columns = [model_value.primaryKey]
-    
-    columns = _to_columns(data_contract_spec, model_value.fields, _supports_constraints(model_type), adapter_type, primary_key_columns)
+
+    columns = _to_columns(
+        data_contract_spec, model_value.fields, _supports_constraints(model_type), adapter_type, primary_key_columns
+    )
     if columns:
         dbt_model["columns"] = columns
-            
+
     return dbt_model
 
 
@@ -207,8 +207,8 @@ def _to_column(
             column.setdefault("constraints", []).append({"type": "not_null"})
         else:
             column["data_tests"].append("not_null")
-    
-    # Handle unique constraint  
+
+    # Handle unique constraint
     if field.unique or is_primary_key:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "unique"})
diff --git a/datacontract/export/dqx_converter.py b/datacontract/export/dqx_converter.py
index b68168473..7b3ab0f19 100644
--- a/datacontract/export/dqx_converter.py
+++ b/datacontract/export/dqx_converter.py
@@ -64,7 +64,11 @@ def process_quality_rule(rule: Quality, column_name: str) -> Dict[str, Any]:
     if column_name:
         arguments = check.setdefault(DqxKeys.ARGUMENTS, {})
 
-        if DqxKeys.COL_NAME not in arguments and DqxKeys.COL_NAMES not in arguments and DqxKeys.COLUMNS not in arguments:
+        if (
+            DqxKeys.COL_NAME not in arguments
+            and DqxKeys.COL_NAMES not in arguments
+            and DqxKeys.COLUMNS not in arguments
+        ):
             if check[DqxKeys.FUNCTION] not in ("is_unique", "foreign_key"):
                 arguments[DqxKeys.COL_NAME] = column_name
             else:
diff --git a/datacontract/export/markdown_converter.py b/datacontract/export/markdown_converter.py
index 077f22d20..9ddebfd9c 100644
--- a/datacontract/export/markdown_converter.py
+++ b/datacontract/export/markdown_converter.py
@@ -312,11 +312,7 @@ def extra_to_markdown(obj: BaseModel, is_in_table_cell: bool = False) -> str:
     row_suffix = "<br>" if is_in_table_cell else ""
 
     def render_header(key: str) -> str:
-        return (
-            f"{bullet_char} **{key}:** "
-            if is_in_table_cell
-            else f"\n### {key.capitalize()}\n"
-        )
+        return f"{bullet_char} **{key}:** " if is_in_table_cell else f"\n### {key.capitalize()}\n"
 
     parts: list[str] = []
     for key_extra, value_extra in extra.items():
diff --git a/tests/test_export_dbt_models.py b/tests/test_export_dbt_models.py
index a3f4ff37b..06dba72fe 100644
--- a/tests/test_export_dbt_models.py
+++ b/tests/test_export_dbt_models.py
@@ -200,7 +200,7 @@ def test_to_dbt_models_with_no_model_type():
 def test_to_dbt_models_with_model_level_composite_primary_key():
     """Test model-level primaryKey with multiple columns generates dbt_utils.unique_combination_of_columns"""
     from datacontract.model.data_contract_specification import DataContractSpecification, Field, Info, Model
-    
+
     # Create test data with model-level composite primaryKey
     data_contract = DataContractSpecification(
         id="my-data-contract-id",
@@ -212,12 +212,12 @@ def test_to_dbt_models_with_model_level_composite_primary_key():
                 fields={
                     "tenant_id": Field(type="string", required=True),
                     "account_id": Field(type="string", required=True),
-                    "name": Field(type="string", required=True)
-                }
+                    "name": Field(type="string", required=True),
+                },
             )
-        }
+        },
     )
-    
+
     expected_dbt_model = """
 version: 2
 models:
@@ -250,14 +250,14 @@ def test_to_dbt_models_with_model_level_composite_primary_key():
 
     result = yaml.safe_load(to_dbt_models_yaml(data_contract))
     expected = yaml.safe_load(expected_dbt_model)
-    
+
     assert result == expected
 
 
 def test_to_dbt_models_with_single_column_primary_key():
     """Test model-level primaryKey with single column adds unique constraint to column"""
     from datacontract.model.data_contract_specification import DataContractSpecification, Field, Info, Model
-    
+
     # Create test data with model-level single primaryKey
     data_contract = DataContractSpecification(
         id="my-data-contract-id",
@@ -269,12 +269,12 @@ def test_to_dbt_models_with_single_column_primary_key():
                 fields={
                     "tenant_id": Field(type="string", required=True),
                     "account_id": Field(type="string", required=True),
-                    "name": Field(type="string", required=True)
-                }
+                    "name": Field(type="string", required=True),
+                },
             )
-        }
+        },
     )
-    
+
     expected_dbt_model = """
 version: 2
 models:
@@ -303,7 +303,7 @@ def test_to_dbt_models_with_single_column_primary_key():
 
     result = yaml.safe_load(to_dbt_models_yaml(data_contract))
     expected = yaml.safe_load(expected_dbt_model)
-    
+
     assert result == expected
 
 
diff --git a/tests/test_export_dqx.py b/tests/test_export_dqx.py
index cfebb3e6d..f1c8d889f 100644
--- a/tests/test_export_dqx.py
+++ b/tests/test_export_dqx.py
@@ -16,136 +16,190 @@ def test_cli():
 
 def test_to_dqx():
     actual = DataContract(data_contract_file="fixtures/dqx/datacontract.yaml").export("dqx")
     # Expected quality rules (based on the data contract)
-    expected_rules = [{'check': {'arguments': {'column': 'interaction_id'},
-                                 'function': 'is_not_null'},
-                       'criticality': 'error'},
-                      {'check': {'arguments': {'column': 'user_id'}, 'function': 'is_not_null'},
-                       'criticality': 'error'},
-                      {'check': {'arguments': {'columns': ['user_id'],
-                                               'ref_columns': ['id'],
-                                               'ref_table': 'catalog1.schema1.user'},
-                                 'function': 'foreign_key'},
-                       'criticality': 'error'},
-                      {'check': {'arguments': {'columns': ['user_id']}, 'function': 'is_unique'},
-                       'criticality': 'error'},
-                      {'check': {'arguments': {'allowed': ['click',
-                                                           'view',
-                                                           'purchase',
-                                                           'like',
-                                                           'share'],
-                                               'column': 'interaction_type'},
-                                 'function': 'is_in_list'},
-                       'criticality': 'error'},
-                      {'check': {'arguments': {'column': 'interaction_timestamp',
-                                               'timestamp_format': 'yyyy-MM-dd HH:mm:ss'},
-                                 'function': 'is_valid_timestamp'},
-                       'criticality': 'error'},
-                      {'check': {'arguments': {'column': 'interaction_timestamp', 'offset': '1h'},
-                                 'function': 'not_in_future'},
-                       'criticality': 'warning'},
-                      {'check': {'arguments': {'column': 'item_id'}, 'function': 'is_not_null'},
-                       'criticality': 'minor'},
-                      {'check': {'arguments': {'column': 'interaction_value',
-                                               'max_limit': 1000,
-                                               'min_limit': 0},
-                                 'function': 'is_in_range'},
-                       'criticality': 'minor'},
-                      {'check': {'arguments': {'column': 'location',
-                                               'regex': '^[A-Za-z]+(?:[\\s-][A-Za-z]+)*$'},
-                                 'function': 'regex_match'},
-                       'criticality': 'minor'},
-                      {'check': {'arguments': {'allowed': ['mobile', 'desktop', 'tablet'],
-                                               'column': 'device'},
-                                 'function': 'is_in_list'},
-                       'criticality': 'minor'},
-                      {'check': {'arguments': {'column': 'interaction_date',
-                                               'date_format': 'yyyy-MM-dd'},
-                                 'function': 'is_valid_date'},
-                       'criticality': 'error'},
-                      {'check': {'arguments': {'column': 'time_since_last_interaction', 'days': 30},
-                                 'function': 'is_older_than_n_days'},
-                       'criticality': 'minor'},
-                      {'check': {'arguments': {'column': 'is_active',
-                                               'expression': "is_active IN ('true', 'false')"},
-                                 'function': 'sql_expression'},
-                       'criticality': 'minor'},
-                      {'check': {'arguments': {'column': 'user_profile.age',
-                                               'max_limit': 120,
-                                               'min_limit': 13},
-                                 'function': 'is_in_range'},
-                       'criticality': 'minor'},
-                      {'check': {'arguments': {'allowed': ['male', 'female', 'other'],
-                                               'column': 'user_profile.gender'},
-                                 'function': 'is_in_list'},
-                       'criticality': 'minor'},
-                      {'check': {'arguments': {'column': 'user_profile.location_details.country',
-                                               'regex': '^[A-Z]{2}$'},
-                                 'function': 'regex_match'},
-                       'criticality': 'minor'},
-                      {'check': {'arguments': {'column': 'related_items'},
-                                 'function': 'is_not_null_and_not_empty_array'},
-                       'criticality': 'minor'},
-                      {'check': {'arguments': {'column': 'interaction_context.page_url',
-                                               'regex': '^https?://.+$'},
-                                 'function': 'regex_match'},
-                       'criticality': 'minor'},
-                      {'check': {'arguments': {'allowed': ['mobile', 'desktop', 'tablet', 'tv'],
-                                               'column': 'interaction_context.device_type'},
-                                 'function': 'is_in_list'},
-                       'criticality': 'minor'},
-                      {'check': {'for_each_column': ['user_id',
-                                                     'interaction_id',
-                                                     'interaction_type',
-                                                     'interaction_timestamp'],
-                                 'function': 'is_not_null'},
-                       'criticality': 'error',
-                       'filter': "interaction_type IN ('click', 'view', 'purchase')"},
-                      {'check': {'arguments': {'column': 'interaction_value',
-                                               'max_limit': 1000,
-                                               'min_limit': 0},
-                                 'function': 'is_in_range'},
-                       'criticality': 'warning',
-                       'filter': "interaction_type = 'purchase'"},
-                      {'check': {'arguments': {'allowed': ['mobile', 'tablet'], 'column': 'device'},
-                                 'function': 'is_in_list'},
-                       'criticality': 'minor',
-                       'filter': "device = 'mobile'"},
-                      {'check': {'arguments': {'column': 'user_profile.age',
-                                               'max_limit': 120,
-                                               'min_limit': 13},
-                                 'function': 'is_in_range'},
-                       'criticality': 'minor',
-                       'filter': 'user_profile.age IS NOT NULL'},
-                      {'check': {'arguments': {'columns': ['user_id', 'interaction_date']},
-                                 'function': 'is_unique'},
-                       'criticality': 'error'},
-                      {'check': {'arguments': {'aggr_type': 'max',
-                                               'column': 'interaction_value',
-                                               'limit': 1000},
-                                 'function': 'is_aggr_not_greater_than'},
-                       'criticality': 'error',
-                       'filter': "interaction_type = 'purchase'"},
-                      {'check': {'arguments': {'aggr_type': 'min',
-                                               'column': 'user_profile.age',
-                                               'group_by': ['user_id'],
-                                               'limit': 21},
-                                 'function': 'is_aggr_not_less_than'},
-                       'criticality': 'error'},
-                      {'check': {'arguments': {'aggr_type': 'count',
-                                               'column': 'interaction_date',
-                                               'limit': 24},
-                                 'function': 'is_aggr_equal'},
-                       'criticality': 'error'},
-                      {'check': {'arguments': {'columns': ['user_id'],
-                                               'ref_columns': ['id'],
-                                               'ref_df_name': 'df_user'},
-                                 'function': 'foreign_key'},
-                       'criticality': 'error'},
-                      {'check': {'arguments': {'columns': ['user_id'],
-                                               'ref_columns': ['id'],
-                                               'ref_table': 'catalog1.schema1.user'},
-                                 'function': 'foreign_key'},
-                       'criticality': 'error'}]
+    expected_rules = [
+        {"check": {"arguments": {"column": "interaction_id"}, "function": "is_not_null"}, "criticality": "error"},
+        {"check": {"arguments": {"column": "user_id"}, "function": "is_not_null"}, "criticality": "error"},
+        {
+            "check": {
+                "arguments": {"columns": ["user_id"], "ref_columns": ["id"], "ref_table": "catalog1.schema1.user"},
+                "function": "foreign_key",
+            },
+            "criticality": "error",
+        },
+        {"check": {"arguments": {"columns": ["user_id"]}, "function": "is_unique"}, "criticality": "error"},
+        {
+            "check": {
+                "arguments": {"allowed": ["click", "view", "purchase", "like", "share"], "column": "interaction_type"},
+                "function": "is_in_list",
+            },
+            "criticality": "error",
+        },
+        {
+            "check": {
+                "arguments": {"column": "interaction_timestamp", "timestamp_format": "yyyy-MM-dd HH:mm:ss"},
+                "function": "is_valid_timestamp",
+            },
+            "criticality": "error",
+        },
+        {
+            "check": {"arguments": {"column": "interaction_timestamp", "offset": "1h"}, "function": "not_in_future"},
+            "criticality": "warning",
+        },
+        {"check": {"arguments": {"column": "item_id"}, "function": "is_not_null"}, "criticality": "minor"},
+        {
+            "check": {
+                "arguments": {"column": "interaction_value", "max_limit": 1000, "min_limit": 0},
+                "function": "is_in_range",
+            },
+            "criticality": "minor",
+        },
+        {
+            "check": {
+                "arguments": {"column": "location", "regex": "^[A-Za-z]+(?:[\\s-][A-Za-z]+)*$"},
+                "function": "regex_match",
+            },
+            "criticality": "minor",
+        },
+        {
+            "check": {
+                "arguments": {"allowed": ["mobile", "desktop", "tablet"], "column": "device"},
+                "function": "is_in_list",
+            },
+            "criticality": "minor",
+        },
+        {
+            "check": {
+                "arguments": {"column": "interaction_date", "date_format": "yyyy-MM-dd"},
+                "function": "is_valid_date",
+            },
+            "criticality": "error",
+        },
+        {
+            "check": {
+                "arguments": {"column": "time_since_last_interaction", "days": 30},
+                "function": "is_older_than_n_days",
+            },
+            "criticality": "minor",
+        },
+        {
+            "check": {
+                "arguments": {"column": "is_active", "expression": "is_active IN ('true', 'false')"},
+                "function": "sql_expression",
+            },
+            "criticality": "minor",
+        },
+        {
+            "check": {
+                "arguments": {"column": "user_profile.age", "max_limit": 120, "min_limit": 13},
+                "function": "is_in_range",
+            },
+            "criticality": "minor",
+        },
+        {
+            "check": {
+                "arguments": {"allowed": ["male", "female", "other"], "column": "user_profile.gender"},
+                "function": "is_in_list",
+            },
+            "criticality": "minor",
+        },
+        {
+            "check": {
+                "arguments": {"column": "user_profile.location_details.country", "regex": "^[A-Z]{2}$"},
+                "function": "regex_match",
+            },
+            "criticality": "minor",
+        },
+        {
+            "check": {"arguments": {"column": "related_items"}, "function": "is_not_null_and_not_empty_array"},
+            "criticality": "minor",
+        },
+        {
+            "check": {
+                "arguments": {"column": "interaction_context.page_url", "regex": "^https?://.+$"},
+                "function": "regex_match",
+            },
+            "criticality": "minor",
+        },
+        {
+            "check": {
+                "arguments": {
+                    "allowed": ["mobile", "desktop", "tablet", "tv"],
+                    "column": "interaction_context.device_type",
+                },
+                "function": "is_in_list",
+            },
+            "criticality": "minor",
+        },
+        {
+            "check": {
+                "for_each_column": ["user_id", "interaction_id", "interaction_type", "interaction_timestamp"],
+                "function": "is_not_null",
+            },
+            "criticality": "error",
+            "filter": "interaction_type IN ('click', 'view', 'purchase')",
+        },
+        {
+            "check": {
+                "arguments": {"column": "interaction_value", "max_limit": 1000, "min_limit": 0},
+                "function": "is_in_range",
+            },
+            "criticality": "warning",
+            "filter": "interaction_type = 'purchase'",
+        },
+        {
+            "check": {"arguments": {"allowed": ["mobile", "tablet"], "column": "device"}, "function": "is_in_list"},
+            "criticality": "minor",
+            "filter": "device = 'mobile'",
+        },
+        {
+            "check": {
+                "arguments": {"column": "user_profile.age", "max_limit": 120, "min_limit": 13},
+                "function": "is_in_range",
+            },
+            "criticality": "minor",
+            "filter": "user_profile.age IS NOT NULL",
+        },
+        {
+            "check": {"arguments": {"columns": ["user_id", "interaction_date"]}, "function": "is_unique"},
+            "criticality": "error",
+        },
+        {
+            "check": {
+                "arguments": {"aggr_type": "max", "column": "interaction_value", "limit": 1000},
+                "function": "is_aggr_not_greater_than",
+            },
+            "criticality": "error",
+            "filter": "interaction_type = 'purchase'",
+        },
+        {
+            "check": {
+                "arguments": {"aggr_type": "min", "column": "user_profile.age", "group_by": ["user_id"], "limit": 21},
+                "function": "is_aggr_not_less_than",
+            },
+            "criticality": "error",
+        },
+        {
+            "check": {
+                "arguments": {"aggr_type": "count", "column": "interaction_date", "limit": 24},
+                "function": "is_aggr_equal",
+            },
+            "criticality": "error",
+        },
+        {
+            "check": {
+                "arguments": {"columns": ["user_id"], "ref_columns": ["id"], "ref_df_name": "df_user"},
+                "function": "foreign_key",
+            },
+            "criticality": "error",
+        },
+        {
+            "check": {
+                "arguments": {"columns": ["user_id"], "ref_columns": ["id"], "ref_table": "catalog1.schema1.user"},
+                "function": "foreign_key",
+            },
+            "criticality": "error",
+        },
+    ]
 
     assert yaml.safe_load(actual) == expected_rules
 
diff --git a/tests/test_import_avro.py b/tests/test_import_avro.py
index af68bfda2..354f8e87d 100644
--- a/tests/test_import_avro.py
+++ b/tests/test_import_avro.py
@@ -23,7 +23,7 @@ def test_cli():
 
 def test_import_avro_schema():
-    result = DataContract().import_from_source("avro", "fixtures/avro/data/orders.avsc")
+    result = DataContract.import_from_source("avro", "fixtures/avro/data/orders.avsc")
 
     expected = """
 dataContractSpecification: 1.2.1
@@ -120,7 +120,7 @@ def test_import_avro_schema():
 
 def test_import_avro_arrays_of_records_and_nested_arrays():
-    result = DataContract().import_from_source("avro", "fixtures/avro/data/arrays.avsc")
+    result = DataContract.import_from_source("avro", "fixtures/avro/data/arrays.avsc")
 
     expected = """
 dataContractSpecification: 1.2.1
@@ -171,7 +171,7 @@ def test_import_avro_arrays_of_records_and_nested_arrays():
 
 def test_import_avro_nested_records():
-    result = DataContract().import_from_source("avro", "fixtures/avro/data/nested.avsc")
+    result = DataContract.import_from_source("avro", "fixtures/avro/data/nested.avsc")
 
     expected = """
 dataContractSpecification: 1.2.1
@@ -200,7 +200,7 @@ def test_import_avro_nested_records():
 
 def test_import_avro_nested_records_with_arrays():
-    result = DataContract().import_from_source("avro", "fixtures/avro/data/nested_with_arrays.avsc")
+    result = DataContract.import_from_source("avro", "fixtures/avro/data/nested_with_arrays.avsc")
 
     expected = """
 dataContractSpecification: 1.2.1
@@ -266,7 +266,7 @@ def test_import_avro_nested_records_with_arrays():
 
 def test_import_avro_logical_types():
-    result = DataContract().import_from_source("avro", "fixtures/avro/data/logical_types.avsc")
+    result = DataContract.import_from_source("avro", "fixtures/avro/data/logical_types.avsc")
 
     expected = """
 dataContractSpecification: 1.2.1
@@ -317,7 +317,7 @@ def test_import_avro_logical_types():
 
 def test_import_avro_optional_enum():
-    result = DataContract().import_from_source("avro", "fixtures/avro/data/optional_enum.avsc")
+    result = DataContract.import_from_source("avro", "fixtures/avro/data/optional_enum.avsc")
 
     expected = """
 dataContractSpecification: 1.2.1
diff --git a/tests/test_import_bigquery.py b/tests/test_import_bigquery.py
index e7167b808..83f178c16 100644
--- a/tests/test_import_bigquery.py
+++ b/tests/test_import_bigquery.py
@@ -24,7 +24,7 @@ def test_cli():
 
 def test_import_bigquery_schema():
-    result = DataContract().import_from_source("bigquery", "fixtures/bigquery/import/complete_table_schema.json")
+    result = DataContract.import_from_source("bigquery", "fixtures/bigquery/import/complete_table_schema.json")
     print("Result:\n", result.to_yaml())
 
     with open("fixtures/bigquery/import/datacontract.yaml") as file:
@@ -34,7 +34,7 @@ def test_import_bigquery_schema():
 
 def test_import_multiple_bigquery_schemas_with_different_types():
-    result = DataContract().import_from_source("bigquery", "fixtures/bigquery/import/multi_import_table.json")
+    result = DataContract.import_from_source("bigquery", "fixtures/bigquery/import/multi_import_table.json")
     result = import_bigquery_from_json(result, "fixtures/bigquery/import/multi_import_external_table.json")
     result = import_bigquery_from_json(result, "fixtures/bigquery/import/multi_import_snapshot.json")
 
diff --git a/tests/test_import_csv.py b/tests/test_import_csv.py
index 76138b941..1debe9afd 100644
--- a/tests/test_import_csv.py
+++ b/tests/test_import_csv.py
@@ -26,7 +26,7 @@ def test_cli():
 
 def test_import_csv():
     source = "fixtures/csv/data/sample_data_5_column.csv"
-    result = DataContract().import_from_source("csv", source)
+    result = DataContract.import_from_source("csv", source)
     model = result.models["sample_data_5_column"]
     assert model is not None
     assert len(model.fields["field_one"].examples) == 5
diff --git a/tests/test_import_dbml.py b/tests/test_import_dbml.py
index 3fb1f9223..c14a5d88b 100644
--- a/tests/test_import_dbml.py
+++ b/tests/test_import_dbml.py
@@ -42,7 +42,7 @@ def test_cli_with_filters():
 
 def test_dbml_import():
-    result = DataContract().import_from_source("dbml", "fixtures/dbml/import/dbml.txt")
+    result = DataContract.import_from_source("dbml", "fixtures/dbml/import/dbml.txt")
     print("Result:\n", result.to_yaml())
 
     with open("fixtures/dbml/import/datacontract.yaml") as file:
@@ -51,7 +51,7 @@ def test_dbml_import():
 
 def test_dbml_import_with_schema_filter():
-    result = DataContract().import_from_source("dbml", "fixtures/dbml/import/dbml.txt", dbml_schema=["orders"])
+    result = DataContract.import_from_source("dbml", "fixtures/dbml/import/dbml.txt", dbml_schema=["orders"])
     print("Result:\n", result.to_yaml())
 
     with open("fixtures/dbml/import/datacontract_schema_filtered.yaml") as file:
@@ -60,7 +60,7 @@ def test_dbml_import_with_schema_filter():
 
 def test_dbml_import_with_tablename_filter():
-    result = DataContract().import_from_source("dbml", "fixtures/dbml/import/dbml.txt", dbml_table=["orders"])
+    result = DataContract.import_from_source("dbml", "fixtures/dbml/import/dbml.txt", dbml_table=["orders"])
     print("Result:\n", result.to_yaml())
 
     with open("fixtures/dbml/import/datacontract_table_filtered.yaml") as file:
diff --git a/tests/test_import_dbt.py b/tests/test_import_dbt.py
index 89cf8d9a6..37e856c74 100644
--- a/tests/test_import_dbt.py
+++ b/tests/test_import_dbt.py
@@ -67,7 +67,7 @@ def test_cli_with_filter():
 
 def test_import_dbt_manifest():
-    result = DataContract().import_from_source("dbt", dbt_manifest)
+    result = DataContract.import_from_source("dbt", dbt_manifest)
 
     expected = """
 dataContractSpecification: 1.2.1
@@ -231,7 +231,7 @@ def test_import_dbt_manifest():
 
 def test_import_dbt_manifest_bigquery():
-    result = DataContract().import_from_source("dbt", dbt_manifest_bigquery)
+    result = DataContract.import_from_source("dbt", dbt_manifest_bigquery)
 
     expected = """
 dataContractSpecification: 1.2.1
@@ -395,7 +395,7 @@ def test_import_dbt_manifest_bigquery():
 
 def test_import_dbt_manifest_with_filter_and_empty_columns():
-    result = DataContract().import_from_source("dbt", dbt_manifest_empty_columns, dbt_model=["customers"])
+    result = DataContract.import_from_source("dbt", dbt_manifest_empty_columns, dbt_model=["customers"])
 
     expected = """
 dataContractSpecification: 1.2.1
@@ -417,7 +417,7 @@ def test_import_dbt_manifest_with_filter_and_empty_columns():
 
 def test_import_dbt_manifest_with_filter():
-    result = DataContract().import_from_source("dbt", dbt_manifest, dbt_model=["customers"])
+    result = DataContract.import_from_source("dbt", dbt_manifest, dbt_model=["customers"])
 
     expected = """
 dataContractSpecification: 1.2.1
diff --git a/tests/test_import_glue.py b/tests/test_import_glue.py
index e1501eb02..9596c7b69 100644
--- a/tests/test_import_glue.py
+++ b/tests/test_import_glue.py
@@ -158,7 +158,7 @@ def test_cli_with_table_filters(setup_mock_glue):
 
 @mock_aws
 def test_import_glue_schema_without_glue_table_filter(setup_mock_glue):
-    result = DataContract().import_from_source("glue", "test_database")
+    result = DataContract.import_from_source("glue", "test_database")
 
     with open("fixtures/glue/datacontract.yaml") as file:
         expected = file.read()
@@ -171,7 +171,7 @@ def test_import_glue_schema_without_glue_table_filter(setup_mock_glue):
 
 @mock_aws
 def test_import_glue_schema_with_glue_table_filter(setup_mock_glue):
-    result = DataContract().import_from_source(format="glue", source="test_database", glue_table=[table_name])
+    result = DataContract.import_from_source(format="glue", source="test_database", glue_table=[table_name])
 
     with open("fixtures/glue/datacontract.yaml") as file:
         expected = file.read()
@@ -184,7 +184,7 @@ def test_import_glue_schema_with_glue_table_filter(setup_mock_glue):
 
 @mock_aws
 def test_import_glue_schema_with_non_existent_glue_table_filter(setup_mock_glue):
-    result = DataContract().import_from_source(format="glue", source="test_database", glue_table=["table_1"])
+    result = DataContract.import_from_source(format="glue", source="test_database", glue_table=["table_1"])
 
     # we specify a table that the Mock doesn't have and thus expect an empty result
     with open("fixtures/glue/datacontract-empty-model.yaml") as file:
diff --git a/tests/test_import_json.py b/tests/test_import_json.py
index fede79e0d..82d4361cb 100644
--- a/tests/test_import_json.py
+++ b/tests/test_import_json.py
@@ -28,7 +28,7 @@ def test_json_simple():
     expected_dict = yaml.safe_load(expected_json)
 
     json_file = "fixtures/import/json/product_simple.json"
-    actual = DataContract().import_from_source("json", json_file).to_yaml()
+    actual = DataContract.import_from_source("json", json_file).to_yaml()
     actual_dict = yaml.safe_load(actual)
 
     # normalize paths in both dictionaries to use forward slashes and remove any ./tests/ prefix
@@ -72,7 +72,7 @@ def test_json_complex():
     expected_dict = yaml.safe_load(expected_json)
 
     json_file = "fixtures/import/json/product_detail.json"
-    actual = DataContract().import_from_source("json", json_file).to_yaml()
+    actual = DataContract.import_from_source("json", json_file).to_yaml()
     actual_dict = yaml.safe_load(actual)
 
     # normalize paths in both dictionaries to use forward slashes and remove any ./tests/ prefix
@@ -116,7 +116,7 @@ def test_ndjson():
     expected_dict = yaml.safe_load(expected_json)
 
     json_file = "fixtures/import/json/inventory_ndjson.json"
-    actual = DataContract().import_from_source("json", json_file).to_yaml()
+    actual = DataContract.import_from_source("json", json_file).to_yaml()
     actual_dict = yaml.safe_load(actual)
 
     # normalize paths in both dictionaries to use forward slashes and remove any ./tests/ prefix
diff --git a/tests/test_import_jsonschema.py b/tests/test_import_jsonschema.py
index 0d9055462..b5dede019 100644
--- a/tests/test_import_jsonschema.py
+++ b/tests/test_import_jsonschema.py
@@ -51,7 +51,7 @@ def test_cli_with_output(tmp_path: Path):
 
 def test_import_json_schema_orders():
-    result = DataContract().import_from_source("jsonschema", "fixtures/import/orders_union-types.json")
+    result = DataContract.import_from_source("jsonschema", "fixtures/import/orders_union-types.json")
 
     with open("fixtures/import/orders_union-types_datacontract.yml") as file:
         expected = file.read()
@@ -62,7 +62,7 @@ def test_import_json_schema_orders():
 
 def test_import_json_schema_football():
-    result = DataContract().import_from_source("jsonschema", "fixtures/import/football.json")
+    result = DataContract.import_from_source("jsonschema", "fixtures/import/football.json")
 
     with open("fixtures/import/football-datacontract.yml") as file:
         expected = file.read()
@@ -73,7 +73,7 @@ def test_import_json_schema_football():
 
 def test_import_json_schema_football_deeply_nested_no_required():
-    result = DataContract().import_from_source("jsonschema", "fixtures/import/football_deeply_nested_no_required.json")
+    result = DataContract.import_from_source("jsonschema", "fixtures/import/football_deeply_nested_no_required.json")
 
     with open("fixtures/import/football_deeply_nested_no_required_datacontract.yml") as file:
         expected = file.read()
diff --git a/tests/test_import_odcs_v3.py b/tests/test_import_odcs_v3.py
index 21cb86b6b..da286db8b 100644
--- a/tests/test_import_odcs_v3.py
+++ b/tests/test_import_odcs_v3.py
@@ -26,14 +26,14 @@ def test_cli():
 
 def test_import_full_odcs():
-    result = DataContract().import_from_source("odcs", "./fixtures/odcs_v3/full-example.odcs.yaml")
+    result = DataContract.import_from_source("odcs", "./fixtures/odcs_v3/full-example.odcs.yaml")
     expected_datacontract = read_file("fixtures/odcs_v3/full-example.datacontract.yml")
     assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected_datacontract)
     assert DataContract(data_contract_str=expected_datacontract).lint(enabled_linters="none").has_passed()
 
 
 def test_import_complex_odcs():
-    result = DataContract().import_from_source("odcs", "./fixtures/odcs_v3/adventureworks.odcs.yaml")
+    result = DataContract.import_from_source("odcs", "./fixtures/odcs_v3/adventureworks.odcs.yaml")
     expected_datacontract = read_file("fixtures/odcs_v3/adventureworks.datacontract.yml")
     assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected_datacontract)
     assert DataContract(data_contract_str=expected_datacontract).lint(enabled_linters="none").has_passed()
diff --git a/tests/test_import_parquet.py b/tests/test_import_parquet.py
index 67de4aa1e..2a2d24e82 100644
--- a/tests/test_import_parquet.py
+++ b/tests/test_import_parquet.py
@@ -22,7 +22,7 @@ def test_cli():
 
 def test_import_parquet():
-    result = DataContract().import_from_source(format="parquet", source=parquet_file_path)
+    result = DataContract.import_from_source(format="parquet", source=parquet_file_path)
 
     expected = """dataContractSpecification: 1.2.1
 id: my-data-contract-id
diff --git a/tests/test_import_protobuf.py b/tests/test_import_protobuf.py
index 7bdece2d3..505d62254 100644
--- a/tests/test_import_protobuf.py
+++ b/tests/test_import_protobuf.py
@@ -25,7 +25,7 @@ def test_cli():
 
 def test_import_protobuf():
-    result = DataContract().import_from_source("protobuf", protobuf_file_path)
+    result = DataContract.import_from_source("protobuf", protobuf_file_path)
 
     expected = """dataContractSpecification: 1.2.1
 id: my-data-contract-id
diff --git a/tests/test_import_spark.py b/tests/test_import_spark.py
index 5c96a6cbb..2123012ff 100644
--- a/tests/test_import_spark.py
+++ b/tests/test_import_spark.py
@@ -151,17 +151,17 @@ def test_prog(spark: SparkSession, df_user, user_datacontract_no_desc, user_data
     expected_no_desc = user_datacontract_no_desc
 
     # does not include a table level description (table method)
-    result1 = DataContract().import_from_source("spark", "users")
+    result1 = DataContract.import_from_source("spark", "users")
     assert yaml.safe_load(result1.to_yaml()) == yaml.safe_load(expected_no_desc)
 
     # does include a table level description (table method)
-    result2 = DataContract().import_from_source("spark", "users", description="description")
+    result2 = DataContract.import_from_source("spark", "users", description="description")
     assert yaml.safe_load(result2.to_yaml()) == yaml.safe_load(expected_desc)
 
     # does not include a table level description (dataframe object method)
-    result3 = DataContract().import_from_source("spark", "users", dataframe=df_user)
+    result3 = DataContract.import_from_source("spark", "users", dataframe=df_user)
     assert yaml.safe_load(result3.to_yaml()) == yaml.safe_load(expected_no_desc)
 
     # does include a table level description (dataframe object method)
-    result4 = DataContract().import_from_source("spark", "users", dataframe=df_user, description="description")
+    result4 = DataContract.import_from_source("spark", "users", dataframe=df_user, description="description")
     assert yaml.safe_load(result4.to_yaml()) == yaml.safe_load(expected_desc)
diff --git a/tests/test_import_sql_postgres.py b/tests/test_import_sql_postgres.py
index 8552648d2..df635c34e 100644
--- a/tests/test_import_sql_postgres.py
+++ b/tests/test_import_sql_postgres.py
@@ -26,7 +26,7 @@ def test_cli():
 
 def test_import_sql_postgres():
-    result = DataContract().import_from_source("sql", sql_file_path, dialect="postgres")
+    result = DataContract.import_from_source("sql", sql_file_path, dialect="postgres")
 
     expected = """
 dataContractSpecification: 1.2.1
@@ -64,7 +64,7 @@ def test_import_sql_postgres():
 
 def test_import_sql_constraints():
-    result = DataContract().import_from_source("sql", "fixtures/postgres/data/data_constraints.sql", dialect="postgres")
+    result = DataContract.import_from_source("sql", "fixtures/postgres/data/data_constraints.sql", dialect="postgres")
 
     expected = """
 dataContractSpecification: 1.2.1
diff --git a/tests/test_import_sql_sqlserver.py b/tests/test_import_sql_sqlserver.py
index 8923153ac..b3df21664 100644
--- a/tests/test_import_sql_sqlserver.py
+++ b/tests/test_import_sql_sqlserver.py
@@ -6,7 +6,7 @@
 
 
 def test_import_sql_sqlserver():
-    result = DataContract().import_from_source("sql", sql_file_path, dialect="sqlserver")
+    result = DataContract.import_from_source("sql", sql_file_path, dialect="sqlserver")
 
     expected = """
 dataContractSpecification: 1.2.1
diff --git a/tests/test_import_unity_file.py b/tests/test_import_unity_file.py
index 3f9a14a36..9abc713ea 100644
--- a/tests/test_import_unity_file.py
+++ b/tests/test_import_unity_file.py
@@ -26,7 +26,7 @@ def test_cli():
 
 def test_import_unity():
     print("running test_import_unity")
-    result = DataContract().import_from_source("unity", "fixtures/databricks-unity/import/unity_table_schema.json")
+    result = DataContract.import_from_source("unity", "fixtures/databricks-unity/import/unity_table_schema.json")
 
     with open("fixtures/databricks-unity/import/datacontract.yaml") as file:
         expected = file.read()
@@ -56,7 +56,7 @@ def test_cli_complex_types():
 @pytest.mark.skip(reason="Complex types are not perfectly supported for the unity catalog import")
 def test_import_unity_complex_types():
     print("running test_import_unity_complex_types")
-    result = DataContract().import_from_source(
+    result = DataContract.import_from_source(
         "unity", "fixtures/databricks-unity/import/unity_table_schema_complex_types.json"
     )
 
@@ -70,7 +70,7 @@ def test_import_unity_complex_types():
 
 def test_import_unity_with_owner_and_id():
     print("running test_import_unity_with_owner_and_id")
-    result = DataContract().import_from_source(
+    result = DataContract.import_from_source(
         "unity", "fixtures/databricks-unity/import/unity_table_schema.json", owner="sales-team", id="orders-v1"
     )
 
diff --git a/tests/test_roundtrip_jsonschema.py b/tests/test_roundtrip_jsonschema.py
index 20f470483..1b2321cf4 100644
--- a/tests/test_roundtrip_jsonschema.py
+++ b/tests/test_roundtrip_jsonschema.py
@@ -32,7 +32,7 @@ def test_export_cli():
 
 def test_roundtrip_json_schema_orders():
     # Import the data contract from the JSON schema source
-    result_import = DataContract().import_from_source("jsonschema", "fixtures/import/orders.json")
+    result_import = DataContract.import_from_source("jsonschema", "fixtures/import/orders.json")
 
     # Create a data contract specification with inline definitions
     data_contract = DataContract(
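
For illustration, a minimal sketch (not part of the patch) of why the deprecation shim could be dropped: a @classmethod resolves through the class even when called on an instance, so DataContract().import_from_source(...) and DataContract.import_from_source(...) reach the same code path. DataContractLike is a hypothetical stand-in for the real class.

class DataContractLike:
    @classmethod
    def import_from_source(cls, format: str, source: str | None = None) -> str:
        # cls is always the class object, whether invoked on the class or an instance
        return f"{cls.__name__} imports {source!r} as {format!r}"

print(DataContractLike.import_from_source("avro", "orders.avsc"))    # called on the class
print(DataContractLike().import_from_source("avro", "orders.avsc"))  # called on an instance, same result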
From 9929f1d8f1c009da991fe750ee55dbc6822b754e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 17 Oct 2025 09:55:51 +0200
Subject: [PATCH 017/150] chore(deps): update typer requirement (#904)

Updates the requirements on [typer](https://github.com/fastapi/typer) to permit the latest version.
- [Release notes](https://github.com/fastapi/typer/releases)
- [Changelog](https://github.com/fastapi/typer/blob/master/docs/release-notes.md)
- [Commits](https://github.com/fastapi/typer/compare/0.15.1...0.19.2)

---
updated-dependencies:
- dependency-name: typer
  dependency-version: 0.19.2
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 35315e295..0cf5d3630 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,7 @@ classifiers = [
 ]
 requires-python = ">=3.10"
 dependencies = [
-    "typer>=0.15.1,<0.17",
+    "typer>=0.15.1,<0.20",
     "pydantic>=2.8.2,<2.12.0",
    "pyyaml~=6.0.1",
     "requests>=2.31,<2.33",
From b40f430cc7c1fbcff007bdc1b1d38081dfdf5f5a Mon Sep 17 00:00:00 2001
From: Sammi Fux
Date: Fri, 17 Oct 2025 10:10:43 +0200
Subject: [PATCH 018/150] feat: #901 implement decimal support for databricks
 and spark testing + fix test spark startup (#902)

* 901 implement decimal support for databricks and spark testing

* run ruff

* add changelog entry

* add testcase for no precision and scale defined

* disable spark ui for test spark and master(local[*])

* replace return spark with yield

* remove request

---------

Co-authored-by: jochenchrist
---
 CHANGELOG.md                               |   1 +
 datacontract/export/sql_type_converter.py  |  10 +-
 tests/fixtures/dataframe/datacontract.yaml |   9 ++
 tests/test_test_dataframe.py               | 115 ++++++++++++++++-----
 4 files changed, 108 insertions(+), 27 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4c16a045d..42b868083 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - Support for Data Contract Specification v1.2.1 (Data Quality Metrics)
+- Support for decimal testing in spark and databricks (#902)
 - Support for BigQuery Flexible Schema in Data Contract Checks (#909)
 
 ### Changed
diff --git a/datacontract/export/sql_type_converter.py b/datacontract/export/sql_type_converter.py
index db1688372..fb39ca516 100644
--- a/datacontract/export/sql_type_converter.py
+++ b/datacontract/export/sql_type_converter.py
@@ -133,8 +133,9 @@ def convert_to_dataframe(field: Field) -> None | str:
     if type.lower() in ["time"]:
         return "STRING"
     if type.lower() in ["number", "decimal", "numeric"]:
-        # precision and scale not supported by data contract
-        return "DECIMAL"
+        precision = field.precision if field.precision is not None else 38
+        scale = field.scale if field.scale is not None else 0
+        return f"DECIMAL({precision},{scale})"
     if type.lower() in ["float"]:
         return "FLOAT"
     if type.lower() in ["double"]:
@@ -182,8 +183,9 @@ def convert_to_databricks(field: Field) -> None | str:
     if type.lower() in ["time"]:
         return "STRING"
     if type.lower() in ["number", "decimal", "numeric"]:
-        # precision and scale not supported by data contract
-        return "DECIMAL"
+        precision = field.precision if field.precision is not None else 38
+        scale = field.scale if field.scale is not None else 0
+        return f"DECIMAL({precision},{scale})"
     if type.lower() in ["float"]:
         return "FLOAT"
     if type.lower() in ["double"]:
diff --git a/tests/fixtures/dataframe/datacontract.yaml b/tests/fixtures/dataframe/datacontract.yaml
index 8eb4fa860..f1430c5fd 100644
--- a/tests/fixtures/dataframe/datacontract.yaml
+++ b/tests/fixtures/dataframe/datacontract.yaml
@@ -21,6 +21,15 @@ models:
         minimum: 10
       field_three:
         type: timestamp
+      field_four:
+        type: decimal
+        precision: 4
+        scale: 2
+      field_five:
+        type: decimal
+        precision: 4
+      field_six:
+        type: decimal
       field_array_of_strings:
         type: array
         items:
diff --git a/tests/test_test_dataframe.py b/tests/test_test_dataframe.py
index e6f30869a..cbddb6529 100644
--- a/tests/test_test_dataframe.py
+++ b/tests/test_test_dataframe.py
@@ -1,10 +1,13 @@
 from datetime import datetime
+from decimal import Decimal
+from typing import Any, Generator
 
 import pytest
 from dotenv import load_dotenv
 from pyspark.sql import Row, SparkSession
 from pyspark.sql.types import (
     ArrayType,
+    DecimalType,
     IntegerType,
     StringType,
     StructField,
@@ -22,7 +25,7 @@
 
 
 @pytest.fixture(scope="session")
-def spark(tmp_path_factory) -> SparkSession:
+def spark(tmp_path_factory) -> Generator[SparkSession, Any, None]:
     """Create and configure a Spark session."""
     spark = (
         SparkSession.builder.appName("datacontract-dataframe-unittest")
@@ -35,11 +38,16 @@ def spark(tmp_path_factory) -> SparkSession:
             "spark.jars.packages",
             "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.5,org.apache.spark:spark-avro_2.12:3.5.5",
         )
+        .config('spark.driver.host', '127.0.0.1')
+        .master("local[*]")
+        .config("spark.ui.enabled", False)
         .getOrCreate()
     )
     spark.sparkContext.setLogLevel("WARN")
     print(f"Using PySpark version {spark.version}")
-    return spark
+
+    yield spark
+    spark.stop()
 
 
 # TODO this test conflicts with the test_test_kafka.py test
@@ -53,36 +61,58 @@ def test_test_dataframe(spark: SparkSession):
     run = data_contract.test()
 
     print(run.pretty())
-    assert run.result == "passed"
+    assert run.has_passed()
     assert all(check.result == "passed" for check in run.checks)
-    spark.stop()
 
 
-def _prepare_dataframe(spark):
-    schema = StructType(
-        [
-            StructField("field_one", StringType(), nullable=False),
-            StructField("field_two", IntegerType(), nullable=True),
-            StructField("field_three", TimestampType(), nullable=True),
-            StructField("field_array_of_strings", ArrayType(StringType()), nullable=True),
-            StructField(
-                "field_array_of_structs",
-                ArrayType(
-                    StructType(
-                        [
-                            StructField("inner_field_string", StringType()),
-                            StructField("inner_field_int", IntegerType()),
-                        ]
-                    )
-                ),
-            ),
-        ]
+def test_test_dataframe_fail(spark: SparkSession):
+    _prepare_fail_dataframe(spark)
+    data_contract = DataContract(
+        data_contract_file=datacontract,
+        spark=spark,
     )
+
+    run = data_contract.test()
+
+    print(run.pretty())
+    assert not run.has_passed()
+    failed = [check for check in run.checks if check.result == "failed"]
+    assert len(failed) == 3
+
+
+schema = StructType(
+    [
+        StructField("field_one", StringType(), nullable=False),
+        StructField("field_two", IntegerType(), nullable=True),
+        StructField("field_three", TimestampType(), nullable=True),
+        StructField("field_four", DecimalType(4, 2), nullable=True),
+        StructField("field_five", DecimalType(4), nullable=True),
+        StructField("field_six", DecimalType(38, 0), nullable=True),
+        StructField("field_array_of_strings", ArrayType(StringType()), nullable=True),
+        StructField(
+            "field_array_of_structs",
+            ArrayType(
+                StructType(
+                    [
+                        StructField("inner_field_string", StringType()),
+                        StructField("inner_field_int", IntegerType()),
+                    ]
+                )
+            ),
+        ),
+    ]
+)
+
+
+def _prepare_dataframe(spark):
     data = [
         Row(
             field_one="AB-123-CD",
             field_two=15,
             field_three=datetime.strptime("2024-01-01 12:00:00", "%Y-%m-%d %H:%M:%S"),
+            field_four=Decimal(12.34),
+            field_five=Decimal(12.34),
+            field_six=Decimal(12.34),
             field_array_of_strings=["string1", "string2"],
             field_array_of_structs=[
                 Row(inner_field_string="string1", inner_field_int=1),
@@ -93,6 +123,9 @@ def _prepare_dataframe(spark):
             field_one="XY-456-ZZ",
             field_two=20,
             field_three=datetime.strptime("2024-02-01 12:00:00", "%Y-%m-%d %H:%M:%S"),
+            field_four=Decimal(56.78),
+            field_five=Decimal(56.78),
+            field_six=Decimal(56.78),
             field_array_of_strings=["string3", "string4"],
             field_array_of_structs=[
                 Row(inner_field_string="string3", inner_field_int=3),
@@ -105,3 +138,39 @@ def _prepare_dataframe(spark):
     # Create temporary view
     # Name must match the model name in the data contract
     df.createOrReplaceTempView("my_table")
+
+
+def _prepare_fail_dataframe(spark):
+    data = [
+        Row(
+            field_one="WRONG_FORMAT_NOT_UNIQUE",
+            field_two=1,
+            field_three=datetime.strptime("2024-01-01 12:00:00", "%Y-%m-%d %H:%M:%S"),
+            field_four=Decimal(12.34),
+            field_five=Decimal(12.34),
+            field_six=Decimal(12.34),
+            field_array_of_strings=["string1", "string2"],
+            field_array_of_structs=[
+                Row(inner_field_string="string1", inner_field_int=1),
+                Row(inner_field_string="string2", inner_field_int=2),
+            ],
+        ),
+        Row(
+            field_one="WRONG_FORMAT_NOT_UNIQUE",
+            field_two=2,
+            field_three=datetime.strptime("2024-01-01 12:00:00", "%Y-%m-%d %H:%M:%S"),
+            field_four=Decimal(12.34),
+            field_five=Decimal(12.34),
+            field_six=Decimal(12.34),
+            field_array_of_strings=["string1", "string2"],
+            field_array_of_structs=[
+                Row(inner_field_string="string1", inner_field_int=1),
+                Row(inner_field_string="string2", inner_field_int=2),
+            ],
+        ),
+    ]
+    # Create DataFrame
+    df = spark.createDataFrame(data, schema=schema)
+    # Create temporary view
+    # Name must match the model name in the data contract
+    df.createOrReplaceTempView("my_table")
\ No newline at end of file
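
For illustration, a minimal sketch (not part of the patch) of the DECIMAL mapping introduced above, with 38/0 as the fallback when precision or scale is unset. SimpleField is a hypothetical stand-in for datacontract's Field model.

from dataclasses import dataclass
from typing import Optional


@dataclass
class SimpleField:
    precision: Optional[int] = None
    scale: Optional[int] = None


def to_decimal_sql_type(field: SimpleField) -> str:
    # Mirrors convert_to_dataframe/convert_to_databricks for decimal types
    precision = field.precision if field.precision is not None else 38
    scale = field.scale if field.scale is not None else 0
    return f"DECIMAL({precision},{scale})"


print(to_decimal_sql_type(SimpleField(precision=4, scale=2)))  # DECIMAL(4,2)
print(to_decimal_sql_type(SimpleField(precision=4)))           # DECIMAL(4,0)
print(to_decimal_sql_type(SimpleField()))                      # DECIMAL(38,0)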
b/tests/test_test_dataframe.py @@ -1,10 +1,13 @@ from datetime import datetime +from decimal import Decimal +from typing import Any, Generator import pytest from dotenv import load_dotenv from pyspark.sql import Row, SparkSession from pyspark.sql.types import ( ArrayType, + DecimalType, IntegerType, StringType, StructField, @@ -22,7 +25,7 @@ @pytest.fixture(scope="session") -def spark(tmp_path_factory) -> SparkSession: +def spark(tmp_path_factory) -> Generator[SparkSession, Any, None]: """Create and configure a Spark session.""" spark = ( SparkSession.builder.appName("datacontract-dataframe-unittest") @@ -35,11 +38,16 @@ def spark(tmp_path_factory) -> SparkSession: "spark.jars.packages", "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.5,org.apache.spark:spark-avro_2.12:3.5.5", ) + .config('spark.driver.host', '127.0.0.1') + .master("local[*]") + .config("spark.ui.enabled", False) .getOrCreate() ) spark.sparkContext.setLogLevel("WARN") print(f"Using PySpark version {spark.version}") - return spark + + yield spark + spark.stop() # TODO this test conflicts with the test_test_kafka.py test @@ -53,36 +61,58 @@ def test_test_dataframe(spark: SparkSession): run = data_contract.test() print(run.pretty()) - assert run.result == "passed" + assert run.has_passed() assert all(check.result == "passed" for check in run.checks) - spark.stop() -def _prepare_dataframe(spark): - schema = StructType( - [ - StructField("field_one", StringType(), nullable=False), - StructField("field_two", IntegerType(), nullable=True), - StructField("field_three", TimestampType(), nullable=True), - StructField("field_array_of_strings", ArrayType(StringType()), nullable=True), - StructField( - "field_array_of_structs", - ArrayType( - StructType( - [ - StructField("inner_field_string", StringType()), - StructField("inner_field_int", IntegerType()), - ] - ) - ), - ), - ] +def test_test_dataframe_fail(spark: SparkSession): + _prepare_fail_dataframe(spark) + data_contract = DataContract( + data_contract_file=datacontract, + spark=spark, ) + + run = data_contract.test() + + print(run.pretty()) + assert not run.has_passed() + failed = [check for check in run.checks if check.result == "failed"] + assert len(failed) == 3 + + +schema = StructType( + [ + StructField("field_one", StringType(), nullable=False), + StructField("field_two", IntegerType(), nullable=True), + StructField("field_three", TimestampType(), nullable=True), + StructField("field_four", DecimalType(4, 2), nullable=True), + StructField("field_five", DecimalType(4), nullable=True), + StructField("field_six", DecimalType(38, 0), nullable=True), + StructField("field_array_of_strings", ArrayType(StringType()), nullable=True), + StructField( + "field_array_of_structs", + ArrayType( + StructType( + [ + StructField("inner_field_string", StringType()), + StructField("inner_field_int", IntegerType()), + ] + ) + ), + ), + ] +) + + +def _prepare_dataframe(spark): data = [ Row( field_one="AB-123-CD", field_two=15, field_three=datetime.strptime("2024-01-01 12:00:00", "%Y-%m-%d %H:%M:%S"), + field_four=Decimal(12.34), + field_five=Decimal(12.34), + field_six=Decimal(12.34), field_array_of_strings=["string1", "string2"], field_array_of_structs=[ Row(inner_field_string="string1", inner_field_int=1), @@ -93,6 +123,9 @@ def _prepare_dataframe(spark): field_one="XY-456-ZZ", field_two=20, field_three=datetime.strptime("2024-02-01 12:00:00", "%Y-%m-%d %H:%M:%S"), + field_four=Decimal(56.78), + field_five=Decimal(56.78), + field_six=Decimal(56.78), field_array_of_strings=["string3", 
"string4"], field_array_of_structs=[ Row(inner_field_string="string3", inner_field_int=3), @@ -105,3 +138,39 @@ def _prepare_dataframe(spark): # Create temporary view # Name must match the model name in the data contract df.createOrReplaceTempView("my_table") + + +def _prepare_fail_dataframe(spark): + data = [ + Row( + field_one="WRONG_FORMAT_NOT_UNIQUE", + field_two=1, + field_three=datetime.strptime("2024-01-01 12:00:00", "%Y-%m-%d %H:%M:%S"), + field_four=Decimal(12.34), + field_five=Decimal(12.34), + field_six=Decimal(12.34), + field_array_of_strings=["string1", "string2"], + field_array_of_structs=[ + Row(inner_field_string="string1", inner_field_int=1), + Row(inner_field_string="string2", inner_field_int=2), + ], + ), + Row( + field_one="WRONG_FORMAT_NOT_UNIQUE", + field_two=2, + field_three=datetime.strptime("2024-01-01 12:00:00", "%Y-%m-%d %H:%M:%S"), + field_four=Decimal(12.34), + field_five=Decimal(12.34), + field_six=Decimal(12.34), + field_array_of_strings=["string1", "string2"], + field_array_of_structs=[ + Row(inner_field_string="string1", inner_field_int=1), + Row(inner_field_string="string2", inner_field_int=2), + ], + ), + ] + # Create DataFrame + df = spark.createDataFrame(data, schema=schema) + # Create temporary view + # Name must match the model name in the data contract + df.createOrReplaceTempView("my_table") \ No newline at end of file From eb890f7caa2e9644700efff9a24618b2a2c62a86 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 17 Oct 2025 10:13:49 +0200 Subject: [PATCH 019/150] chore(deps): update databricks-sdk requirement from <0.64.0 to <0.68.0 (#903) Updates the requirements on [databricks-sdk](https://github.com/databricks/databricks-sdk-py) to permit the latest version. - [Release notes](https://github.com/databricks/databricks-sdk-py/releases) - [Changelog](https://github.com/databricks/databricks-sdk-py/blob/main/CHANGELOG.md) - [Commits](https://github.com/databricks/databricks-sdk-py/compare/v0.0.1...v0.67.0) --- updated-dependencies: - dependency-name: databricks-sdk dependency-version: 0.67.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0cf5d3630..29fdb71ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,7 @@ databricks = [ "soda-core-spark-df>=3.3.20,<3.6.0", "soda-core-spark[databricks]>=3.3.20,<3.6.0", "databricks-sql-connector>=3.7.0,<4.1.0", - "databricks-sdk<0.64.0", + "databricks-sdk<0.68.0", "pyspark>=3.5.5,<4.0.0", ] From 17c129b432aa3e8af59a0998872288ab863a9182 Mon Sep 17 00:00:00 2001 From: sugato <47884819+toshifumisuga@users.noreply.github.com> Date: Fri, 17 Oct 2025 17:20:07 +0900 Subject: [PATCH 020/150] fix changelog (#900) Co-authored-by: jochenchrist --- CHANGELOG.md | 1 + tests/test_export_dbt_models.py | 40 ++++++++++++++++----------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42b868083..e3c51b376 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Export to DQX: Correct DQX format for global-level quality check of data contract export. 
(#877) - Import the table tags from a open data contract spec v3 (#895) +- dbt export: Enhanced model-level primaryKey support with automatic test generation for single and multiple column primary keys (#898) ## [0.10.35] - 2025-08-25 diff --git a/tests/test_export_dbt_models.py b/tests/test_export_dbt_models.py index 06dba72fe..42c4a41d4 100644 --- a/tests/test_export_dbt_models.py +++ b/tests/test_export_dbt_models.py @@ -206,13 +206,13 @@ def test_to_dbt_models_with_model_level_composite_primary_key(): id="my-data-contract-id", info=Info(title="My Data Contract", version="0.0.1"), models={ - "sfdc_loc_tenants_test": Model( + "test_table": Model( type="table", - primaryKey=["tenant_id", "account_id"], # Model-level composite primary key + primaryKey=["order_id", "user_id"], # Model-level composite primary key fields={ - "tenant_id": Field(type="string", required=True), - "account_id": Field(type="string", required=True), - "name": Field(type="string", required=True), + "order_id": Field(type="string", required=True), + "user_id": Field(type="string", required=True), + "product_id": Field(type="string", required=True) }, ) }, @@ -221,7 +221,7 @@ def test_to_dbt_models_with_model_level_composite_primary_key(): expected_dbt_model = """ version: 2 models: - - name: sfdc_loc_tenants_test + - name: test_table config: meta: data_contract: my-data-contract-id @@ -231,18 +231,18 @@ def test_to_dbt_models_with_model_level_composite_primary_key(): data_tests: - dbt_utils.unique_combination_of_columns: combination_of_columns: - - tenant_id - - account_id + - order_id + - user_id columns: - - name: tenant_id + - name: order_id data_type: STRING constraints: - type: not_null - - name: account_id + - name: user_id data_type: STRING constraints: - type: not_null - - name: name + - name: product_id data_type: STRING constraints: - type: not_null @@ -263,13 +263,13 @@ def test_to_dbt_models_with_single_column_primary_key(): id="my-data-contract-id", info=Info(title="My Data Contract", version="0.0.1"), models={ - "sfdc_loc_tenants_test": Model( + "test_table": Model( type="table", - primaryKey=["tenant_id"], # Model-level single primary key + primaryKey=["order_id"], # Model-level single primary key fields={ - "tenant_id": Field(type="string", required=True), - "account_id": Field(type="string", required=True), - "name": Field(type="string", required=True), + "order_id": Field(type="string", required=True), + "user_id": Field(type="string", required=True), + "product_id": Field(type="string", required=True) }, ) }, @@ -278,7 +278,7 @@ def test_to_dbt_models_with_single_column_primary_key(): expected_dbt_model = """ version: 2 models: - - name: sfdc_loc_tenants_test + - name: test_table config: meta: data_contract: my-data-contract-id @@ -286,16 +286,16 @@ def test_to_dbt_models_with_single_column_primary_key(): contract: enforced: true columns: - - name: tenant_id + - name: order_id data_type: STRING constraints: - type: not_null - type: unique - - name: account_id + - name: user_id data_type: STRING constraints: - type: not_null - - name: name + - name: product_id data_type: STRING constraints: - type: not_null From c8782b08fb73314d11f85544e9ca76398b13f1d7 Mon Sep 17 00:00:00 2001 From: fuaddi Date: Fri, 17 Oct 2025 17:32:28 +0900 Subject: [PATCH 021/150] Fix: Valid Field Constraints for date/integer/number (#893) Co-authored-by: Fuaddi Yustindra --- .../lint/linters/valid_constraints_linter.py | 2 +- .../datacontract_valid_field_constraints.yaml | 16 ++++++++++++++++ tests/test_lint.py | 9 +++++++++ 3 
files changed, 26 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/lint/datacontract_valid_field_constraints.yaml diff --git a/datacontract/lint/linters/valid_constraints_linter.py b/datacontract/lint/linters/valid_constraints_linter.py index 54afea84d..6121b6e73 100644 --- a/datacontract/lint/linters/valid_constraints_linter.py +++ b/datacontract/lint/linters/valid_constraints_linter.py @@ -17,7 +17,7 @@ class ValidFieldConstraintsLinter(Linter): valid_types_for_constraint = { "pattern": set(["string", "text", "varchar"]), - "format": set(["string", "text", "varchar"]), + "format": set(["string", "text", "varchar", "date", "integer", "number"]), "minLength": set(["string", "text", "varchar"]), "maxLength": set(["string", "text", "varchar"]), "minimum": set(["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]), diff --git a/tests/fixtures/lint/datacontract_valid_field_constraints.yaml b/tests/fixtures/lint/datacontract_valid_field_constraints.yaml new file mode 100644 index 000000000..43f3de841 --- /dev/null +++ b/tests/fixtures/lint/datacontract_valid_field_constraints.yaml @@ -0,0 +1,16 @@ +dataContractSpecification: 1.2.0 +id: my-data-contract-id +info: + title: "My Data Contract" + version: 0.0.1 +models: + orders: + type: table + fields: + event_date: + type: date + amount: + type: number + id: + type: integer + required: true \ No newline at end of file diff --git a/tests/test_lint.py b/tests/test_lint.py index 5cc1fb514..ab263090f 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -72,3 +72,12 @@ def test_lint_with_references(): run = data_contract.lint() assert run.result == "passed" + + +def test_lint_valid_field_constraints(): + data_contract_file = "fixtures/lint/datacontract_valid_field_constraints.yaml" + data_contract = DataContract(data_contract_file=data_contract_file) + + run = data_contract.lint() + + assert run.result == "passed" \ No newline at end of file From c333e847f60843b2f3d4ebaa67c37b0b26664f66 Mon Sep 17 00:00:00 2001 From: Damien Maresma <136118865+dmaresma@users.noreply.github.com> Date: Fri, 17 Oct 2025 04:45:00 -0400 Subject: [PATCH 022/150] initial (#875) --- datacontract/templates/datacontract_odcs.html | 101 +++++++++++------- 1 file changed, 60 insertions(+), 41 deletions(-) diff --git a/datacontract/templates/datacontract_odcs.html b/datacontract/templates/datacontract_odcs.html index cd9ab779d..adc05d928 100644 --- a/datacontract/templates/datacontract_odcs.html +++ b/datacontract/templates/datacontract_odcs.html @@ -1,3 +1,61 @@ +{% macro render_nested_properties(properties, level) %} + + {% for property in properties %} + + +
+                    {% for i in range(0,level)%}
+                     
+                    {% endfor %}
+                    {% if level > 0 %}
+                    ↳
+                    {% endif %}
+                    {{ property.name or "items" }}
+                    {% if property.primaryKey %}
+                    Primary Key
+                    {% endif %}
+                    {% if property.partitioned %}
+                    Partitioned
+                    {% endif %}
+                    {% if property.criticalDataElement %}
+                    Critical
+                    {% endif %}
+
+                    {{ property.businessName or "-" }}
+
+
+                    {{ property.logicalType }}
+                    {% if property.physicalType and property.physicalType != property.logicalType %}
+                    {{ property.physicalType }}
+                    {% endif %}
+
+
+                    {% if property.required %}
+                    Yes
+                    {% else %}
+                    No
+                    {% endif %}
+
+                    {{ property.description or "-" }}
+
+
+                    {% if property.properties %}
+                    {{render_nested_properties(property.properties, level+1)}}
+                    {% endif %}
+                    {% if property.items %}
+                    {{render_nested_properties([property.items], level+1)}}
+                    {% endif %}
+    {% endfor %}
+
+
+{% endmacro %}
+

@@ -12,7 +70,6 @@

-
- + \ No newline at end of file From bd3a2378ecbe42a61b30585e04c4fc2ecb03834e Mon Sep 17 00:00:00 2001 From: jochen Date: Fri, 17 Oct 2025 11:22:57 +0200 Subject: [PATCH 023/150] Comment out failing test --- tests/test_lint.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_lint.py b/tests/test_lint.py index ab263090f..ca8792161 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -73,11 +73,11 @@ def test_lint_with_references(): assert run.result == "passed" - -def test_lint_valid_field_constraints(): - data_contract_file = "fixtures/lint/datacontract_valid_field_constraints.yaml" - data_contract = DataContract(data_contract_file=data_contract_file) - - run = data_contract.lint() - - assert run.result == "passed" \ No newline at end of file +# +# def test_lint_valid_field_constraints(): +# data_contract_file = "fixtures/lint/datacontract_valid_field_constraints.yaml" +# data_contract = DataContract(data_contract_file=data_contract_file) +# +# run = data_contract.lint() +# +# assert run.result == "passed" \ No newline at end of file From 3a96f1c3dc32f3c1e32955ea13ef74e989114b28 Mon Sep 17 00:00:00 2001 From: jochenchrist Date: Fri, 17 Oct 2025 11:51:46 +0200 Subject: [PATCH 024/150] Remove linters. Closes #913 (#914) * Remove linters. Closes #913 --------- Co-authored-by: jochen --- CHANGELOG.md | 4 + README.md | 6 +- datacontract/data_contract.py | 40 +---- datacontract/lint/lint.py | 142 ------------------ datacontract/lint/linters/__init__.py | 0 .../lint/linters/description_linter.py | 33 ---- .../lint/linters/field_pattern_linter.py | 34 ----- .../lint/linters/field_reference_linter.py | 47 ------ .../lint/linters/notice_period_linter.py | 55 ------- .../lint/linters/valid_constraints_linter.py | 100 ------------ .../lint/datacontract_csv_lint_base.yaml | 18 --- .../lint/datacontract_quality_schema.yaml | 17 --- .../lint/datacontract_unknown_model.yaml | 11 -- .../datacontract_valid_field_constraints.yaml | 16 -- tests/test_api.py | 2 +- tests/test_documentation_linter.py | 48 ------ tests/test_export_bigquery.py | 2 +- tests/test_export_dbml.py | 4 +- tests/test_field_constraint_linter.py | 68 --------- tests/test_field_pattern_linter.py | 42 ------ tests/test_field_reference_linter.py | 56 ------- tests/test_import_avro.py | 12 +- tests/test_import_bigquery.py | 4 +- tests/test_import_csv.py | 2 +- tests/test_import_dbt.py | 8 +- tests/test_import_glue.py | 6 +- tests/test_import_json.py | 6 +- tests/test_import_jsonschema.py | 6 +- tests/test_import_odcs_v3.py | 4 +- tests/test_import_parquet.py | 2 +- tests/test_import_protobuf.py | 2 +- tests/test_import_sql_postgres.py | 4 +- tests/test_import_sql_sqlserver.py | 2 +- tests/test_import_unity_file.py | 4 +- tests/test_lint.py | 13 +- tests/test_notice_period_linter.py | 47 ------ 36 files changed, 45 insertions(+), 822 deletions(-) delete mode 100644 datacontract/lint/lint.py delete mode 100644 datacontract/lint/linters/__init__.py delete mode 100644 datacontract/lint/linters/description_linter.py delete mode 100644 datacontract/lint/linters/field_pattern_linter.py delete mode 100644 datacontract/lint/linters/field_reference_linter.py delete mode 100644 datacontract/lint/linters/notice_period_linter.py delete mode 100644 datacontract/lint/linters/valid_constraints_linter.py delete mode 100644 tests/fixtures/lint/datacontract_csv_lint_base.yaml delete mode 100644 tests/fixtures/lint/datacontract_quality_schema.yaml delete mode 100644 
tests/fixtures/lint/datacontract_unknown_model.yaml delete mode 100644 tests/fixtures/lint/datacontract_valid_field_constraints.yaml delete mode 100644 tests/test_documentation_linter.py delete mode 100644 tests/test_field_constraint_linter.py delete mode 100644 tests/test_field_pattern_linter.py delete mode 100644 tests/test_field_reference_linter.py delete mode 100644 tests/test_notice_period_linter.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e3c51b376..788477eb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Import the table tags from a open data contract spec v3 (#895) - dbt export: Enhanced model-level primaryKey support with automatic test generation for single and multiple column primary keys (#898) +### Removed + +- Removed specific linters, as the linters did not support ODCS (#913) + ## [0.10.35] - 2025-08-25 ### Added diff --git a/README.md b/README.md index bea8defd7..2e6f1ef45 100644 --- a/README.md +++ b/README.md @@ -1772,8 +1772,7 @@ Create a data contract based on the actual data. This is the fastest way to get $ datacontract test ``` -3. Make sure that all the best practices for a `datacontract.yaml` are met using the linter. You - probably forgot to document some fields and add the terms and conditions. +3. Validate that the `datacontract.yaml` is correctly formatted and adheres to the Data Contract Specification. ```bash $ datacontract lint ``` @@ -1794,8 +1793,7 @@ Create a data contract based on the requirements from use cases. ``` 2. Create the model and quality guarantees based on your business requirements. Fill in the terms, - descriptions, etc. Make sure you follow all best practices for a `datacontract.yaml` using the - linter. + descriptions, etc. Validate that your `datacontract.yaml` is correctly formatted. 
```bash $ datacontract lint ``` diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py index e06cac386..768684c62 100644 --- a/datacontract/data_contract.py +++ b/datacontract/data_contract.py @@ -26,11 +26,6 @@ from datacontract.init.init_template import get_init_template from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager from datacontract.lint import resolve -from datacontract.lint.linters.description_linter import DescriptionLinter -from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter -from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter -from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter -from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter from datacontract.model.data_contract_specification import DataContractSpecification, Info from datacontract.model.exceptions import DataContractException from datacontract.model.run import Check, ResultEnum, Run @@ -64,24 +59,14 @@ def __init__( self._inline_definitions = inline_definitions self._inline_quality = inline_quality self._ssl_verification = ssl_verification - self.all_linters = { - FieldPatternLinter(), - FieldReferenceLinter(), - NoticePeriodLinter(), - ValidFieldConstraintsLinter(), - DescriptionLinter(), - } @classmethod def init(cls, template: typing.Optional[str], schema: typing.Optional[str] = None) -> DataContractSpecification: template_str = get_init_template(template) return resolve.resolve_data_contract(data_contract_str=template_str, schema_location=schema) - def lint(self, enabled_linters: typing.Union[str, set[str]] = "all") -> Run: - """Lint the data contract by deserializing the contract and checking the schema, as well as calling the configured linters. - - enabled_linters can be either "all" or "none", or a set of linter IDs. The "schema" linter is always enabled, even with enabled_linters="none". - """ + def lint(self) -> Run: + """Lint the data contract by validating it against the JSON schema.""" run = Run.create_run() try: run.log_info("Linting data contract") @@ -101,27 +86,6 @@ def lint(self, enabled_linters: typing.Union[str, set[str]] = "all") -> Run: engine="datacontract", ) ) - if enabled_linters == "none": - linters_to_check = set() - elif enabled_linters == "all": - linters_to_check = self.all_linters - elif isinstance(enabled_linters, set): - linters_to_check = {linter for linter in self.all_linters if linter.id in enabled_linters} - else: - raise RuntimeError(f"Unknown argument enabled_linters={enabled_linters} for lint()") - for linter in linters_to_check: - try: - run.checks.extend(linter.lint(data_contract)) - except Exception as e: - run.checks.append( - Check( - type="general", - result=ResultEnum.error, - name=f"Linter '{linter.name}'", - reason=str(e), - engine="datacontract", - ) - ) run.dataContractId = data_contract.id run.dataContractVersion = data_contract.info.version except DataContractException as e: diff --git a/datacontract/lint/lint.py b/datacontract/lint/lint.py deleted file mode 100644 index 9c10bf602..000000000 --- a/datacontract/lint/lint.py +++ /dev/null @@ -1,142 +0,0 @@ -import abc -from dataclasses import dataclass, field -from enum import Enum -from typing import Any, Sequence, cast - -from datacontract.model.run import Check - -from ..model.data_contract_specification import DataContractSpecification - -"""This module contains linter definitions for linting a data contract. 
- -Lints are quality checks that can succeed, fail, or warn. They are -distinct from checks such as "valid yaml" or "file not found", which -will cause the processing of the data contract to stop. Lints can be -ignored, and are high-level requirements on the format of a data -contract.""" - - -class LintSeverity(Enum): - """The severity of a lint message. Generally, lint messages should be - emitted with a severity of ERROR. WARNING should be used when the linter - cannot determine a lint result, for example, when an unsupported model - type is used. - """ - - ERROR = 2 - WARNING = 1 - - -@dataclass -class LinterMessage: - """A single linter message with attached severity and optional "model" that - caused the message. - - Attributes: - outcome: The outcome of the linting, either ERROR or WARNING. Linting outcomes with level WARNING are discarded for now. - message: A message describing the error or warning in more detail. - model: The model that caused the lint to fail. Is optional. - - """ - - outcome: LintSeverity - message: str - model: Any = None - - @classmethod - def error(cls, message: str, model=None): - return LinterMessage(LintSeverity.ERROR, message, model) - - @classmethod - def warning(cls, message: str, model=None): - return LinterMessage(LintSeverity.WARNING, message, model) - - -@dataclass -class LinterResult: - """Result of linting a contract. Contains multiple LinterResults from - the same linter or lint phase. - - Attributes: - linter: The linter that produced these results - results: A list of linting results. Multiple identical linting - results can be present in the list. An empty list means that - the linter ran without producing warnings or errors. - """ - - results: Sequence[LinterMessage] = field(default_factory=list) - - @classmethod - def erroneous(cls, message, model=None): - return cls([LinterMessage.error(message, model)]) - - @classmethod - def cautious(cls, message, model=None): - return cls([LinterMessage.warning(message, model)]) - - def with_warning(self, message, model=None): - result = LinterMessage.warning(message, model) - return LinterResult(cast(list[LinterMessage], self.results) + [result]) - - def with_error(self, message, model=None): - result = LinterMessage.error(message, model) - return LinterResult(cast(list[LinterMessage], self.results) + [result]) - - def has_errors(self) -> bool: - return any(map(lambda result: result.outcome == LintSeverity.ERROR, self.results)) - - def has_warnings(self) -> bool: - return any(map(lambda result: result.outcome == LintSeverity.WARNING, self.results)) - - def error_results(self) -> Sequence[LinterMessage]: - return [result for result in self.results if result.outcome == LintSeverity.ERROR] - - def warning_results(self) -> Sequence[LinterMessage]: - return [result for result in self.results if result.outcome == LintSeverity.WARNING] - - def no_errors_or_warnings(self) -> bool: - return len(self.results) == 0 - - def combine(self, other: "LinterResult") -> "LinterResult": - return LinterResult(cast(list[Any], self.results) + cast(list[Any], other.results)) - - -class Linter(abc.ABC): - @property - @abc.abstractmethod - def name(self) -> str: - """Human-readable name of the linter.""" - pass - - @property - @abc.abstractmethod - def id(self) -> str: - """A linter ID for configuration (i.e. 
enabling and disabling).""" - pass - - @abc.abstractmethod - def lint_implementation(self, contract: DataContractSpecification) -> LinterResult: - pass - - def lint(self, contract: DataContractSpecification) -> list[Check]: - """Call with a data contract to get a list of check results from the linter.""" - result = self.lint_implementation(contract) - checks = [] - if not result.error_results(): - checks.append(Check(type="lint", name=f"Linter '{self.name}'", result="passed", engine="datacontract")) - else: - # All linter messages are treated as warnings. Severity is - # currently ignored, but could be used in filtering in the future - # Linter messages with level WARNING are currently ignored, but might - # be logged or printed in the future. - for lint_error in result.error_results(): - checks.append( - Check( - type="lint", - name=f"Linter '{self.name}'", - result="warning", - engine="datacontract", - reason=lint_error.message, - ) - ) - return checks diff --git a/datacontract/lint/linters/__init__.py b/datacontract/lint/linters/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/datacontract/lint/linters/description_linter.py b/datacontract/lint/linters/description_linter.py deleted file mode 100644 index 2f88615b8..000000000 --- a/datacontract/lint/linters/description_linter.py +++ /dev/null @@ -1,33 +0,0 @@ -from datacontract.model.data_contract_specification import DataContractSpecification - -from ..lint import Linter, LinterResult - - -class DescriptionLinter(Linter): - """Check for a description on contracts, models, model fields, definitions and examples.""" - - @property - def name(self) -> str: - return "Objects have descriptions" - - @property - def id(self) -> str: - return "description" - - def lint_implementation(self, contract: DataContractSpecification) -> LinterResult: - result = LinterResult() - if not contract.info or not contract.info.description: - result = result.with_error("Contract has empty description.") - for model_name, model in contract.models.items(): - if not model.description: - result = result.with_error(f"Model '{model_name}' has empty description.") - for field_name, field in model.fields.items(): - if not field.description: - result = result.with_error(f"Field '{field_name}' in model '{model_name}' has empty description.") - for definition_name, definition in contract.definitions.items(): - if not definition.description: - result = result.with_error(f"Definition '{definition_name}' has empty description.") - for index, example in enumerate(contract.examples): - if not example.description: - result = result.with_error(f"Example {index + 1} has empty description.") - return result diff --git a/datacontract/lint/linters/field_pattern_linter.py b/datacontract/lint/linters/field_pattern_linter.py deleted file mode 100644 index b9add00ef..000000000 --- a/datacontract/lint/linters/field_pattern_linter.py +++ /dev/null @@ -1,34 +0,0 @@ -import re - -from datacontract.model.data_contract_specification import DataContractSpecification - -from ..lint import Linter, LinterResult - - -class FieldPatternLinter(Linter): - """Checks that all patterns defined for fields are correct Python regex - syntax. 
- - """ - - @property - def name(self): - return "Field pattern is correct regex" - - @property - def id(self) -> str: - return "field-pattern" - - def lint_implementation(self, contract: DataContractSpecification) -> LinterResult: - result = LinterResult() - for model_name, model in contract.models.items(): - for field_name, field in model.fields.items(): - if field.pattern: - try: - re.compile(field.pattern) - except re.error as e: - result = result.with_error( - f"Failed to compile pattern regex '{field.pattern}' for " - f"field '{field_name}' in model '{model_name}': {e.msg}" - ) - return result diff --git a/datacontract/lint/linters/field_reference_linter.py b/datacontract/lint/linters/field_reference_linter.py deleted file mode 100644 index 0a9a52435..000000000 --- a/datacontract/lint/linters/field_reference_linter.py +++ /dev/null @@ -1,47 +0,0 @@ -from datacontract.model.data_contract_specification import DataContractSpecification - -from ..lint import Linter, LinterResult - - -class FieldReferenceLinter(Linter): - """Checks that all references definitions in fields refer to existing - fields. - - """ - - @property - def name(self): - return "Field references existing field" - - @property - def id(self) -> str: - return "field-reference" - - def lint_implementation(self, contract: DataContractSpecification) -> LinterResult: - result = LinterResult() - for model_name, model in contract.models.items(): - for field_name, field in model.fields.items(): - if field.references: - reference_hierarchy = field.references.split(".") - if len(reference_hierarchy) != 2: - result = result.with_error( - f"Field '{field_name}' in model '{model_name}'" - f" references must follow the model.field syntax and refer to a field in a model in this data contract." - ) - continue - ref_model = reference_hierarchy[0] - ref_field = reference_hierarchy[1] - - if ref_model not in contract.models: - result = result.with_error( - f"Field '{field_name}' in model '{model_name}' references non-existing model '{ref_model}'." - ) - else: - ref_model_obj = contract.models[ref_model] - if ref_field not in ref_model_obj.fields: - result = result.with_error( - f"Field '{field_name}' in model '{model_name}'" - f" references non-existing field '{ref_field}'" - f" in model '{ref_model}'." - ) - return result diff --git a/datacontract/lint/linters/notice_period_linter.py b/datacontract/lint/linters/notice_period_linter.py deleted file mode 100644 index 7051bc4f6..000000000 --- a/datacontract/lint/linters/notice_period_linter.py +++ /dev/null @@ -1,55 +0,0 @@ -import re - -from datacontract.model.data_contract_specification import DataContractSpecification - -from ..lint import Linter, LinterResult - - -class NoticePeriodLinter(Linter): - @property - def name(self) -> str: - return "noticePeriod in ISO8601 format" - - @property - def id(self) -> str: - return "notice-period" - - # Regex matching the "simple" ISO8601 duration format - simple = re.compile( - r"""P # Introduces period - (:?[0-9\.,]+Y)? # Number of years - (:?[0-9\.,]+M)? # Number of months - (:?[0-9\.,]+W)? # Number of weeks - (:?[0-9\.,]+D)? # Number of days - (:? # Time part (optional) - T # Always starts with T - (:?[0-9\.,]+H)? # Number of hours - (:?[0-9\.,]+M)? # Number of minutes - (:?[0-9\.,]+S)? # Number of seconds - )? 
- """, - re.VERBOSE, - ) - datetime_basic = re.compile(r"P\d{8}T\d{6}") - datetime_extended = re.compile(r"P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}") - - def lint_implementation(self, contract: DataContractSpecification) -> LinterResult: - """Check whether the notice period is specified using ISO8601 duration syntax.""" - if not contract.terms: - return LinterResult.cautious("No terms defined.") - period = contract.terms.noticePeriod - if not period: - return LinterResult.cautious("No notice period defined.") - if not period.startswith("P"): - return LinterResult.erroneous(f"Notice period '{period}' is not a valid ISO8601 duration.") - if period == "P": - return LinterResult.erroneous( - "Notice period 'P' is not a valid ISO8601 duration, requires at least one duration to be specified." - ) - if ( - not self.simple.fullmatch(period) - and not self.datetime_basic.fullmatch(period) - and not self.datetime_extended.fullmatch(period) - ): - return LinterResult.erroneous(f"Notice period '{period}' is not a valid ISO8601 duration.") - return LinterResult() diff --git a/datacontract/lint/linters/valid_constraints_linter.py b/datacontract/lint/linters/valid_constraints_linter.py deleted file mode 100644 index 6121b6e73..000000000 --- a/datacontract/lint/linters/valid_constraints_linter.py +++ /dev/null @@ -1,100 +0,0 @@ -from datacontract.model.data_contract_specification import DataContractSpecification, Field - -from ..lint import Linter, LinterResult - - -class ValidFieldConstraintsLinter(Linter): - """Check validity of field constraints. - - More precisely, check that only numeric constraints are specified on - fields of numeric type and string constraints on fields of string type. - Additionally, the linter checks that defined constraints make sense. - Minimum values should not be greater than maximum values, exclusive and - non-exclusive minimum and maximum should not be combined and string - pattern and format should not be combined. - - """ - - valid_types_for_constraint = { - "pattern": set(["string", "text", "varchar"]), - "format": set(["string", "text", "varchar", "date", "integer", "number"]), - "minLength": set(["string", "text", "varchar"]), - "maxLength": set(["string", "text", "varchar"]), - "minimum": set(["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]), - "exclusiveMinimum": set( - ["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"] - ), - "maximum": set(["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]), - "exclusiveMaximum": set( - ["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"] - ), - } - - def check_minimum_maximum(self, field: Field, field_name: str, model_name: str) -> LinterResult: - (min, max, xmin, xmax) = (field.minimum, field.maximum, field.exclusiveMinimum, field.exclusiveMaximum) - match ( - "minimum" in field.model_fields_set, - "maximum" in field.model_fields_set, - "exclusiveMinimum" in field.model_fields_set, - "exclusiveMaximum" in field.model_fields_set, - ): - case (True, True, _, _) if min > max: - return LinterResult.erroneous( - f"Minimum {min} is greater than maximum {max} on field '{field_name}' in model '{model_name}'." - ) - case (_, _, True, True) if xmin >= xmax: - return LinterResult.erroneous( - f"Exclusive minimum {xmin} is greater than exclusive" - f" maximum {xmax} on field '{field_name}' in model '{model_name}'." 
- ) - case (True, True, True, True): - return LinterResult.erroneous( - f"Both exclusive and non-exclusive minimum and maximum are " - f"defined on field '{field_name}' in model '{model_name}'." - ) - case (True, _, True, _): - return LinterResult.erroneous( - f"Both exclusive and non-exclusive minimum are " - f"defined on field '{field_name}' in model '{model_name}'." - ) - case (_, True, _, True): - return LinterResult.erroneous( - f"Both exclusive and non-exclusive maximum are " - f"defined on field '{field_name}' in model '{model_name}'." - ) - return LinterResult() - - def check_string_constraints(self, field: Field, field_name: str, model_name: str) -> LinterResult: - result = LinterResult() - if field.minLength and field.maxLength and field.minLength > field.maxLength: - result = result.with_error( - f"Minimum length is greater that maximum length on field '{field_name}' in model '{model_name}'." - ) - if field.pattern and field.format: - result = result.with_error( - f"Both a pattern and a format are defined for field '{field_name}' in model '{model_name}'." - ) - return result - - @property - def name(self): - return "Fields use valid constraints" - - @property - def id(self): - return "field-constraints" - - def lint_implementation(self, contract: DataContractSpecification) -> LinterResult: - result = LinterResult() - for model_name, model in contract.models.items(): - for field_name, field in model.fields.items(): - for _property, allowed_types in self.valid_types_for_constraint.items(): - if _property in field.model_fields_set and field.type not in allowed_types: - result = result.with_error( - f"Forbidden constraint '{_property}' defined on field " - f"'{field_name}' in model '{model_name}'. Field type " - f"is '{field.type}'." - ) - result = result.combine(self.check_minimum_maximum(field, field_name, model_name)) - result = result.combine(self.check_string_constraints(field, field_name, model_name)) - return result diff --git a/tests/fixtures/lint/datacontract_csv_lint_base.yaml b/tests/fixtures/lint/datacontract_csv_lint_base.yaml deleted file mode 100644 index 17065a1ef..000000000 --- a/tests/fixtures/lint/datacontract_csv_lint_base.yaml +++ /dev/null @@ -1,18 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -models: - orders: - fields: - column_1: - type: string - column_2: - type: string -examples: - - type: csv - model: orders - data: |- - column_1, column_2 - value_1, value_2 diff --git a/tests/fixtures/lint/datacontract_quality_schema.yaml b/tests/fixtures/lint/datacontract_quality_schema.yaml deleted file mode 100644 index 72c7751fa..000000000 --- a/tests/fixtures/lint/datacontract_quality_schema.yaml +++ /dev/null @@ -1,17 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -models: - orders: - fields: - column_1: - type: string - column_2: - type: string -quality: - type: SodaCL - specification: |- - checks for orders: - - freshness(column_1) < 1d diff --git a/tests/fixtures/lint/datacontract_unknown_model.yaml b/tests/fixtures/lint/datacontract_unknown_model.yaml deleted file mode 100644 index 0ee1a3318..000000000 --- a/tests/fixtures/lint/datacontract_unknown_model.yaml +++ /dev/null @@ -1,11 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -examples: - - type: csv - model: orders - data: |- - column_1, column_2 - value_1, value_2 diff --git 
a/tests/fixtures/lint/datacontract_valid_field_constraints.yaml b/tests/fixtures/lint/datacontract_valid_field_constraints.yaml deleted file mode 100644 index 43f3de841..000000000 --- a/tests/fixtures/lint/datacontract_valid_field_constraints.yaml +++ /dev/null @@ -1,16 +0,0 @@ -dataContractSpecification: 1.2.0 -id: my-data-contract-id -info: - title: "My Data Contract" - version: 0.0.1 -models: - orders: - type: table - fields: - event_date: - type: date - amount: - type: number - id: - type: integer - required: true \ No newline at end of file diff --git a/tests/test_api.py b/tests/test_api.py index 6cdc04790..e715ca922 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -16,7 +16,7 @@ def test_lint(): assert response.status_code == 200 print(response.json()) assert response.json()["result"] == "passed" - assert len(response.json()["checks"]) == 6 + assert len(response.json()["checks"]) == 1 assert all([check["result"] == "passed" for check in response.json()["checks"]]) diff --git a/tests/test_documentation_linter.py b/tests/test_documentation_linter.py deleted file mode 100644 index c97b0f961..000000000 --- a/tests/test_documentation_linter.py +++ /dev/null @@ -1,48 +0,0 @@ -import datacontract.model.data_contract_specification as spec -from datacontract.lint.linters.description_linter import DescriptionLinter -from datacontract.model.run import Check - - -def construct_error_check(msg: str) -> Check: - return Check( - type="lint", - name="Linter 'Objects have descriptions'", - result="warning", - engine="datacontract", - reason=msg, - ) - - -success_check = Check(type="lint", name="Linter 'Objects have descriptions'", result="passed", engine="datacontract") - -linter = DescriptionLinter() - - -def test_correct_contract(): - specification = spec.DataContractSpecification( - info=spec.Info(description="Test contract description"), - models={ - "test_model": spec.Model( - description="Test model description", - fields={"test_field": spec.Field(description="Test field description")}, - ) - }, - examples=[spec.Example(description="Example description")], - definitions={"test_definition": spec.Definition(description="Test description definition")}, - ) - assert linter.lint(specification) == [success_check] - - -def test_missing_contract(): - specification = spec.DataContractSpecification( - models={"test_model": spec.Model(fields={"test_field": spec.Field()})}, - examples=[spec.Example()], - definitions={"test_definition": spec.Definition()}, - ) - assert linter.lint(specification) == [ - construct_error_check("Contract has empty description."), - construct_error_check("Model 'test_model' has empty description."), - construct_error_check("Field 'test_field' in model 'test_model' has empty description."), - construct_error_check("Definition 'test_definition' has empty description."), - construct_error_check("Example 1 has empty description."), - ] diff --git a/tests/test_export_bigquery.py b/tests/test_export_bigquery.py index 22949112c..0f91c6ac8 100644 --- a/tests/test_export_bigquery.py +++ b/tests/test_export_bigquery.py @@ -29,7 +29,7 @@ def test_exports_bigquery_schema(): with open(data_contract_file) as file: file_content = file.read() data_contract = DataContract(data_contract_str=file_content, server="bigquery") - assert data_contract.lint(enabled_linters="none").has_passed() + assert data_contract.lint().has_passed() result = data_contract.export("bigquery") print("Result:\n", result) diff --git a/tests/test_export_dbml.py b/tests/test_export_dbml.py index 
7dc6d10f9..6a2213368 100644 --- a/tests/test_export_dbml.py +++ b/tests/test_export_dbml.py @@ -26,7 +26,7 @@ def test_cli_with_server(): def test_dbml_export(): data_contract = DataContract(data_contract_file="fixtures/dbml/datacontract.yaml") - assert data_contract.lint(enabled_linters="none").has_passed() + assert data_contract.lint().has_passed() result = data_contract.export("dbml") @@ -81,7 +81,7 @@ def test_dbml_export(): def test_dbml_export_with_server(): data_contract = DataContract(data_contract_file="fixtures/dbml/datacontract.yaml", server="production") - assert data_contract.lint(enabled_linters="none").has_passed() + assert data_contract.lint().has_passed() result = data_contract.export("dbml") diff --git a/tests/test_field_constraint_linter.py b/tests/test_field_constraint_linter.py deleted file mode 100644 index 5a7d15158..000000000 --- a/tests/test_field_constraint_linter.py +++ /dev/null @@ -1,68 +0,0 @@ -import datacontract.model.data_contract_specification as spec -from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter -from datacontract.model.run import Check - - -def construct_error_check(msg: str) -> Check: - return Check( - type="lint", - name="Linter 'Fields use valid constraints'", - result="warning", - engine="datacontract", - reason=msg, - ) - - -success_check = Check(type="lint", name="Linter 'Fields use valid constraints'", result="passed", engine="datacontract") - -linter = ValidFieldConstraintsLinter() - - -def test_empty_constraints(): - specification = spec.DataContractSpecification( - models={ - "test_model": spec.Model( - fields={"test_field_1": spec.Field(type="string"), "test_field_2": spec.Field(type="number")} - ) - } - ) - assert linter.lint(specification) == [success_check] - - -def test_correct_constraints(): - specification = spec.DataContractSpecification( - models={ - "test_model": spec.Model( - fields={ - "test_field_1": spec.Field(type="string", minLength=5, maxLength=8), - "test_field_2": spec.Field(type="number", minimum=10, maximum=100), - } - ) - } - ) - assert linter.lint(specification) == [success_check] - - -def test_incorrect_constraints(): - specification = spec.DataContractSpecification( - models={ - "test_model": spec.Model( - fields={ - "test_field_1": spec.Field(type="number", minLength=5), - "test_field_2": spec.Field(type="string", maximum=100), - } - ) - } - ) - assert linter.lint(specification) == [ - construct_error_check( - "Forbidden constraint 'minLength' defined on " - "field 'test_field_1' in model 'test_model'. " - "Field type is 'number'." - ), - construct_error_check( - "Forbidden constraint 'maximum' defined on " - "field 'test_field_2' in model 'test_model'. " - "Field type is 'string'." 
- ), - ] diff --git a/tests/test_field_pattern_linter.py b/tests/test_field_pattern_linter.py deleted file mode 100644 index 2e914ad99..000000000 --- a/tests/test_field_pattern_linter.py +++ /dev/null @@ -1,42 +0,0 @@ -import datacontract.model.data_contract_specification as spec -from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter -from datacontract.model.run import Check - - -def construct_error_check(msg: str) -> Check: - return Check( - type="lint", - name="Linter 'Field pattern is correct regex'", - result="warning", - engine="datacontract", - reason=msg, - ) - - -success_check = Check( - type="lint", name="Linter 'Field pattern is correct regex'", result="passed", engine="datacontract" -) - -linter = FieldPatternLinter() - - -def test_correct_regex_pattern(): - specification = spec.DataContractSpecification( - models={"test_model": spec.Model(fields={"test_field": spec.Field(pattern=".")})} - ) - result = linter.lint(specification) - assert result == [success_check] - - -def test_incorrect_regex_pattern(): - specification = spec.DataContractSpecification( - models={"test_model": spec.Model(fields={"test_field": spec.Field(pattern="\\")})} - ) - result = linter.lint(specification) - assert result == [ - construct_error_check( - "Failed to compile pattern regex '\\' for field" - " 'test_field' in model 'test_model': " - "bad escape (end of pattern)" - ) - ] diff --git a/tests/test_field_reference_linter.py b/tests/test_field_reference_linter.py deleted file mode 100644 index 44cb8f163..000000000 --- a/tests/test_field_reference_linter.py +++ /dev/null @@ -1,56 +0,0 @@ -import datacontract.model.data_contract_specification as spec -from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter -from datacontract.model.run import Check - - -def construct_error_check(msg: str) -> Check: - return Check( - type="lint", - name="Linter 'Field references existing field'", - result="warning", - engine="datacontract", - reason=msg, - ) - - -success_check = Check( - type="lint", name="Linter 'Field references existing field'", result="passed", engine="datacontract" -) - -linter = FieldReferenceLinter() - - -def test_correct_field_reference(): - specification = spec.DataContractSpecification( - models={ - "test_model_1": spec.Model(fields={"test_field_1": spec.Field(references="test_model_2.test_field_1")}), - "test_model_2": spec.Model(fields={"test_field_1": spec.Field()}), - } - ) - assert linter.lint(specification) == [success_check] - - -def test_incorrect_model_reference(): - specification = spec.DataContractSpecification( - models={"test_model_1": spec.Model(fields={"test_field_1": spec.Field(references="test_model_2.test_field_1")})} - ) - assert linter.lint(specification) == [ - construct_error_check( - "Field 'test_field_1' in model 'test_model_1' references non-existing model 'test_model_2'." - ) - ] - - -def test_incorrect_field_reference(): - specification = spec.DataContractSpecification( - models={ - "test_model_1": spec.Model(fields={"test_field_1": spec.Field(references="test_model_2.test_field_1")}), - "test_model_2": spec.Model(), - } - ) - assert linter.lint(specification) == [ - construct_error_check( - "Field 'test_field_1' in model 'test_model_1' references non-existing field 'test_field_1'" - " in model 'test_model_2'." 
- ) - ] diff --git a/tests/test_import_avro.py b/tests/test_import_avro.py index 354f8e87d..b97630208 100644 --- a/tests/test_import_avro.py +++ b/tests/test_import_avro.py @@ -116,7 +116,7 @@ def test_import_avro_schema(): """ print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def test_import_avro_arrays_of_records_and_nested_arrays(): @@ -167,7 +167,7 @@ def test_import_avro_arrays_of_records_and_nested_arrays(): """ print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def test_import_avro_nested_records(): @@ -196,7 +196,7 @@ def test_import_avro_nested_records(): """ print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def test_import_avro_nested_records_with_arrays(): @@ -262,7 +262,7 @@ def test_import_avro_nested_records_with_arrays(): """ print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def test_import_avro_logical_types(): @@ -313,7 +313,7 @@ def test_import_avro_logical_types(): """ print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def test_import_avro_optional_enum(): @@ -351,4 +351,4 @@ def test_import_avro_optional_enum(): """ print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() diff --git a/tests/test_import_bigquery.py b/tests/test_import_bigquery.py index 83f178c16..daadb7668 100644 --- a/tests/test_import_bigquery.py +++ b/tests/test_import_bigquery.py @@ -30,7 +30,7 @@ def test_import_bigquery_schema(): with open("fixtures/bigquery/import/datacontract.yaml") as file: expected = file.read() assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def test_import_multiple_bigquery_schemas_with_different_types(): @@ -45,4 +45,4 @@ def test_import_multiple_bigquery_schemas_with_different_types(): with open("fixtures/bigquery/import/datacontract_multi_import.yaml") as file: expected = file.read() assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() diff --git a/tests/test_import_csv.py b/tests/test_import_csv.py index 1debe9afd..d510ae760 100644 --- 
a/tests/test_import_csv.py +++ b/tests/test_import_csv.py @@ -77,4 +77,4 @@ def test_import_csv(): print("Result", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) # Disable linters so we don't get "missing description" warnings - assert DataContract(data_contract_str=expected).lint(enabled_linters=set()).has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() diff --git a/tests/test_import_dbt.py b/tests/test_import_dbt.py index 37e856c74..bd9456d48 100644 --- a/tests/test_import_dbt.py +++ b/tests/test_import_dbt.py @@ -227,7 +227,7 @@ def test_import_dbt_manifest(): - TABLE_PII""" print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def test_import_dbt_manifest_bigquery(): @@ -391,7 +391,7 @@ def test_import_dbt_manifest_bigquery(): - TABLE_PII""" print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def test_import_dbt_manifest_with_filter_and_empty_columns(): @@ -413,7 +413,7 @@ def test_import_dbt_manifest_with_filter_and_empty_columns(): """ print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def test_import_dbt_manifest_with_filter(): @@ -463,4 +463,4 @@ def test_import_dbt_manifest_with_filter(): - TABLE_PII""" print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() diff --git a/tests/test_import_glue.py b/tests/test_import_glue.py index 9596c7b69..f61e647c7 100644 --- a/tests/test_import_glue.py +++ b/tests/test_import_glue.py @@ -166,7 +166,7 @@ def test_import_glue_schema_without_glue_table_filter(setup_mock_glue): print("Result", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) # Disable linters so we don't get "missing description" warnings - assert DataContract(data_contract_str=expected).lint(enabled_linters=set()).has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() @mock_aws @@ -179,7 +179,7 @@ def test_import_glue_schema_with_glue_table_filter(setup_mock_glue): print("Result", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) # Disable linters so we don't get "missing description" warnings - assert DataContract(data_contract_str=expected).lint(enabled_linters=set()).has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() @mock_aws @@ -193,4 +193,4 @@ def test_import_glue_schema_with_non_existent_glue_table_filter(setup_mock_glue) print("Result", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) # Disable linters so we don't get "missing description" warnings - assert DataContract(data_contract_str=expected).lint(enabled_linters=set()).has_passed() + assert 
DataContract(data_contract_str=expected).lint().has_passed() diff --git a/tests/test_import_json.py b/tests/test_import_json.py index 82d4361cb..c37993d54 100644 --- a/tests/test_import_json.py +++ b/tests/test_import_json.py @@ -61,7 +61,7 @@ def test_json_simple(): # making sure the data contract is correct data_contract = DataContract(data_contract_str=actual) - assert data_contract.lint(enabled_linters="none").has_passed() + assert data_contract.lint().has_passed() def test_json_complex(): @@ -105,7 +105,7 @@ def test_json_complex(): # making sure the data contract is correct data_contract = DataContract(data_contract_str=actual) - assert data_contract.lint(enabled_linters="none").has_passed() + assert data_contract.lint().has_passed() def test_ndjson(): @@ -148,4 +148,4 @@ def test_ndjson(): # making sure the data contract is correct data_contract = DataContract(data_contract_str=actual) - assert data_contract.lint(enabled_linters="none").has_passed() + assert data_contract.lint().has_passed() diff --git a/tests/test_import_jsonschema.py b/tests/test_import_jsonschema.py index b5dede019..6c153a9f0 100644 --- a/tests/test_import_jsonschema.py +++ b/tests/test_import_jsonschema.py @@ -58,7 +58,7 @@ def test_import_json_schema_orders(): print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def test_import_json_schema_football(): @@ -66,7 +66,7 @@ def test_import_json_schema_football(): with open("fixtures/import/football-datacontract.yml") as file: expected = file.read() - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) @@ -77,7 +77,7 @@ def test_import_json_schema_football_deeply_nested_no_required(): with open("fixtures/import/football_deeply_nested_no_required_datacontract.yml") as file: expected = file.read() - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) diff --git a/tests/test_import_odcs_v3.py b/tests/test_import_odcs_v3.py index da286db8b..2a4e5431e 100644 --- a/tests/test_import_odcs_v3.py +++ b/tests/test_import_odcs_v3.py @@ -29,14 +29,14 @@ def test_import_full_odcs(): result = DataContract.import_from_source("odcs", "./fixtures/odcs_v3/full-example.odcs.yaml") expected_datacontract = read_file("fixtures/odcs_v3/full-example.datacontract.yml") assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected_datacontract) - assert DataContract(data_contract_str=expected_datacontract).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected_datacontract).lint().has_passed() def test_import_complex_odcs(): result = DataContract.import_from_source("odcs", "./fixtures/odcs_v3/adventureworks.odcs.yaml") expected_datacontract = read_file("fixtures/odcs_v3/adventureworks.datacontract.yml") assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected_datacontract) - assert DataContract(data_contract_str=expected_datacontract).lint(enabled_linters="none").has_passed() + assert 
DataContract(data_contract_str=expected_datacontract).lint().has_passed() def read_file(file): diff --git a/tests/test_import_parquet.py b/tests/test_import_parquet.py index 2a2d24e82..ea5e2ae25 100644 --- a/tests/test_import_parquet.py +++ b/tests/test_import_parquet.py @@ -65,4 +65,4 @@ def test_import_parquet(): """ assert result.to_yaml() == expected - assert DataContract(data_contract_str=expected).lint(enabled_linters=set()).has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() diff --git a/tests/test_import_protobuf.py b/tests/test_import_protobuf.py index 505d62254..b7d720628 100644 --- a/tests/test_import_protobuf.py +++ b/tests/test_import_protobuf.py @@ -101,4 +101,4 @@ def test_import_protobuf(): print("Result", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) # Disable linters so we don't get "missing description" warnings - assert DataContract(data_contract_str=expected).lint(enabled_linters=set()).has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() diff --git a/tests/test_import_sql_postgres.py b/tests/test_import_sql_postgres.py index df635c34e..6a1a3aebf 100644 --- a/tests/test_import_sql_postgres.py +++ b/tests/test_import_sql_postgres.py @@ -60,7 +60,7 @@ def test_import_sql_postgres(): print("Result", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) # Disable linters so we don't get "missing description" warnings - assert DataContract(data_contract_str=expected).lint(enabled_linters=set()).has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def test_import_sql_constraints(): @@ -148,4 +148,4 @@ def test_import_sql_constraints(): print("Result", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) # Disable linters so we don't get "missing description" warnings - assert DataContract(data_contract_str=expected).lint(enabled_linters=set()).has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() diff --git a/tests/test_import_sql_sqlserver.py b/tests/test_import_sql_sqlserver.py index b3df21664..4c9fc0d55 100644 --- a/tests/test_import_sql_sqlserver.py +++ b/tests/test_import_sql_sqlserver.py @@ -175,4 +175,4 @@ def test_import_sql_sqlserver(): print("Result", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) # Disable linters so we don't get "missing description" warnings - assert DataContract(data_contract_str=expected).lint(enabled_linters=set()).has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() diff --git a/tests/test_import_unity_file.py b/tests/test_import_unity_file.py index 9abc713ea..9209585b7 100644 --- a/tests/test_import_unity_file.py +++ b/tests/test_import_unity_file.py @@ -34,7 +34,7 @@ def test_import_unity(): result_yaml = result.to_yaml() print("Result:\n", result_yaml) assert yaml.safe_load(result_yaml) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def test_cli_complex_types(): @@ -65,7 +65,7 @@ def test_import_unity_complex_types(): print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + assert DataContract(data_contract_str=expected).lint().has_passed() def 
test_import_unity_with_owner_and_id(): diff --git a/tests/test_lint.py b/tests/test_lint.py index ca8792161..c7c3a1fba 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -27,7 +27,7 @@ def test_lint_invalid_data_contract(): def test_lint_cli_valid(): data_contract_file = "fixtures/lint/valid_datacontract.yaml" - expected_output = "🟢 data contract is valid. Run 6 checks." + expected_output = "🟢 data contract is valid. Run 1 checks." result = runner.invoke(app, ["lint", data_contract_file]) @@ -71,13 +71,4 @@ def test_lint_with_references(): run = data_contract.lint() - assert run.result == "passed" - -# -# def test_lint_valid_field_constraints(): -# data_contract_file = "fixtures/lint/datacontract_valid_field_constraints.yaml" -# data_contract = DataContract(data_contract_file=data_contract_file) -# -# run = data_contract.lint() -# -# assert run.result == "passed" \ No newline at end of file + assert run.result == "passed" \ No newline at end of file diff --git a/tests/test_notice_period_linter.py b/tests/test_notice_period_linter.py deleted file mode 100644 index ab2c5478f..000000000 --- a/tests/test_notice_period_linter.py +++ /dev/null @@ -1,47 +0,0 @@ -import datacontract.model.data_contract_specification as spec -from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter -from datacontract.model.run import Check - - -def construct_error_check(msg: str) -> Check: - return Check( - type="lint", - name="Linter 'noticePeriod in ISO8601 format'", - result="warning", - engine="datacontract", - reason=msg, - ) - - -success_check = Check( - type="lint", name="Linter 'noticePeriod in ISO8601 format'", result="passed", engine="datacontract" -) - - -def test_lint_correct_period(): - specification = spec.DataContractSpecification() - specification.terms = spec.Terms(noticePeriod="P1M") - result = NoticePeriodLinter().lint(specification) - assert result == [success_check] - - -def test_lint_empty_period(): - # This returns a warning that's currently ignored. - # If warnings are treated differently, change this spec. - specification = spec.DataContractSpecification(terms=spec.Terms()) - result = NoticePeriodLinter().lint(specification) - assert result == [success_check] - - -def test_lint_incorrect_period(): - # This returns a warning that's currently ignored. - # If warnings are treated differently, change this spec. - specification = spec.DataContractSpecification(terms=spec.Terms(noticePeriod="P0")) - result = NoticePeriodLinter().lint(specification) - assert result == [construct_error_check("Notice period 'P0' is not a valid ISO8601 duration.")] - - -def test_lint_correct_datetime_period(): - specification = spec.DataContractSpecification(terms=spec.Terms(noticePeriod="P00000001T000001")) - result = NoticePeriodLinter().lint(specification) - assert result == [success_check] From 874bfe2876129880eaba4e29fc697b2ff401486c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 17 Oct 2025 13:57:50 +0200 Subject: [PATCH 025/150] chore(deps): update snowflake-connector-python[pandas] requirement (#911) Updates the requirements on [snowflake-connector-python[pandas]](https://github.com/snowflakedb/snowflake-connector-python) to permit the latest version. 
- [Release notes](https://github.com/snowflakedb/snowflake-connector-python/releases) - [Commits](https://github.com/snowflakedb/snowflake-connector-python/compare/v3.6.0...v4.0.0) --- updated-dependencies: - dependency-name: snowflake-connector-python[pandas] dependency-version: 4.0.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 29fdb71ee..d92f6a9e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,7 @@ s3 = [ ] snowflake = [ - "snowflake-connector-python[pandas]>=3.6,<3.17", + "snowflake-connector-python[pandas]>=3.6,<4.1", "soda-core-snowflake>=3.3.20,<3.6.0" ] From 26256edabae0f918d3d3e2f0f0b7db8fa35ac477 Mon Sep 17 00:00:00 2001 From: Baptiste Date: Fri, 17 Oct 2025 14:06:14 +0200 Subject: [PATCH 026/150] feat: enhance databricks connection with Unity Catalog using databricks profiles (#894) Co-authored-by: jochenchrist --- README.md | 10 +++++++--- datacontract/imports/unity_importer.py | 27 +++++++++++++++----------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 2e6f1ef45..3fee35df6 100644 --- a/README.md +++ b/README.md @@ -1444,18 +1444,22 @@ datacontract import --format bigquery --bigquery-project --bigquery ``` #### Unity Catalog - ```bash # Example import from a Unity Catalog JSON file datacontract import --format unity --source my_unity_table.json ``` ```bash -# Example import single table from Unity Catalog via HTTP endpoint +# Example import single table from Unity Catalog via HTTP endpoint using PAT export DATACONTRACT_DATABRICKS_SERVER_HOSTNAME="https://xyz.cloud.databricks.com" -export DATACONTRACT_DATABRICKS_HTTP_PATH="/sql/1.0/warehouses/b053a331fa014fb4" export DATACONTRACT_DATABRICKS_TOKEN= datacontract import --format unity --unity-table-full-name ``` + Please refer to the [Databricks documentation](https://docs.databricks.com/aws/en/dev-tools/auth/unified-auth) on how to set up a profile. +```bash +# Example import single table from Unity Catalog via HTTP endpoint using Profile +export DATACONTRACT_DATABRICKS_PROFILE="my-profile" +datacontract import --format unity --unity-table-full-name ``` #### dbt diff --git a/datacontract/imports/unity_importer.py b/datacontract/imports/unity_importer.py index bb02516fd..973ddc7a2 100644 --- a/datacontract/imports/unity_importer.py +++ b/datacontract/imports/unity_importer.py @@ -88,23 +88,28 @@ def import_unity_from_api( """ try: # print(f"Retrieving Unity Catalog schema for table: {unity_table_full_name}") + profile = os.getenv("DATACONTRACT_DATABRICKS_PROFILE") host, token = os.getenv("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME"), os.getenv("DATACONTRACT_DATABRICKS_TOKEN") # print(f"Databricks host: {host}, token: {'***' if token else 'not set'}") - if not host: - raise DataContractException( - type="configuration", - name="Databricks configuration", - reason="DATACONTRACT_DATABRICKS_SERVER_HOSTNAME environment variable is not set", - engine="datacontract", - ) - if not token: - raise DataContractException( + exception = DataContractException( type="configuration", name="Databricks configuration", - reason="DATACONTRACT_DATABRICKS_TOKEN environment variable is not set", + reason="", engine="datacontract", ) - workspace_client = WorkspaceClient(host=host, token=token) + if not profile and not host and not token: + reason = "Either
DATACONTRACT_DATABRICKS_PROFILE or both DATACONTRACT_DATABRICKS_SERVER_HOSTNAME and DATACONTRACT_DATABRICKS_TOKEN environment variables must be set" + exception.reason = reason + raise exception + if token and not host: + reason = "DATACONTRACT_DATABRICKS_SERVER_HOSTNAME environment variable is not set" + exception.reason = reason + raise exception + if host and not token: + reason = "DATACONTRACT_DATABRICKS_TOKEN environment variable is not set" + exception.reason = reason + raise exception + workspace_client = WorkspaceClient(profile=profile) if profile else WorkspaceClient(host=host, token=token) except Exception as e: raise DataContractException( type="schema", From a52594a39e3288a1ad7b60cc1fd44dfa84ca7ff8 Mon Sep 17 00:00:00 2001 From: jochenchrist Date: Fri, 17 Oct 2025 14:12:40 +0200 Subject: [PATCH 027/150] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3fee35df6..f9e984eb5 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ This data contract contains all information to connect to S3 and check that the Let's use [uv](https://docs.astral.sh/uv/) to install the CLI (or use the [Docker image](#docker)), ```bash -$ uv tool install 'datacontract-cli[all]' +$ uv tool install --python python3.11 'datacontract-cli[all]' ``` From bf5822b3ea293d09e0e7d371acf08f5d914fac72 Mon Sep 17 00:00:00 2001 From: jochen Date: Fri, 17 Oct 2025 14:20:33 +0200 Subject: [PATCH 028/150] Restrict deployment to specific repository --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d0994df22..5bcc332e5 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -73,7 +73,7 @@ jobs: runs-on: ubuntu-latest needs: - test - if: github.event_name != 'pull_request' + if: github.event_name != 'pull_request' && github.repository == 'datacontract/datacontract-cli' steps: - name: Checkout uses: actions/checkout@v4 From ea58ab531bce71d7015f967e8b1a182494c5633d Mon Sep 17 00:00:00 2001 From: jochen Date: Fri, 17 Oct 2025 15:10:36 +0200 Subject: [PATCH 029/150] Import: Fallback to physicalType when logicalType is missing in ODCS --- CHANGELOG.md | 3 +- datacontract/imports/odcs_v3_importer.py | 75 ++++++++++++++++++++---- tests/test_import_odcs_v3.py | 75 ++++++++++++++++++++++++ 3 files changed, 139 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 788477eb6..57cd48ec5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Export to DQX: Correct DQX format for global-level quality check of data contract export. 
(#877) - Import the table tags from an open data contract spec v3 (#895) - dbt export: Enhanced model-level primaryKey support with automatic test generation for single and multiple column primary keys (#898) - +- ODCS: field discarded when no logicalType defined (#891) + ### Removed - Removed specific linters, as the linters did not support ODCS (#913) diff --git a/datacontract/imports/odcs_v3_importer.py b/datacontract/imports/odcs_v3_importer.py index 255593ef0..667daf719 100644 --- a/datacontract/imports/odcs_v3_importer.py +++ b/datacontract/imports/odcs_v3_importer.py @@ -334,7 +334,7 @@ def import_fields( return result for odcs_property in odcs_properties: - mapped_type = map_type(odcs_property.logicalType, custom_type_mappings) + mapped_type = map_type(odcs_property.logicalType, custom_type_mappings, odcs_property.physicalType) if mapped_type is not None: property_name = odcs_property.name description = odcs_property.description if odcs_property.description is not None else None @@ -377,23 +377,72 @@ def import_fields( result[property_name] = field else: - logger.info( - f"Can't map {odcs_property.name} to the Datacontract Mapping types, as there is no equivalent or special mapping. Consider introducing a customProperty 'dc_mapping_{odcs_property.logicalType}' that defines your expected type as the 'value'" + type_info = f"logicalType={odcs_property.logicalType}, physicalType={odcs_property.physicalType}" + logger.warning( + f"Can't map field '{odcs_property.name}' ({type_info}) to the Datacontract Mapping types. " + f"Both logicalType and physicalType are missing or unmappable. " + f"Consider introducing a customProperty 'dc_mapping_' that defines your expected type as the 'value'" ) return result -def map_type(odcs_type: str, custom_mappings: Dict[str, str]) -> str | None: - if odcs_type is None: - return None - t = odcs_type.lower() - if t in DATACONTRACT_TYPES: - return t - elif custom_mappings.get(t) is not None: - return custom_mappings.get(t) - else: - return None +def map_type(odcs_logical_type: str, custom_mappings: Dict[str, str], physical_type: str = None) -> str | None: + # Try to map logicalType first + if odcs_logical_type is not None: + t = odcs_logical_type.lower() + if t in DATACONTRACT_TYPES: + return t + elif custom_mappings.get(t) is not None: + return custom_mappings.get(t) + + # Fallback to physicalType if logicalType is not mapped + if physical_type is not None: + pt = physical_type.lower() + # Remove parameters from physical type (e.g., VARCHAR(50) -> varchar, DECIMAL(10,2) -> decimal) + pt_base = pt.split('(')[0].strip() + + # Try direct mapping of physical type + if pt in DATACONTRACT_TYPES: + return pt + elif pt_base in DATACONTRACT_TYPES: + return pt_base + elif custom_mappings.get(pt) is not None: + return custom_mappings.get(pt) + elif custom_mappings.get(pt_base) is not None: + return custom_mappings.get(pt_base) + # Common physical type mappings + elif pt_base in ["varchar", "char", "nvarchar", "nchar", "text", "ntext", "string", "character varying"]: + return "string" + elif pt_base in ["int", "integer", "smallint", "tinyint", "mediumint", "int2", "int4", "int8"]: + return "int" + elif pt_base in ["bigint", "long", "int64"]: + return "long" + elif pt_base in ["float", "real", "float4", "float8"]: + return "float" + elif pt_base in ["double", "double precision"]: + return "double" + elif pt_base in ["decimal", "numeric", "number"]: + return "decimal" + elif pt_base in ["boolean", "bool", "bit"]: + return "boolean" + elif pt_base in ["timestamp",
"datetime", "datetime2", "timestamptz", "timestamp with time zone"]: + return "timestamp" + elif pt_base in ["date"]: + return "date" + elif pt_base in ["time"]: + return "time" + elif pt_base in ["json", "jsonb"]: + return "json" + elif pt_base in ["array"]: + return "array" + elif pt_base in ["object", "struct", "record"]: + return "object" + elif pt_base in ["bytes", "binary", "varbinary", "blob", "bytea"]: + return "bytes" + else: + return None + return None def get_custom_type_mappings(odcs_custom_properties: List[CustomProperty]) -> Dict[str, str]: diff --git a/tests/test_import_odcs_v3.py b/tests/test_import_odcs_v3.py index 2a4e5431e..48976b07a 100644 --- a/tests/test_import_odcs_v3.py +++ b/tests/test_import_odcs_v3.py @@ -39,6 +39,81 @@ def test_import_complex_odcs(): assert DataContract(data_contract_str=expected_datacontract).lint().has_passed() +def test_import_odcs_without_logicaltype(): + """Test that fields without logicalType are imported using physicalType as fallback. + + This test validates the fix for issue #891 where fields without logicalType + were being discarded during import, even though logicalType is optional per ODCS spec. + """ + odcs_yaml = """ +version: 3.0.2 +kind: DataContract +apiVersion: v3.0.2 +id: test-contract-no-logicaltype +name: Test Contract Without LogicalType +description: + purpose: Test ODCS import without logicalType + +schema: + - name: test_table + type: table + properties: + - name: event_date + physicalType: DATE + description: Event date without logicalType + required: true + - name: amount + physicalType: DECIMAL(10,2) + description: Amount without logicalType + required: true + - name: status + physicalType: VARCHAR(50) + description: Status field + required: false + - name: count + physicalType: INTEGER + description: Count field +""" + + # Write test file + import tempfile + with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f: + f.write(odcs_yaml) + temp_file = f.name + + try: + result = DataContract.import_from_source("odcs", temp_file) + + # Verify model was imported + assert result.models is not None + assert "test_table" in result.models + + # Verify all fields were imported (not discarded) + model = result.models["test_table"] + assert len(model.fields) == 4, f"Expected 4 fields, got {len(model.fields)}" + + # Verify field types were correctly mapped from physicalType + assert "event_date" in model.fields + assert model.fields["event_date"].type == "date" + + assert "amount" in model.fields + assert model.fields["amount"].type == "decimal" + + assert "status" in model.fields + assert model.fields["status"].type == "varchar" + + assert "count" in model.fields + assert model.fields["count"].type == "integer" + + # Verify physicalType is preserved in config + assert model.fields["amount"].config is not None + assert model.fields["amount"].config.get("physicalType") == "DECIMAL(10,2)" + + finally: + # Clean up temp file + os.unlink(temp_file) + + def read_file(file): if not os.path.exists(file): print(f"The file '{file}' does not exist.") From 19148b609213c616fbea217d66a62ac346e66aaf Mon Sep 17 00:00:00 2001 From: jochen Date: Fri, 17 Oct 2025 15:13:46 +0200 Subject: [PATCH 030/150] Bump version to 0.10.36 and update changelog with Data Contract Specification v1.2.1 support. 
--- CHANGELOG.md | 3 +++ pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57cd48ec5..b6e35604d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased + +## [0.10.36] - 2025-10-17 + ### Added - Support for Data Contract Specification v1.2.1 (Data Quality Metrics) diff --git a/pyproject.toml b/pyproject.toml index d92f6a9e7..c1d08c4cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "datacontract-cli" -version = "0.10.35" +version = "0.10.36" description = "The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library." license = "MIT" readme = "README.md" From 3ade60ef66cc31137fc9a6bbfa441005fd863ab0 Mon Sep 17 00:00:00 2001 From: jochen Date: Fri, 17 Oct 2025 15:38:50 +0200 Subject: [PATCH 031/150] Bump version to 0.10.36 and update changelog with Data Contract Specification v1.2.1 support. --- README.md | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index f9e984eb5..25fdee8cb 100644 --- a/README.md +++ b/README.md @@ -656,19 +656,37 @@ models: fields: ... ``` -Notebook -```python -%pip install datacontract-cli[databricks] -dbutils.library.restartPython() +##### Installing on Databricks Compute -from datacontract.data_contract import DataContract +**Important:** When using Databricks LTS ML runtimes (15.4, 16.4), installing via `%pip install` in notebooks can issues. -data_contract = DataContract( - data_contract_file="/Volumes/acme_catalog_prod/orders_latest/datacontract/datacontract.yaml", - spark=spark) -run = data_contract.test() -run.result -``` +**Recommended approach:** Use Databricks' native library management instead: + +1. **Create or configure your compute cluster:** + - Navigate to **Compute** in the Databricks workspace + - Create a new cluster or select an existing one + - Go to the **Libraries** tab + +2. **Add the datacontract-cli library:** + - Click **Install new** + - Select **PyPI** as the library source + - Enter package name: `datacontract-cli[databricks]` + - Click **Install** + +3. **Restart the cluster** to apply the library installation + +4. **Use in your notebook** without additional installation: + ```python + from datacontract.data_contract import DataContract + + data_contract = DataContract( + data_contract_file="/Volumes/acme_catalog_prod/orders_latest/datacontract/datacontract.yaml", + spark=spark) + run = data_contract.test() + run.result + ``` + +Databricks' library management properly resolves dependencies during cluster initialization, rather than at runtime in the notebook. #### Dataframe (programmatic) From a62f11f620af54e53b990a45b29ecc78294f2dd3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 18 Oct 2025 10:34:26 +0200 Subject: [PATCH 032/150] chore(deps): update databricks-sql-connector requirement (#859) Updates the requirements on [databricks-sql-connector](https://github.com/databricks/databricks-sql-python) to permit the latest version. 
- [Release notes](https://github.com/databricks/databricks-sql-python/releases) - [Changelog](https://github.com/databricks/databricks-sql-python/blob/main/CHANGELOG.md) - [Commits](https://github.com/databricks/databricks-sql-python/compare/v3.7.0...v4.1.0) --- updated-dependencies: - dependency-name: databricks-sql-connector dependency-version: 4.1.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: jochenchrist --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c1d08c4cb..251c30b23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,7 @@ excel = [ databricks = [ "soda-core-spark-df>=3.3.20,<3.6.0", "soda-core-spark[databricks]>=3.3.20,<3.6.0", - "databricks-sql-connector>=3.7.0,<4.1.0", + "databricks-sql-connector>=3.7.0,<4.2.0", "databricks-sdk<0.68.0", "pyspark>=3.5.5,<4.0.0", ] From 16b10253df6d3e8ee1f2fa4e2eaf8cf8f5ae49a6 Mon Sep 17 00:00:00 2001 From: jochenchrist Date: Mon, 20 Oct 2025 18:45:47 +0200 Subject: [PATCH 033/150] Update README.md --- README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.md b/README.md index 25fdee8cb..4f831af1b 100644 --- a/README.md +++ b/README.md @@ -2041,6 +2041,28 @@ uv run ruff check uv run pytest ``` +### Troubleshooting + +Windows: Test fail + +Run in wsl. (We need to fix the pathes in the tests so that normal Windows will work, contributions are appreciated) + +PyCharm does not pick up the `.venv` ([Issue](https://github.com/astral-sh/uv/issues/12545)) + +Try to sync all groups: + +``` +uv sync --all-groups --all-extras +``` + +Errors in tests that use PySpark (e.g. test_test_kafka.py) + +Ensure you have a JDK 17 or 21 installed. Java 25 causes issues. + +``` +java --version +``` + ### Docker Build From 4cc107a785af072af6345704d19820612ae902b3 Mon Sep 17 00:00:00 2001 From: jochenchrist Date: Mon, 20 Oct 2025 18:53:25 +0200 Subject: [PATCH 034/150] Update README.md --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4f831af1b..c171f147a 100644 --- a/README.md +++ b/README.md @@ -2043,11 +2043,13 @@ uv run pytest ### Troubleshooting -Windows: Test fail +#### Windows: Some tests fail Run in wsl. (We need to fix the pathes in the tests so that normal Windows will work, contributions are appreciated) -PyCharm does not pick up the `.venv` ([Issue](https://github.com/astral-sh/uv/issues/12545)) +#### PyCharm does not pick up the `.venv` + +This [uv issue](https://github.com/astral-sh/uv/issues/12545) might be relevant. Try to sync all groups: @@ -2055,7 +2057,7 @@ Try to sync all groups: uv sync --all-groups --all-extras ``` -Errors in tests that use PySpark (e.g. test_test_kafka.py) +#### Errors in tests that use PySpark (e.g. test_test_kafka.py) Ensure you have a JDK 17 or 21 installed. Java 25 causes issues. 
From 5d351cfec20e347a5f9af9fdbdee5d03116774fa Mon Sep 17 00:00:00 2001 From: Timo Date: Fri, 24 Oct 2025 12:31:26 +0000 Subject: [PATCH 035/150] Oracle: Import contract from SQL file (#920) * Oracle: Import contract from SQL file * Review feedback --- datacontract/imports/sql_importer.py | 40 +++-- tests/fixtures/oracle/import/ddl.sql | 28 +++ tests/test_import_sql_oracle.py | 256 +++++++++++++++++++++++++++ 3 files changed, 310 insertions(+), 14 deletions(-) create mode 100644 tests/fixtures/oracle/import/ddl.sql create mode 100644 tests/test_import_sql_oracle.py diff --git a/datacontract/imports/sql_importer.py b/datacontract/imports/sql_importer.py index e8efe3b90..3a5558a86 100644 --- a/datacontract/imports/sql_importer.py +++ b/datacontract/imports/sql_importer.py @@ -220,7 +220,7 @@ def map_type_from_sql(sql_type: str) -> str | None: return "string" elif sql_type_normed.startswith("ntext"): return "string" - elif sql_type_normed.startswith("int"): + elif sql_type_normed.startswith("int") and not sql_type_normed.startswith("interval"): return "int" elif sql_type_normed.startswith("bigint"): return "long" @@ -244,26 +244,20 @@ def map_type_from_sql(sql_type: str) -> str | None: return "bytes" elif sql_type_normed.startswith("varbinary"): return "bytes" + elif sql_type_normed.startswith("raw"): + return "bytes" + elif sql_type_normed == "blob" or sql_type_normed == "bfile": + return "bytes" elif sql_type_normed == "date": return "date" elif sql_type_normed == "time": return "string" - elif sql_type_normed == "timestamp": - return "timestamp_ntz" - elif ( - sql_type_normed == "timestamptz" - or sql_type_normed == "timestamp_tz" - or sql_type_normed == "timestamp with time zone" - ): - return "timestamp_tz" - elif sql_type_normed == "timestampntz" or sql_type_normed == "timestamp_ntz": + elif sql_type_normed.startswith("timestamp"): + return map_timestamp(sql_type_normed) + elif sql_type_normed == "datetime" or sql_type_normed == "datetime2": return "timestamp_ntz" elif sql_type_normed == "smalldatetime": return "timestamp_ntz" - elif sql_type_normed == "datetime": - return "timestamp_ntz" - elif sql_type_normed == "datetime2": - return "timestamp_ntz" elif sql_type_normed == "datetimeoffset": return "timestamp_tz" elif sql_type_normed == "uniqueidentifier": # tsql @@ -272,9 +266,27 @@ def map_type_from_sql(sql_type: str) -> str | None: return "string" elif sql_type_normed == "xml": # tsql return "string" + elif sql_type_normed.startswith("number"): + return "number" + elif (sql_type_normed == "clob" or sql_type_normed == "nclob"): + return "text" else: return "variant" +def map_timestamp(timestamp_type: str) -> str: + match timestamp_type: + case "timestamp" | "timestampntz" | "timestamp_ntz" : + return "timestamp_ntz" + case "timestamptz" | "timestamp_tz" | "timestamp with time zone": + return "timestamp_tz" + case localTimezone if localTimezone.startswith("timestampltz"): + return "timestamp_tz" + case timezoneWrittenOut if timezoneWrittenOut.endswith("time zone"): + return "timestamp_tz" + case _: + return "timestamp" + + def read_file(path): if not os.path.exists(path): diff --git a/tests/fixtures/oracle/import/ddl.sql b/tests/fixtures/oracle/import/ddl.sql new file mode 100644 index 000000000..304dcb668 --- /dev/null +++ b/tests/fixtures/oracle/import/ddl.sql @@ -0,0 +1,28 @@ +-- https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Data-Types.html + +CREATE TABLE field_showcase +( + field_primary_key INT PRIMARY KEY, -- Primary key + field_not_null INT NOT NULL, -- 
Not null + field_varchar VARCHAR2, -- Variable-length string + field_nvarchar NVARCHAR2, -- Variable-length Unicode string + field_number NUMBER, -- Number + field_float FLOAT, -- Float + field_date DATE, -- Date and Time down to second precision + field_binary_float BINARY_FLOAT, -- 32-bit floating point number + field_binary_double BINARY_DOUBLE, -- 64-bit floating point number + field_timestamp TIMESTAMP, -- Timestamp with fractional second precision of 6, no timezones + field_timestamp_tz TIMESTAMP WITH TIME ZONE, -- Timestamp with fractional second precision of 6, with timezones (TZ) + field_timestamp_ltz TIMESTAMP WITH LOCAL TIME ZONE, -- Timestamp with fractional second precision of 6, with local timezone (LTZ) + field_interval_year INTERVAL YEAR TO MONTH, -- Interval of time in years and months with default (2) precision + field_interval_day INTERVAL DAY TO SECOND, -- Interval of time in days, hours, minutes and seconds with default (2 / 6) precision + field_raw RAW, -- Large raw binary data + field_rowid ROWID, -- Base 64 string representing a unique row address + field_urowid UROWID, -- Base 64 string representing the logical address + field_char CHAR(10), -- Fixed-length string + field_nchar NCHAR(10), -- Fixed-length Unicode string + field_clob CLOB, -- Character large object + field_nclob NCLOB, -- National character large object + field_blob BLOB, -- Binary large object + field_bfile BFILE -- Binary file +) \ No newline at end of file diff --git a/tests/test_import_sql_oracle.py b/tests/test_import_sql_oracle.py new file mode 100644 index 000000000..f63107250 --- /dev/null +++ b/tests/test_import_sql_oracle.py @@ -0,0 +1,256 @@ +import yaml +from typer.testing import CliRunner + +from datacontract.cli import app +from datacontract.data_contract import DataContract + +# logging.basicConfig(level=logging.DEBUG, force=True) + +data_definition_file = "fixtures/oracle/import/ddl.sql" + + +def test_cli(): + runner = CliRunner() + result = runner.invoke( + app, + [ + "import", + "--format", + "sql", + "--source", + data_definition_file, + ], + ) + assert result.exit_code == 0 + + +def test_import_sql_oracle(): + result = DataContract.import_from_source("sql", data_definition_file, dialect="oracle") + + expected = """ +dataContractSpecification: 1.2.1 +id: my-data-contract-id +info: + title: My Data Contract + version: 0.0.1 +servers: + oracle: + type: oracle +models: + field_showcase: + type: table + fields: + field_primary_key: + type: int + primaryKey: true + description: Primary key + config: + oracleType: INT + field_not_null: + type: int + required: true + description: Not null + config: + oracleType: INT + field_varchar: + type: string + description: Variable-length string + config: + oracleType: VARCHAR2 + field_nvarchar: + type: string + description: Variable-length Unicode string + config: + oracleType: NVARCHAR2 + field_number: + type: number + description: Number + config: + oracleType: NUMBER + field_float: + type: float + description: Float + config: + oracleType: FLOAT + field_date: + type: date + description: Date and Time down to second precision + config: + oracleType: DATE + field_binary_float: + type: float + description: 32-bit floating point number + config: + oracleType: FLOAT + field_binary_double: + type: double + description: 64-bit floating point number + config: + oracleType: DOUBLE PRECISION + field_timestamp: + type: timestamp_ntz + description: Timestamp with fractional second precision of 6, no timezones + config: + oracleType: TIMESTAMP + 
field_timestamp_tz: + type: timestamp_tz + description: Timestamp with fractional second precision of 6, with timezones + (TZ) + config: + oracleType: TIMESTAMP WITH TIME ZONE + field_timestamp_ltz: + type: timestamp_tz + description: Timestamp with fractional second precision of 6, with local timezone + (LTZ) + config: + oracleType: TIMESTAMPLTZ + field_interval_year: + type: variant + description: Interval of time in years and months with default (2) precision + config: + oracleType: INTERVAL YEAR TO MONTH + field_interval_day: + type: variant + description: Interval of time in days, hours, minutes and seconds with default + (2 / 6) precision + config: + oracleType: INTERVAL DAY TO SECOND + field_raw: + type: bytes + description: Large raw binary data + config: + oracleType: RAW + field_rowid: + type: variant + description: Base 64 string representing a unique row address + config: + oracleType: ROWID + field_urowid: + type: variant + description: Base 64 string representing the logical address + config: + oracleType: UROWID + field_char: + type: string + description: Fixed-length string + maxLength: 10 + config: + oracleType: CHAR(10) + field_nchar: + type: string + description: Fixed-length Unicode string + maxLength: 10 + config: + oracleType: NCHAR(10) + field_clob: + type: text + description: Character large object + config: + oracleType: CLOB + field_nclob: + type: text + description: National character large object + config: + oracleType: NCLOB + field_blob: + type: bytes + description: Binary large object + config: + oracleType: BLOB + field_bfile: + type: bytes + config: + oracleType: BFILE + """ + print("Result", result.to_yaml()) + assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) + # Disable linters so we don't get "missing description" warnings + assert DataContract(data_contract_str=expected).lint().has_passed() + + +def test_import_sql_constraints(): + result = DataContract.import_from_source("sql", "fixtures/postgres/data/data_constraints.sql", dialect="postgres") + + expected = """ +dataContractSpecification: 1.2.1 +id: my-data-contract-id +info: + title: My Data Contract + version: 0.0.1 +servers: + postgres: + type: postgres +models: + customer_location: + type: table + fields: + id: + type: decimal + required: true + # primaryKey: true + config: + postgresType: DECIMAL + created_by: + type: string + required: true + maxLength: 30 + config: + postgresType: VARCHAR(30) + create_date: + type: timestamp_ntz + required: true + config: + postgresType: TIMESTAMP + changed_by: + type: string + maxLength: 30 + config: + postgresType: VARCHAR(30) + change_date: + type: timestamp_ntz + config: + postgresType: TIMESTAMP + name: + type: string + required: true + maxLength: 120 + config: + postgresType: VARCHAR(120) + short_name: + type: string + maxLength: 60 + config: + postgresType: VARCHAR(60) + display_name: + type: string + required: true + maxLength: 120 + config: + postgresType: VARCHAR(120) + code: + type: string + required: true + maxLength: 30 + config: + postgresType: VARCHAR(30) + description: + type: string + maxLength: 4000 + config: + postgresType: VARCHAR(4000) + language_id: + type: decimal + required: true + config: + postgresType: DECIMAL + status: + type: string + required: true + maxLength: 2 + config: + postgresType: VARCHAR(2) + """ + print("Result", result.to_yaml()) + assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) + # Disable linters so we don't get "missing description" warnings + assert 
DataContract(data_contract_str=expected).lint().has_passed() From 11921861b267643947f889a40498c8f253e37e46 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 30 Oct 2025 15:41:05 +0100 Subject: [PATCH 036/150] chore(deps): bump uvicorn from 0.35.0 to 0.38.0 (#918) Bumps [uvicorn](https://github.com/Kludex/uvicorn) from 0.35.0 to 0.38.0. - [Release notes](https://github.com/Kludex/uvicorn/releases) - [Changelog](https://github.com/Kludex/uvicorn/blob/main/docs/release-notes.md) - [Commits](https://github.com/Kludex/uvicorn/compare/0.35.0...0.38.0) --- updated-dependencies: - dependency-name: uvicorn dependency-version: 0.38.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 251c30b23..5714eac84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,7 +118,7 @@ rdf = [ api = [ "fastapi==0.116.1", - "uvicorn==0.35.0", + "uvicorn==0.38.0", ] protobuf = [ From 12afbd745fff369ad074a06094aa3d0cc292b191 Mon Sep 17 00:00:00 2001 From: didipayson <141541117+didipayson@users.noreply.github.com> Date: Thu, 30 Oct 2025 18:51:38 -0700 Subject: [PATCH 037/150] odcs v3 importer databricks nested arrays support (#906) * add support for nested arrays in Databricks for odcs v3 importer * remove added blank lines * ruff format * CHANGELOG.md entry added * Refactor array logic. Update test cases. --------- Co-authored-by: Didi Payson Co-authored-by: jochen --- CHANGELOG.md | 8 + datacontract/export/excel_exporter.py | 1 + datacontract/imports/odcs_v3_importer.py | 119 ++++++++------ pyproject.toml | 2 +- tests/fixtures/excel/shipments-odcs.yaml | 30 ++++ .../odcs_v3/adventureworks.datacontract.yml | 153 +----------------- .../odcs_v3/full-example.datacontract.yml | 6 + tests/fixtures/odcs_v3/full-example.odcs.yaml | 6 + 8 files changed, 122 insertions(+), 203 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6e35604d..d3b97bf75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- Support for nested arrays in odcs v3 importer + +### Fixed + +- Excel exporter now exports critical data element + ## [0.10.36] - 2025-10-17 diff --git a/datacontract/export/excel_exporter.py b/datacontract/export/excel_exporter.py index df8896211..902f4654b 100644 --- a/datacontract/export/excel_exporter.py +++ b/datacontract/export/excel_exporter.py @@ -314,6 +314,7 @@ def set_by_header(header_name: str, value: Any): "Transform Sources", ",".join(property.transformSourceObjects) if property.transformSourceObjects else "" ) set_by_header("Transform Logic", property.transformLogic) + set_by_header("Critical Data Element Status", property.criticalDataElement) # Authoritative definitions if property.authoritativeDefinitions and len(property.authoritativeDefinitions) > 0: diff --git a/datacontract/imports/odcs_v3_importer.py b/datacontract/imports/odcs_v3_importer.py index 667daf719..94ee12bb0 100644 --- a/datacontract/imports/odcs_v3_importer.py +++ b/datacontract/imports/odcs_v3_importer.py @@ -284,7 +284,7 @@ def convert_quality_list(odcs_quality_list): return quality_list -def import_field_config(odcs_property: SchemaProperty, server_type=None) -> Dict[str, Any]: +def 
import_field_config(odcs_property: SchemaProperty, server_type=None) -> dict[Any, Any] | None: config = {} if odcs_property.criticalDataElement is not None: config["criticalDataElement"] = odcs_property.criticalDataElement @@ -316,6 +316,9 @@ def import_field_config(odcs_property: SchemaProperty, server_type=None) -> Dict else: config["physicalType"] = physical_type + if len(config) == 0: + return None + return config @@ -327,64 +330,80 @@ def has_composite_primary_key(odcs_properties: List[SchemaProperty]) -> bool: def import_fields( odcs_properties: List[SchemaProperty], custom_type_mappings: Dict[str, str], server_type ) -> Dict[str, Field]: - logger = logging.getLogger(__name__) result = {} if odcs_properties is None: return result for odcs_property in odcs_properties: - mapped_type = map_type(odcs_property.logicalType, custom_type_mappings, odcs_property.physicalType) - if mapped_type is not None: - property_name = odcs_property.name - description = odcs_property.description if odcs_property.description is not None else None - field = Field( - description=" ".join(description.splitlines()) if description is not None else None, - type=mapped_type, - title=odcs_property.businessName, - required=odcs_property.required if odcs_property.required is not None else None, - primaryKey=odcs_property.primaryKey - if not has_composite_primary_key(odcs_properties) and odcs_property.primaryKey is not None - else False, - unique=odcs_property.unique if odcs_property.unique else None, - examples=odcs_property.examples if odcs_property.examples is not None else None, - classification=odcs_property.classification if odcs_property.classification is not None else None, - tags=odcs_property.tags if odcs_property.tags is not None else None, - quality=convert_quality_list(odcs_property.quality), - fields=import_fields(odcs_property.properties, custom_type_mappings, server_type) - if odcs_property.properties is not None - else {}, - config=import_field_config(odcs_property, server_type), - format=getattr(odcs_property, "format", None), - ) - # mapped_type is array - if field.type == "array" and odcs_property.items is not None: - # nested array object - if odcs_property.items.logicalType == "object": - field.items = Field( - type="object", - fields=import_fields(odcs_property.items.properties, custom_type_mappings, server_type), - ) - # array of simple type - elif odcs_property.items.logicalType is not None: - field.items = Field(type=odcs_property.items.logicalType) + field = import_field(odcs_property, odcs_properties, custom_type_mappings, server_type) + if field is not None: + result[odcs_property.name] = field - # enum from quality validValues as enum - if field.type == "string": - for q in field.quality: - if hasattr(q, "validValues"): - field.enum = q.validValues + return result - result[property_name] = field - else: - type_info = f"logicalType={odcs_property.logicalType}, physicalType={odcs_property.physicalType}" - logger.warning( - f"Can't map field '{odcs_property.name}' ({type_info}) to the Datacontract Mapping types. " - f"Both logicalType and physicalType are missing or unmappable. " - f"Consider introducing a customProperty 'dc_mapping_' that defines your expected type as the 'value'" - ) - return result +def import_field( + odcs_property: SchemaProperty, + odcs_properties: List[SchemaProperty], + custom_type_mappings: Dict[str, str], + server_type: str +) -> Field | None: + """ + Import a single ODCS property as a datacontract Field. + Returns None if the property cannot be mapped. 
+ """ + logger = logging.getLogger(__name__) + + mapped_type = map_type(odcs_property.logicalType, custom_type_mappings, odcs_property.physicalType) + + if mapped_type is None: + type_info = f"logicalType={odcs_property.logicalType}, physicalType={odcs_property.physicalType}" + logger.warning( + f"Can't map field '{odcs_property.name}' ({type_info}) to the datacontract mapping types. " + f"Both logicalType and physicalType are missing or unmappable. " + f"Consider introducing a customProperty 'dc_mapping_' that defines your expected type as the 'value'" + ) + return None + + description = odcs_property.description if odcs_property.description is not None else None + field = Field( + description=" ".join(description.splitlines()) if description is not None else None, + type=mapped_type, + title=odcs_property.businessName, + required=odcs_property.required if odcs_property.required is not None else None, + primaryKey=to_primary_key(odcs_property, odcs_properties), + unique=odcs_property.unique if odcs_property.unique else None, + examples=odcs_property.examples if odcs_property.examples is not None else None, + classification=odcs_property.classification if odcs_property.classification is not None else None, + tags=odcs_property.tags if odcs_property.tags is not None else None, + quality=convert_quality_list(odcs_property.quality), + fields=import_fields(odcs_property.properties, custom_type_mappings, server_type) + if odcs_property.properties is not None + else {}, + config=import_field_config(odcs_property, server_type), + format=getattr(odcs_property, "format", None), + ) + + # mapped_type is array + if field.type == "array" and odcs_property.items is not None: + field.items = import_field(odcs_property.items, [], custom_type_mappings, server_type) + + # enum from quality validValues as enum + if field.type == "string": + for q in field.quality: + if hasattr(q, "validValues"): + field.enum = q.validValues + + return field + + +def to_primary_key(odcs_property: SchemaProperty, odcs_properties: list[SchemaProperty]) -> bool | None: + if odcs_property.primaryKey is None: + return None + if has_composite_primary_key(odcs_properties): + return None + return odcs_property.primaryKey def map_type(odcs_logical_type: str, custom_mappings: Dict[str, str], physical_type: str = None) -> str | None: diff --git a/pyproject.toml b/pyproject.toml index 5714eac84..0c290c4a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "Jinja2>=3.1.5,<4.0.0", "jinja_partials>=0.2.1,<1.0.0", "datacontract-specification>=1.2.3,<2.0.0", - "open-data-contract-standard>=3.0.4,<4.0.0", + "open-data-contract-standard>=3.0.5,<4.0.0", ] [project.optional-dependencies] diff --git a/tests/fixtures/excel/shipments-odcs.yaml b/tests/fixtures/excel/shipments-odcs.yaml index 9c3d577d7..bf04b37f7 100644 --- a/tests/fixtures/excel/shipments-odcs.yaml +++ b/tests/fixtures/excel/shipments-odcs.yaml @@ -42,9 +42,13 @@ schema: - businesskey primaryKey: true logicalType: string + required: false + unique: false + partitioned: false classification: internal examples: - 123e4567-e89b-12d3-a456-426614174000 + criticalDataElement: false - name: order_id physicalType: text physicalName: oid @@ -52,54 +56,75 @@ schema: - url: http://localhost:8080/demo203502625092/definitions/sales/order_id type: definition primaryKey: false + partitioned: false - name: delivery_date physicalType: timestamp_tz description: The actual or expected delivery date of the shipment. 
businessName: Delivery Date primaryKey: false logicalType: date + required: false + unique: false + partitioned: false classification: internal examples: - '2023-10-01T10:00:00Z' + criticalDataElement: false - name: carrier physicalType: text description: The shipping carrier used for the delivery. businessName: Carrier primaryKey: false logicalType: string + required: false + unique: false + partitioned: false classification: internal examples: - FedEx - UPS + criticalDataElement: false - name: tracking_number physicalType: text description: Tracking number provided by the carrier. businessName: Tracking Number primaryKey: false logicalType: string + required: false + unique: false + partitioned: false classification: restricted examples: - 1Z999AA10123456784 + criticalDataElement: false - name: status physicalType: text description: Current status of the shipment. businessName: Status primaryKey: false logicalType: string + required: false + unique: false + partitioned: false classification: internal examples: - Delivered - In Transit + criticalDataElement: false - name: inline_object_definition physicalType: json description: A JSON representation of additional shipment info businessName: Inline Object Definition primaryKey: false logicalType: object + required: false + unique: false + partitioned: false partitionKeyPosition: -1 classification: internal examples: - '{"destination": "New York"}' + criticalDataElement: false quality: - description: '{field} must contain the field "destination"' type: text @@ -118,6 +143,8 @@ schema: primaryKey: false logicalType: string required: true + unique: false + partitioned: false classification: restricted examples: - Marienplatz 1 @@ -128,6 +155,8 @@ schema: primaryKey: false logicalType: string required: true + unique: false + partitioned: false classification: restricted examples: - Munich @@ -138,6 +167,7 @@ schema: primaryKey: false logicalType: string required: true + partitioned: false classification: restricted examples: - DE diff --git a/tests/fixtures/odcs_v3/adventureworks.datacontract.yml b/tests/fixtures/odcs_v3/adventureworks.datacontract.yml index 81ffb2aad..99a0686c3 100644 --- a/tests/fixtures/odcs_v3/adventureworks.datacontract.yml +++ b/tests/fixtures/odcs_v3/adventureworks.datacontract.yml @@ -193,7 +193,6 @@ models: businessentityid: type: number required: false - primaryKey: false description: Employee identification number. Foreign key to Employee.BusinessEntityID. config: criticalDataElement: false @@ -202,7 +201,6 @@ models: departmentid: type: number required: false - primaryKey: false description: Department in which the employee worked including currently. Foreign key to Department.DepartmentID. config: @@ -212,7 +210,6 @@ models: shiftid: type: number required: false - primaryKey: false description: Identifies which 8-hour shift the employee works. Foreign key to Shift.Shift.ID. config: @@ -222,7 +219,6 @@ models: startdate: type: date required: false - primaryKey: false description: Date the employee started work in the department. config: criticalDataElement: false @@ -231,7 +227,6 @@ models: enddate: type: date required: false - primaryKey: false description: Date the employee left the department. NULL = Current department. 
config: criticalDataElement: false @@ -240,7 +235,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -253,7 +247,6 @@ models: businessentityid: type: number required: false - primaryKey: false description: Employee identification number. Foreign key to Employee.BusinessEntityID. config: criticalDataElement: false @@ -262,7 +255,6 @@ models: ratechangedate: type: date required: false - primaryKey: false description: Date the change in pay is effective config: criticalDataElement: false @@ -271,7 +263,6 @@ models: rate: type: number required: false - primaryKey: false description: Salary hourly rate. config: criticalDataElement: false @@ -280,7 +271,6 @@ models: payfrequency: type: number required: false - primaryKey: false description: 1 = Salary received monthly, 2 = Salary received biweekly config: criticalDataElement: false @@ -289,7 +279,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -548,7 +537,6 @@ models: businessentityid: type: number required: false - primaryKey: false description: Primary key. Foreign key to BusinessEntity.BusinessEntityID. config: criticalDataElement: false @@ -557,7 +545,6 @@ models: addressid: type: number required: false - primaryKey: false description: Primary key. Foreign key to Address.AddressID. config: criticalDataElement: false @@ -566,7 +553,6 @@ models: addresstypeid: type: number required: false - primaryKey: false description: Primary key. Foreign key to AddressType.AddressTypeID. config: criticalDataElement: false @@ -575,7 +561,6 @@ models: rowguid: type: string required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -583,7 +568,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -596,7 +580,6 @@ models: businessentityid: type: number required: false - primaryKey: false description: Primary key. Foreign key to BusinessEntity.BusinessEntityID. config: criticalDataElement: false @@ -605,7 +588,6 @@ models: personid: type: number required: false - primaryKey: false description: Primary key. Foreign key to Person.BusinessEntityID. config: criticalDataElement: false @@ -614,7 +596,6 @@ models: contacttypeid: type: number required: false - primaryKey: false description: Primary key. Foreign key to ContactType.ContactTypeID. config: criticalDataElement: false @@ -623,7 +604,6 @@ models: rowguid: type: string required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -631,7 +611,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -707,7 +686,6 @@ models: businessentityid: type: number required: false - primaryKey: false description: Primary key. Person associated with this email address. Foreign key to Person.BusinessEntityID config: @@ -717,7 +695,6 @@ models: emailaddressid: type: number required: false - primaryKey: false description: Primary key. ID of this email address. config: criticalDataElement: false @@ -726,7 +703,6 @@ models: emailaddress: type: string required: false - primaryKey: false description: E-mail address for the person. 
config: criticalDataElement: false @@ -735,7 +711,6 @@ models: rowguid: type: string required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -743,7 +718,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -934,7 +908,6 @@ models: businessentityid: type: number required: false - primaryKey: false description: Business entity identification number. Foreign key to Person.BusinessEntityID. config: criticalDataElement: false @@ -943,7 +916,6 @@ models: phonenumber: type: object required: false - primaryKey: false description: Telephone number identification number. config: criticalDataElement: false @@ -952,7 +924,6 @@ models: phonenumbertypeid: type: number required: false - primaryKey: false description: Kind of phone number. Foreign key to PhoneNumberType.PhoneNumberTypeID. config: criticalDataElement: false @@ -961,7 +932,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -1671,7 +1641,6 @@ models: productid: type: number required: false - primaryKey: false description: Product identification number. Foreign key to Product.ProductID config: criticalDataElement: false @@ -1680,7 +1649,6 @@ models: startdate: type: date required: false - primaryKey: false description: Product cost start date. config: criticalDataElement: false @@ -1689,7 +1657,6 @@ models: enddate: type: date required: false - primaryKey: false description: Product cost end date. config: criticalDataElement: false @@ -1698,7 +1665,6 @@ models: standardcost: type: number required: false - primaryKey: false description: Standard cost of the product. config: criticalDataElement: false @@ -1707,7 +1673,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -1759,7 +1724,6 @@ models: productid: type: number required: false - primaryKey: false description: Product identification number. Foreign key to Product.ProductID. config: criticalDataElement: false @@ -1768,7 +1732,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -1776,7 +1739,6 @@ models: documentnode: type: string required: false - primaryKey: false description: Document identification number. Foreign key to Document.DocumentNode. config: criticalDataElement: false @@ -1790,7 +1752,6 @@ models: productid: type: number required: false - primaryKey: false description: Product identification number. Foreign key to Product.ProductID. config: criticalDataElement: false @@ -1799,7 +1760,6 @@ models: locationid: type: number required: false - primaryKey: false description: Inventory location identification number. Foreign key to Location.LocationID. config: criticalDataElement: false @@ -1808,7 +1768,6 @@ models: shelf: type: string required: false - primaryKey: false description: Storage compartment within an inventory location. config: criticalDataElement: false @@ -1817,7 +1776,6 @@ models: bin: type: number required: false - primaryKey: false description: Storage container on a shelf in an inventory location. config: criticalDataElement: false @@ -1826,7 +1784,6 @@ models: quantity: type: number required: false - primaryKey: false description: Quantity of products in the inventory location. 
config: criticalDataElement: false @@ -1835,7 +1792,6 @@ models: rowguid: type: string required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -1843,7 +1799,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -1856,7 +1811,6 @@ models: productid: type: number required: false - primaryKey: false description: Product identification number. Foreign key to Product.ProductID config: criticalDataElement: false @@ -1865,7 +1819,6 @@ models: startdate: type: date required: false - primaryKey: false description: List price start date. config: criticalDataElement: false @@ -1874,7 +1827,6 @@ models: enddate: type: date required: false - primaryKey: false description: List price end date config: criticalDataElement: false @@ -1883,7 +1835,6 @@ models: listprice: type: number required: false - primaryKey: false description: Product list price. config: criticalDataElement: false @@ -1892,7 +1843,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -1962,7 +1912,6 @@ models: productmodelid: type: number required: false - primaryKey: false description: Primary key. Foreign key to ProductModel.ProductModelID. config: criticalDataElement: false @@ -1971,7 +1920,6 @@ models: illustrationid: type: number required: false - primaryKey: false description: Primary key. Foreign key to Illustration.IllustrationID. config: criticalDataElement: false @@ -1980,7 +1928,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -1994,7 +1941,6 @@ models: productmodelid: type: number required: false - primaryKey: false description: Primary key. Foreign key to ProductModel.ProductModelID. config: criticalDataElement: false @@ -2003,7 +1949,6 @@ models: productdescriptionid: type: number required: false - primaryKey: false description: Primary key. Foreign key to ProductDescription.ProductDescriptionID. config: criticalDataElement: false @@ -2012,7 +1957,6 @@ models: cultureid: type: string required: false - primaryKey: false description: Culture identification number. Foreign key to Culture.CultureID. config: criticalDataElement: false @@ -2021,7 +1965,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -2092,7 +2035,6 @@ models: productid: type: number required: false - primaryKey: false description: Product identification number. Foreign key to Product.ProductID. config: criticalDataElement: false @@ -2101,7 +2043,6 @@ models: productphotoid: type: number required: false - primaryKey: false description: Product photo identification number. Foreign key to ProductPhoto.ProductPhotoID. config: criticalDataElement: false @@ -2110,7 +2051,6 @@ models: primary: type: object required: false - primaryKey: false description: 0 = Photo is not the principal image. 1 = Photo is the principal image. config: @@ -2120,7 +2060,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -2578,7 +2517,6 @@ models: workorderid: type: number required: false - primaryKey: false description: Primary key. Foreign key to WorkOrder.WorkOrderID. config: criticalDataElement: false @@ -2587,7 +2525,6 @@ models: productid: type: number required: false - primaryKey: false description: Primary key. Foreign key to Product.ProductID. 
config: criticalDataElement: false @@ -2596,7 +2533,6 @@ models: operationsequence: type: number required: false - primaryKey: false description: Primary key. Indicates the manufacturing process sequence. config: criticalDataElement: false @@ -2605,7 +2541,6 @@ models: locationid: type: number required: false - primaryKey: false description: Manufacturing location where the part is processed. Foreign key to Location.LocationID. config: @@ -2615,7 +2550,6 @@ models: scheduledstartdate: type: date required: false - primaryKey: false description: Planned manufacturing start date. config: criticalDataElement: false @@ -2624,7 +2558,6 @@ models: scheduledenddate: type: date required: false - primaryKey: false description: Planned manufacturing end date. config: criticalDataElement: false @@ -2633,7 +2566,6 @@ models: actualstartdate: type: date required: false - primaryKey: false description: Actual start date. config: criticalDataElement: false @@ -2642,7 +2574,6 @@ models: actualenddate: type: date required: false - primaryKey: false description: Actual end date. config: criticalDataElement: false @@ -2651,7 +2582,6 @@ models: actualresourcehrs: type: number required: false - primaryKey: false description: Number of manufacturing hours used. config: criticalDataElement: false @@ -2660,7 +2590,6 @@ models: plannedcost: type: number required: false - primaryKey: false description: Estimated manufacturing cost. config: criticalDataElement: false @@ -2669,7 +2598,6 @@ models: actualcost: type: number required: false - primaryKey: false description: Actual manufacturing cost. config: criticalDataElement: false @@ -2678,7 +2606,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -2691,7 +2618,6 @@ models: productid: type: number required: false - primaryKey: false description: Primary key. Foreign key to Product.ProductID. config: criticalDataElement: false @@ -2700,7 +2626,6 @@ models: businessentityid: type: number required: false - primaryKey: false description: Primary key. Foreign key to Vendor.BusinessEntityID. config: criticalDataElement: false @@ -2709,7 +2634,6 @@ models: averageleadtime: type: number required: false - primaryKey: false description: The average span of time (in days) between placing an order with the vendor and receiving the purchased product. config: @@ -2719,7 +2643,6 @@ models: standardprice: type: number required: false - primaryKey: false description: The vendor's usual selling price. config: criticalDataElement: false @@ -2728,7 +2651,6 @@ models: lastreceiptcost: type: number required: false - primaryKey: false description: The selling price when last purchased. config: criticalDataElement: false @@ -2737,7 +2659,6 @@ models: lastreceiptdate: type: date required: false - primaryKey: false description: Date the product was last received by the vendor. config: criticalDataElement: false @@ -2746,7 +2667,6 @@ models: minorderqty: type: number required: false - primaryKey: false description: The maximum quantity that should be ordered. config: criticalDataElement: false @@ -2755,7 +2675,6 @@ models: maxorderqty: type: number required: false - primaryKey: false description: The minimum quantity that should be ordered. config: criticalDataElement: false @@ -2764,7 +2683,6 @@ models: onorderqty: type: number required: false - primaryKey: false description: The quantity currently on order. 
config: criticalDataElement: false @@ -2773,7 +2691,6 @@ models: unitmeasurecode: type: string required: false - primaryKey: false description: The product's unit of measure. config: criticalDataElement: false @@ -2782,7 +2699,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -2796,7 +2712,6 @@ models: purchaseorderid: type: number required: false - primaryKey: false description: Primary key. Foreign key to PurchaseOrderHeader.PurchaseOrderID. config: criticalDataElement: false @@ -2805,7 +2720,6 @@ models: purchaseorderdetailid: type: number required: false - primaryKey: false description: Primary key. One line number per purchased product. config: criticalDataElement: false @@ -2814,7 +2728,6 @@ models: duedate: type: date required: false - primaryKey: false description: Date the product is expected to be received. config: criticalDataElement: false @@ -2823,7 +2736,6 @@ models: orderqty: type: number required: false - primaryKey: false description: Quantity ordered. config: criticalDataElement: false @@ -2832,7 +2744,6 @@ models: productid: type: number required: false - primaryKey: false description: Product identification number. Foreign key to Product.ProductID. config: criticalDataElement: false @@ -2841,7 +2752,6 @@ models: unitprice: type: number required: false - primaryKey: false description: Vendor's selling price of a single product. config: criticalDataElement: false @@ -2850,7 +2760,6 @@ models: receivedqty: type: number required: false - primaryKey: false description: Quantity actually received from the vendor. config: criticalDataElement: false @@ -2859,7 +2768,6 @@ models: rejectedqty: type: number required: false - primaryKey: false description: Quantity rejected during inspection. config: criticalDataElement: false @@ -2868,7 +2776,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -3134,7 +3041,6 @@ models: countryregioncode: type: string required: false - primaryKey: false description: ISO code for countries and regions. Foreign key to CountryRegion.CountryRegionCode. config: criticalDataElement: false @@ -3143,7 +3049,6 @@ models: currencycode: type: string required: false - primaryKey: false description: ISO standard currency code. Foreign key to Currency.CurrencyCode. config: criticalDataElement: false @@ -3152,7 +3057,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -3380,7 +3284,6 @@ models: businessentityid: type: number required: false - primaryKey: false description: Business entity identification number. Foreign key to Person.BusinessEntityID. config: criticalDataElement: false @@ -3389,7 +3292,6 @@ models: creditcardid: type: number required: false - primaryKey: false description: Credit card identification number. Foreign key to CreditCard.CreditCardID. config: criticalDataElement: false @@ -3398,7 +3300,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -3411,7 +3312,6 @@ models: salesorderid: type: number required: false - primaryKey: false description: Primary key. Foreign key to SalesOrderHeader.SalesOrderID. config: criticalDataElement: false @@ -3420,7 +3320,6 @@ models: salesorderdetailid: type: number required: false - primaryKey: false description: Primary key. One incremental unique number per product sold. 
config: criticalDataElement: false @@ -3429,7 +3328,6 @@ models: carriertrackingnumber: type: string required: false - primaryKey: false description: Shipment tracking number supplied by the shipper. config: criticalDataElement: false @@ -3438,7 +3336,6 @@ models: orderqty: type: number required: false - primaryKey: false description: Quantity ordered per product. config: criticalDataElement: false @@ -3447,7 +3344,6 @@ models: productid: type: number required: false - primaryKey: false description: Product sold to customer. Foreign key to Product.ProductID. config: criticalDataElement: false @@ -3456,7 +3352,6 @@ models: specialofferid: type: number required: false - primaryKey: false description: Promotional code. Foreign key to SpecialOffer.SpecialOfferID. config: criticalDataElement: false @@ -3465,7 +3360,6 @@ models: unitprice: type: number required: false - primaryKey: false description: Selling price of a single product. config: criticalDataElement: false @@ -3474,7 +3368,6 @@ models: unitpricediscount: type: number required: false - primaryKey: false description: Discount amount. config: criticalDataElement: false @@ -3483,7 +3376,6 @@ models: rowguid: type: string required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -3491,7 +3383,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -3735,7 +3626,6 @@ models: salesorderid: type: number required: false - primaryKey: false description: Primary key. Foreign key to SalesOrderHeader.SalesOrderID. config: criticalDataElement: false @@ -3744,7 +3634,6 @@ models: salesreasonid: type: number required: false - primaryKey: false description: Primary key. Foreign key to SalesReason.SalesReasonID. config: criticalDataElement: false @@ -3753,7 +3642,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -3850,7 +3738,6 @@ models: businessentityid: type: number required: false - primaryKey: false description: Sales person identification number. Foreign key to SalesPerson.BusinessEntityID. config: criticalDataElement: false @@ -3859,7 +3746,6 @@ models: quotadate: type: date required: false - primaryKey: false description: Sales quota date. config: criticalDataElement: false @@ -3868,7 +3754,6 @@ models: salesquota: type: number required: false - primaryKey: false description: Sales quota amount. config: criticalDataElement: false @@ -3877,7 +3762,6 @@ models: rowguid: type: string required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -3885,7 +3769,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -4098,7 +3981,6 @@ models: businessentityid: type: number required: false - primaryKey: false description: Primary key. The sales rep. Foreign key to SalesPerson.BusinessEntityID. config: criticalDataElement: false @@ -4107,7 +3989,6 @@ models: territoryid: type: number required: false - primaryKey: false description: Primary key. Territory identification number. Foreign key to SalesTerritory.SalesTerritoryID. config: @@ -4117,7 +3998,6 @@ models: startdate: type: date required: false - primaryKey: false description: Primary key. Date the sales representive started work in the territory. 
config: @@ -4127,7 +4007,6 @@ models: enddate: type: date required: false - primaryKey: false description: Date the sales representative left work in the territory. config: criticalDataElement: false @@ -4136,7 +4015,6 @@ models: rowguid: type: string required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -4144,7 +4022,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -4317,7 +4194,6 @@ models: specialofferid: type: number required: false - primaryKey: false description: Primary key for SpecialOfferProduct records. config: criticalDataElement: false @@ -4326,7 +4202,6 @@ models: productid: type: number required: false - primaryKey: false description: Product identification number. Foreign key to Product.ProductID. config: criticalDataElement: false @@ -4335,7 +4210,6 @@ models: rowguid: type: string required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -4343,7 +4217,6 @@ models: modifieddate: type: date required: false - primaryKey: false config: criticalDataElement: false partitioned: false @@ -4411,77 +4284,53 @@ models: title: StoreHolidayHours type: array required: false - primaryKey: false items: type: object fields: Date: title: Date type: date - required: false - primaryKey: false examples: - '2024-08-13' config: - criticalDataElement: false - partitioned: false physicalType: string Close: title: Close type: date - required: false - primaryKey: false examples: - 02:00 PM config: - criticalDataElement: false - partitioned: false physicalType: string Open: title: Open type: date - required: false - primaryKey: false examples: - 10:00 AM config: - criticalDataElement: false - partitioned: false physicalType: string config: - criticalDataElement: false - partitioned: false physicalType: array extendedData: title: extendedData type: object required: true - primaryKey: false fields: pharmacyUUID: title: pharmacyUUID type: string required: true - primaryKey: false unique: true examples: - ec43dd63-c258-4506-8965-88a9e0c130ad config: - criticalDataElement: false - partitioned: false physicalType: string config: - criticalDataElement: false - partitioned: false physicalType: object ArrayComments: title: ArrayComments type: array required: false - primaryKey: false items: type: string config: - criticalDataElement: false - partitioned: false - physicalType: array \ No newline at end of file + physicalType: array diff --git a/tests/fixtures/odcs_v3/full-example.datacontract.yml b/tests/fixtures/odcs_v3/full-example.datacontract.yml index 8d6f79d40..6e85caaa7 100644 --- a/tests/fixtures/odcs_v3/full-example.datacontract.yml +++ b/tests/fixtures/odcs_v3/full-example.datacontract.yml @@ -84,6 +84,12 @@ models: partitionKeyPosition: -1 criticalDataElement: false encryptedName: rcvr_cntry_code_encrypted + array_of_array: + type: array + items: + type: array + items: + type: string quality: - type: library description: Ensure row count is within expected volume range diff --git a/tests/fixtures/odcs_v3/full-example.odcs.yaml b/tests/fixtures/odcs_v3/full-example.odcs.yaml index a606ce0be..c5587fcad 100644 --- a/tests/fixtures/odcs_v3/full-example.odcs.yaml +++ b/tests/fixtures/odcs_v3/full-example.odcs.yaml @@ -112,6 +112,12 @@ schema: value: - property: COMPARISON_TYPE value: Greater than + - name: array_of_array + logicalType: array + items: + logicalType: array + items: + logicalType: string quality: - rule: rowCount mustBeGreaterThan: 1000 
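A minimal sketch of exercising the nested-array support that the fixture change above tests — this block is illustrative and not part of the patch series. It assumes the `DataContract.import_from_source` entry point that the CLI code later in this series calls, that `"odcs"` is the registered import format name, and that the returned specification exposes `to_yaml()`:

```python
# Sketch: import an ODCS v3 contract containing a nested array, as in the
# full-example fixture touched by this patch. `import_from_source` and the
# import path are taken from the CLI code in this series; the "odcs" format
# name and `to_yaml()` are assumptions, not confirmed by the patches here.
from datacontract.data_contract import DataContract

spec = DataContract.import_from_source(
    format="odcs",
    source="tests/fixtures/odcs_v3/full-example.odcs.yaml",
)

# If the importer handles nesting, the property should round-trip as an
# array of arrays of strings:
#   array_of_array:
#     type: array
#     items:
#       type: array
#       items:
#         type: string
print(spec.to_yaml())
```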
From 49dc9c8dd5337556f03bb14702029a376e35c196 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 31 Oct 2025 14:39:48 +0100 Subject: [PATCH 038/150] chore(deps): update pydantic requirement (#917) Updates the requirements on [pydantic](https://github.com/pydantic/pydantic) to permit the latest version. - [Release notes](https://github.com/pydantic/pydantic/releases) - [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md) - [Commits](https://github.com/pydantic/pydantic/compare/v2.8.2...v2.12.3) --- updated-dependencies: - dependency-name: pydantic dependency-version: 2.12.3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0c290c4a7..736b3272f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ requires-python = ">=3.10" dependencies = [ "typer>=0.15.1,<0.20", - "pydantic>=2.8.2,<2.12.0", + "pydantic>=2.8.2,<2.13.0", "pyyaml~=6.0.1", "requests>=2.31,<2.33", "fastjsonschema>=2.19.1,<2.22.0", From 53e71ea47e0d57a08f0cd2f14243b1c87a5c1aba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 31 Oct 2025 15:16:42 +0100 Subject: [PATCH 039/150] chore(deps): bump pymssql from 2.3.7 to 2.3.8 (#916) Bumps [pymssql](https://github.com/pymssql/pymssql) from 2.3.7 to 2.3.8. - [Release notes](https://github.com/pymssql/pymssql/releases) - [Changelog](https://github.com/pymssql/pymssql/blob/master/ChangeLog.rst) - [Commits](https://github.com/pymssql/pymssql/compare/v2.3.7...v2.3.8) --- updated-dependencies: - dependency-name: pymssql dependency-version: 2.3.8 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 736b3272f..39f6db1f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -139,7 +139,7 @@ dev = [ "pre-commit>=3.7.1,<4.4.0", "pytest", "pytest-xdist", - "pymssql==2.3.7", + "pymssql==2.3.8", "ruff", "testcontainers[minio,postgres,kafka,mssql]==4.12.0", "trino==0.336.0", From 3895762914a5dc5fa4c48955e9edb1027223daf3 Mon Sep 17 00:00:00 2001 From: Yannick <60961333+YLibert@users.noreply.github.com> Date: Sun, 2 Nov 2025 07:46:30 +0100 Subject: [PATCH 040/150] Fix #922: validate the contract against the custom schema before converting (#923) * fix: validate the contract against the custom schema before converting * Adding test cases --------- Signed-off-by: Yannick Libert Co-authored-by: jochen --- CHANGELOG.md | 5 +- datacontract/lint/resolve.py | 20 ++- datacontract/lint/schema.py | 34 ++-- tests/fixtures/lint/invalid.odcs.yaml | 6 + tests/fixtures/lint/valid.odcs.yaml | 215 ++++++++++++++++++++++++++ tests/fixtures/postgres/odcs.yaml | 2 +- tests/test_lint.py | 17 ++ 7 files changed, 277 insertions(+), 22 deletions(-) create mode 100644 tests/fixtures/lint/invalid.odcs.yaml create mode 100644 tests/fixtures/lint/valid.odcs.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index d3b97bf75..af33bf5cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,11 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- Support for nested arrays in odcs v3 importer +- import: Support for nested arrays in odcs v3 importer +- lint: ODCS schema is now checked before converting ### Fixed -- Excel exporter now exports critical data element +- export: Excel exporter now exports critical data element ## [0.10.36] - 2025-10-17 diff --git a/datacontract/lint/resolve.py b/datacontract/lint/resolve.py index 87b354cdc..b79d25004 100644 --- a/datacontract/lint/resolve.py +++ b/datacontract/lint/resolve.py @@ -1,6 +1,8 @@ +import importlib.resources as resources import logging import os import warnings +from pathlib import Path import fastjsonschema import yaml @@ -298,6 +300,14 @@ def _resolve_data_contract_from_str( ) -> DataContractSpecification: yaml_dict = _to_yaml(data_contract_str) + if schema_location is None: + if is_open_data_contract_standard(yaml_dict): + logging.info("Using ODCS 3.0.2 schema to validate data contract") + # TODO refactor this to a specific function + schema_location = resources.files("datacontract").joinpath("schemas", "odcs-3.0.2.schema.json") + + _validate_json_schema(yaml_dict, schema_location) + if is_open_data_contract_standard(yaml_dict): logging.info("Importing ODCS v3") # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly @@ -311,7 +321,7 @@ def _resolve_data_contract_from_str( def _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict): - _validate_data_contract_specification_schema(yaml_dict, schema_location) + _validate_json_schema(yaml_dict, schema_location) data_contract_specification = yaml_dict spec = DataContractSpecification(**data_contract_specification) if inline_definitions: @@ -349,16 +359,16 @@ def _to_yaml(data_contract_str) -> dict: ) -def _validate_data_contract_specification_schema(data_contract_yaml, schema_location: str = None): +def _validate_json_schema(yaml_str, schema_location: str | Path = None): schema = 
fetch_schema(schema_location) try: - fastjsonschema.validate(schema, data_contract_yaml, use_default=False) + fastjsonschema.validate(schema, yaml_str, use_default=False) logging.debug("YAML data is valid.") except JsonSchemaValueException as e: logging.warning(f"Data Contract YAML is invalid. Validation error: {e.message}") raise DataContractException( type="lint", - result="failed", + result=ResultEnum.failed, name="Check that data contract YAML is valid", reason=e.message, engine="datacontract", @@ -367,7 +377,7 @@ def _validate_data_contract_specification_schema(data_contract_yaml, schema_loca logging.warning(f"Data Contract YAML is invalid. Validation error: {str(e)}") raise DataContractException( type="lint", - result="failed", + result=ResultEnum.failed, name="Check that data contract YAML is valid", reason=str(e), engine="datacontract", diff --git a/datacontract/lint/schema.py b/datacontract/lint/schema.py index f32b7cce6..f250d969c 100644 --- a/datacontract/lint/schema.py +++ b/datacontract/lint/schema.py @@ -2,16 +2,18 @@ import json import logging import os +from pathlib import Path from typing import Any, Dict import requests from datacontract.model.exceptions import DataContractException +from datacontract.model.run import ResultEnum DEFAULT_DATA_CONTRACT_SCHEMA = "datacontract-1.2.1.schema.json" -def fetch_schema(location: str = None) -> Dict[str, Any]: +def fetch_schema(location: str | Path = None) -> Dict[str, Any]: """ Fetch and return a JSON schema from a given location. @@ -36,19 +38,23 @@ def fetch_schema(location: str = None) -> Dict[str, Any]: schema_file = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_SCHEMA) with schema_file.open("r") as file: schema = json.load(file) - elif location.startswith("http://") or location.startswith("https://"): - response = requests.get(location) - schema = response.json() else: - if not os.path.exists(location): - raise DataContractException( - type="lint", - name=f"Reading schema from {location}", - reason=f"The file '{location}' does not exist.", - engine="datacontract", - result="error", - ) - with open(location, "r") as file: - schema = json.load(file) + # Convert Path objects to strings for string operations + location_str = str(location) + + if location_str.startswith("http://") or location_str.startswith("https://"): + response = requests.get(location_str) + schema = response.json() + else: + if not os.path.exists(location): + raise DataContractException( + type="lint", + name=f"Reading schema from {location}", + reason=f"The file '{location}' does not exist.", + engine="datacontract", + result=ResultEnum.error, + ) + with open(location, "r") as file: + schema = json.load(file) return schema diff --git a/tests/fixtures/lint/invalid.odcs.yaml b/tests/fixtures/lint/invalid.odcs.yaml new file mode 100644 index 000000000..670ee4d29 --- /dev/null +++ b/tests/fixtures/lint/invalid.odcs.yaml @@ -0,0 +1,6 @@ +apiVersion: "v3.0.2" +kind: "DataContract" +id: "valid_odcs" +name: "This is invalid ODCS, because the status is missing" +version: "1.0.0" +#status: "draft" diff --git a/tests/fixtures/lint/valid.odcs.yaml b/tests/fixtures/lint/valid.odcs.yaml new file mode 100644 index 000000000..c36091e95 --- /dev/null +++ b/tests/fixtures/lint/valid.odcs.yaml @@ -0,0 +1,215 @@ +apiVersion: "v3.0.2" +kind: "DataContract" +id: "valid_odcs" +name: "Valid ODCS data contract" +version: "2.0.0" +status: "draft" +tenant: "company-A" +tags: +- "datalocation:EU" +description: + purpose: "This data can be used for analytical purposes" + usage: "Use 
this to analyze shipments" + limitations: "Not suitable for real-time use cases" + authoritativeDefinitions: + - type: "Data Guidelines" + url: "https://example.com/data-guidelines.html" + customProperties: + - property: "github_link" + value: "https://github.example.com/shipment-specification.yaml" +customProperties: +- property: "additionalField" + value: "some value in a new major contract" +- property: "owner" + value: "fulfillment" +schema: +- name: "shipments2" + physicalType: "table" + physicalName: "shipments_v2" + businessName: "Shipments" + description: "This table contains shipment data, including details about shipment IDs, associated orders, delivery dates, carriers, tracking numbers, statuses, and additional shipment information in JSON format." + dataGranularityDescription: "Not Aggregated" + tags: + - "pii" + quality: + - type: "sql" + description: "Table shall contain at least 1 row" + query: "SELECT COUNT(*) FROM shipments" + thresholdType: "mustBeGreaterThanOrEqualTo" + mustBeGreaterThanOrEqualTo: 1 + properties: + - name: "shipment_id" + businessName: "Shipment ID" + physicalName: "sid" + logicalType: "string" + description: "Unique identifier for each shipment." + required: false + unique: false + physicalType: "uuid" + primaryKey: true + partitioned: false + classification: "internal" + examples: + - "123e4567-e89b-12d3-a456-426614174000" + criticalDataElement: false + tags: + - "businesskey" + authoritativeDefinitions: + - type: "definition" + url: "https://datamesh-manager-demo.azurecontainerapps.io/demo440238121320/definitions/fulfillment/shipment_id" + - name: "order_id" + physicalName: "oid" + physicalType: "text" + primaryKey: false + partitioned: false + authoritativeDefinitions: + - type: "definition" + url: "https://datamesh-manager-demo.azurecontainerapps.io/demo440238121320/definitions/sales/order_id" + - name: "delivery_date" + businessName: "Delivery Date" + logicalType: "date" + description: "The actual or expected delivery date of the shipment." + required: false + unique: false + physicalType: "timestamp_tz" + primaryKey: false + partitioned: false + classification: "internal" + examples: + - "2023-10-01T10:00:00Z" + criticalDataElement: false + - name: "carrier" + businessName: "Carrier" + logicalType: "string" + description: "The shipping carrier used for the delivery." + required: false + unique: false + physicalType: "text" + primaryKey: false + partitioned: false + classification: "internal" + examples: + - "FedEx" + - "UPS" + criticalDataElement: false + - name: "tracking_number" + businessName: "Tracking Number" + logicalType: "string" + description: "Tracking number provided by the carrier." + required: false + unique: false + physicalType: "text" + primaryKey: false + partitioned: false + classification: "restricted" + examples: + - "1Z999AA10123456784" + criticalDataElement: false + customProperties: + - property: "external" + value: "true" + - name: "status" + businessName: "Status" + logicalType: "string" + description: "Current status of the shipment." 
+ required: false + unique: false + physicalType: "text" + primaryKey: false + partitioned: false + classification: "internal" + examples: + - "Delivered" + - "In Transit" + criticalDataElement: false + - name: "inline_object_definition" + businessName: "Inline Object Definition" + logicalType: "object" + description: "A JSON representation of additional shipment info" + required: false + unique: false + physicalType: "json" + primaryKey: false + partitioned: false + partitionKeyPosition: -1 + classification: "internal" + examples: + - "{\"destination\": \"New York\"}" + criticalDataElement: false + quality: + - type: "text" + description: "{field} must contain the field \"destination\"" + - name: "address" + businessName: "Shipment Address" + logicalType: "object" + description: "Shipping address details." + required: true + physicalType: "JSON" + classification: "restricted" + properties: + - name: "street" + businessName: "Street" + logicalType: "string" + description: "Street address." + required: true + unique: false + physicalType: "text" + primaryKey: false + partitioned: false + classification: "restricted" + examples: + - "Marienplatz 1" + - name: "city" + businessName: "City" + logicalType: "string" + description: "City of the shipping address." + required: true + unique: false + physicalType: "text" + primaryKey: false + partitioned: false + classification: "restricted" + examples: + - "Munich" + - name: "country" + businessName: "Country" + logicalType: "string" + description: "Country of the shipping address." + required: true + physicalType: "text" + primaryKey: false + partitioned: false + classification: "restricted" + examples: + - "DE" +servers: +- server: "production" + type: "bigquery" + environment: "production" + dataset: "shipments_v1" + project: "acme_shipments_prod" +support: +- channel: "slackname" + url: "http://find.me.here" + tool: "slack" + scope: "interactive" +authoritativeDefinitions: +- type: "Guidelines" + url: "https://example.com/guidelines" +price: + priceAmount: 1 + priceCurrency: "EUR" + priceUnit: "Per 1000 requests" +team: +- username: "vimportant" + role: "administrator" + dateIn: "2020-01-01" +- username: "nimportant" + role: "reader" + dateIn: "2020-01-01" + dateOut: "2024-10-10" +slaProperties: +- property: "availability" + value: "95%" + unit: "%" + driver: "operational" diff --git a/tests/fixtures/postgres/odcs.yaml b/tests/fixtures/postgres/odcs.yaml index 3b20f356c..736a55366 100644 --- a/tests/fixtures/postgres/odcs.yaml +++ b/tests/fixtures/postgres/odcs.yaml @@ -4,7 +4,7 @@ id: postgres name: postgres version: 0.0.1 domain: my-domain-team -status: null +status: active schema: - name: my_table physicalName: my_table diff --git a/tests/test_lint.py b/tests/test_lint.py index c7c3a1fba..90e29eb41 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -54,6 +54,23 @@ def test_lint_custom_schema(): assert run.result == "passed" +def test_lint_valid_odcs_schema(): + data_contract_file = "fixtures/lint/valid.odcs.yaml" + data_contract = DataContract(data_contract_file=data_contract_file) + + run = data_contract.lint() + + assert run.result == "passed" + + +def test_lint_invalid_odcs_schema(): + data_contract_file = "fixtures/lint/invalid.odcs.yaml" + data_contract = DataContract(data_contract_file=data_contract_file) + + run = data_contract.lint() + + assert run.result == "failed" + def test_lint_with_ref(): data_contract = DataContract( From ba550ed0ccca7af1628ce352d2171eab2aa130b6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 2 Nov 2025 07:50:12 +0100 Subject: [PATCH 041/150] chore(deps): update aiobotocore requirement (#915) Updates the requirements on [aiobotocore](https://github.com/aio-libs/aiobotocore) to permit the latest version. - [Release notes](https://github.com/aio-libs/aiobotocore/releases) - [Changelog](https://github.com/aio-libs/aiobotocore/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiobotocore/compare/2.17.0...2.25.0) --- updated-dependencies: - dependency-name: aiobotocore dependency-version: 2.25.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 39f6db1f5..917f7a081 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,7 +80,7 @@ postgres = [ s3 = [ "s3fs>=2025.2.0,<2026.0.0", - "aiobotocore>=2.17.0,<2.25.0", + "aiobotocore>=2.17.0,<2.26.0", ] snowflake = [ From 02b54afb1bdc9fb37b6564690d203f1a9a3ff54c Mon Sep 17 00:00:00 2001 From: jochen Date: Sun, 2 Nov 2025 16:21:01 +0100 Subject: [PATCH 042/150] Improve JSON Schema export with logical type options --- .pre-commit-hooks.yaml | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 .pre-commit-hooks.yaml diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml deleted file mode 100644 index e5158f1d8..000000000 --- a/.pre-commit-hooks.yaml +++ /dev/null @@ -1,17 +0,0 @@ -- id: datacontract-lint - name: Data Contract Linter - description: This hook lint the data contract. - entry: datacontract lint - files: "datacontract*.yaml" - language: python - additional_dependencies: ['.[all]'] - types: [yaml] - -- id: datacontract-test - name: Data Contract Tester - description: This hook test the data contract. - entry: datacontract test - files: "datacontract*.yaml" - language: python - additional_dependencies: ['.[all]'] - types: [yaml] From 84b8f741d4cbf8841f34d343c21a53d61e5b6165 Mon Sep 17 00:00:00 2001 From: jochen Date: Sun, 2 Nov 2025 16:23:00 +0100 Subject: [PATCH 043/150] Remove pre-commit usage section from README and update Ruff version in pre-commit config. --- .pre-commit-config.yaml | 2 +- README.md | 21 --------------------- 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8939841a2..e53648d7c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.13.3 + rev: v0.14.3 hooks: # Run the linter. - id: ruff diff --git a/README.md b/README.md index c171f147a..a56c6fc8a 100644 --- a/README.md +++ b/README.md @@ -2097,27 +2097,6 @@ docker compose run --rm datacontract --version This command runs the container momentarily to check the version of the `datacontract` CLI. The `--rm` flag ensures that the container is automatically removed after the command executes, keeping your environment clean. 
-## Use with pre-commit - -To run `datacontract-cli` as part of a [pre-commit](https://pre-commit.com/) workflow, add something like the below to the `repos` list in the project's `.pre-commit-config.yaml`: - -```yaml -repos: - - repo: https://github.com/datacontract/datacontract-cli - rev: "v0.10.9" - hooks: - - id: datacontract-lint - - id: datacontract-test - args: ["--server", "production"] -``` - -### Available Hook IDs - -| Hook ID | Description | Dependency | -| ----------------- | -------------------------------------------------- | ---------- | -| datacontract-lint | Runs the lint subcommand. | Python3 | -| datacontract-test | Runs the test subcommand. Please look at | Python3 | -| | [test](#test) section for all available arguments. | | ## Release Steps From e6ffd29d2f261866b141a50779c66ff28db017cf Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Mon, 3 Nov 2025 08:26:15 +0100 Subject: [PATCH 044/150] add --debug for all commands --- datacontract/catalog/catalog.py | 2 + datacontract/cli.py | 47 ++++++++++++++++++++++++ datacontract/imports/odcs_v3_importer.py | 4 +- datacontract/imports/sql_importer.py | 6 +-- datacontract/lint/resolve.py | 16 ++++++-- datacontract/lint/schema.py | 3 ++ datacontract/model/odcs.py | 13 +++++++ datacontract/templates/index.html | 6 +-- tests/test_export_dbt_models.py | 4 +- tests/test_import_odcs_v3.py | 3 +- 10 files changed, 90 insertions(+), 14 deletions(-) diff --git a/datacontract/catalog/catalog.py b/datacontract/catalog/catalog.py index 50a92aecc..b9cfc270f 100644 --- a/datacontract/catalog/catalog.py +++ b/datacontract/catalog/catalog.py @@ -1,3 +1,4 @@ +import logging from dataclasses import dataclass from datetime import datetime from pathlib import Path @@ -11,6 +12,7 @@ def create_data_contract_html(contracts, file: Path, path: Path, schema: str): + logging.debug(f"Creating data contract html for file {file} and schema {schema}") data_contract = DataContract( data_contract_file=f"{file.absolute()}", inline_definitions=True, inline_quality=True, schema_location=schema ) diff --git a/datacontract/cli.py b/datacontract/cli.py index 8e27e335c..23cdd97e8 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -1,4 +1,6 @@ +import logging import os +import sys from importlib import metadata from pathlib import Path from typing import Iterable, List, Optional @@ -17,11 +19,14 @@ publish_data_contract_to_datamesh_manager, ) from datacontract.lint.resolve import resolve_data_contract_dict +from datacontract.model.exceptions import DataContractException from datacontract.output.output_format import OutputFormat from datacontract.output.test_results_writer import write_test_result console = Console() +debug_option = Annotated[bool, typer.Option(help="Enable debug logging")] + class OrderedCommands(TyperGroup): def list_commands(self, ctx: Context) -> Iterable[str]: @@ -69,10 +74,13 @@ def init( ] = "datacontract.yaml", template: Annotated[str, typer.Option(help="URL of a template or data contract")] = None, overwrite: Annotated[bool, typer.Option(help="Replace the existing datacontract.yaml")] = False, + debug: debug_option = None, ): """ Create an empty data contract. 
""" + enable_debug_logging(debug) + if not overwrite and os.path.exists(location): console.print("File already exists, use --overwrite to overwrite") raise typer.Exit(code=1) @@ -99,14 +107,24 @@ def lint( ), ] = None, output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None, + debug: debug_option = None, ): """ Validate that the datacontract.yaml is correctly formatted. """ + enable_debug_logging(debug) + run = DataContract(data_contract_file=location, schema_location=schema).lint() write_test_result(run, console, output_format, output) +def enable_debug_logging(debug: bool): + if debug: + logging.basicConfig( + level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", stream=sys.stderr + ) + + @app.command() def test( location: Annotated[ @@ -142,10 +160,13 @@ def test( bool, typer.Option(help="SSL verification when publishing the data contract."), ] = True, + debug: debug_option = None, ): """ Run schema and quality tests on configured servers. """ + enable_debug_logging(debug) + console.print(f"Testing {location}") if server == "all": server = None @@ -216,10 +237,13 @@ def export( help="The file path or URL of a template. For Excel format: path/URL to custom Excel template. For custom format: path to Jinja template." ), ] = None, + debug: debug_option = None, ): """ Convert data contract to a specific format. Saves to file specified by `output` option if present, otherwise prints to stdout. """ + enable_debug_logging(debug) + # Validate that Excel format requires an output file path if format == ExportFormat.excel and output is None: console.print("❌ Error: Excel export requires an output file path.") @@ -327,10 +351,13 @@ def import_( Optional[str], typer.Option(help="The identifier for the the data contract."), ] = None, + debug: debug_option = None, ): """ Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise prints to stdout. """ + enable_debug_logging(debug) + result = DataContract.import_from_source( format=format, source=source, @@ -372,10 +399,13 @@ def publish( bool, typer.Option(help="SSL verification when publishing the data contract."), ] = True, + debug: debug_option = None, ): """ Publish the data contract to the Data Mesh Manager. """ + enable_debug_logging(debug) + publish_data_contract_to_datamesh_manager( data_contract_dict=resolve_data_contract_dict(location), ssl_verification=ssl_verification, @@ -395,10 +425,13 @@ def catalog( str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"), ] = None, + debug: debug_option = None, ): """ Create a html catalog of data contracts. """ + enable_debug_logging(debug) + path = Path(output) path.mkdir(parents=True, exist_ok=True) console.print(f"Created {output}") @@ -407,6 +440,11 @@ def catalog( for file in Path().rglob(files): try: create_data_contract_html(contracts, file, path, schema) + except DataContractException as e: + if e.reason == "Cannot parse ODPS product": + console.print(f"Skipped {file} due to error: {e.reason}") + else: + console.print(f"Skipped {file} due to error: {e}") except Exception as e: console.print(f"Skipped {file} due to error: {e}") @@ -423,10 +461,12 @@ def breaking( str, typer.Argument(help="The location (url or path) of the new data contract yaml."), ], + debug: debug_option = None, ): """ Identifies breaking changes between data contracts. Prints to stdout. 
""" + enable_debug_logging(debug) # TODO exception handling result = DataContract(data_contract_file=location_old, inline_definitions=True).breaking( @@ -449,10 +489,12 @@ def changelog( str, typer.Argument(help="The location (url or path) of the new data contract yaml."), ], + debug: debug_option = None, ): """ Generate a changelog between data contracts. Prints to stdout. """ + enable_debug_logging(debug) # TODO exception handling result = DataContract(data_contract_file=location_old, inline_definitions=True).changelog( @@ -472,10 +514,12 @@ def diff( str, typer.Argument(help="The location (url or path) of the new data contract yaml."), ], + debug: debug_option = None, ): """ PLACEHOLDER. Currently works as 'changelog' does. """ + enable_debug_logging(debug) # TODO change to diff output, not the changelog entries result = DataContract(data_contract_file=location_old, inline_definitions=True).changelog( @@ -510,6 +554,7 @@ def api( host: Annotated[ str, typer.Option(help="Bind socket to this host. Hint: For running in docker, set it to 0.0.0.0") ] = "127.0.0.1", + debug: debug_option = None, ): """ Start the datacontract CLI as server application with REST API. @@ -527,6 +572,8 @@ def api( It is possible to run the API with extra arguments for `uvicorn.run()` as keyword arguments, e.g.: `datacontract api --port 1234 --root_path /datacontract`. """ + enable_debug_logging(debug) + import uvicorn from uvicorn.config import LOGGING_CONFIG diff --git a/datacontract/imports/odcs_v3_importer.py b/datacontract/imports/odcs_v3_importer.py index 94ee12bb0..69c6b001d 100644 --- a/datacontract/imports/odcs_v3_importer.py +++ b/datacontract/imports/odcs_v3_importer.py @@ -347,7 +347,7 @@ def import_field( odcs_property: SchemaProperty, odcs_properties: List[SchemaProperty], custom_type_mappings: Dict[str, str], - server_type: str + server_type: str, ) -> Field | None: """ Import a single ODCS property as a datacontract Field. 
@@ -419,7 +419,7 @@ def map_type(odcs_logical_type: str, custom_mappings: Dict[str, str], physical_t if physical_type is not None: pt = physical_type.lower() # Remove parameters from physical type (e.g., VARCHAR(50) -> varchar, DECIMAL(10,2) -> decimal) - pt_base = pt.split('(')[0].strip() + pt_base = pt.split("(")[0].strip() # Try direct mapping of physical type if pt in DATACONTRACT_TYPES: diff --git a/datacontract/imports/sql_importer.py b/datacontract/imports/sql_importer.py index 3a5558a86..e18fef716 100644 --- a/datacontract/imports/sql_importer.py +++ b/datacontract/imports/sql_importer.py @@ -268,14 +268,15 @@ def map_type_from_sql(sql_type: str) -> str | None: return "string" elif sql_type_normed.startswith("number"): return "number" - elif (sql_type_normed == "clob" or sql_type_normed == "nclob"): + elif sql_type_normed == "clob" or sql_type_normed == "nclob": return "text" else: return "variant" + def map_timestamp(timestamp_type: str) -> str: match timestamp_type: - case "timestamp" | "timestampntz" | "timestamp_ntz" : + case "timestamp" | "timestampntz" | "timestamp_ntz": return "timestamp_ntz" case "timestamptz" | "timestamp_tz" | "timestamp with time zone": return "timestamp_tz" @@ -287,7 +288,6 @@ def map_timestamp(timestamp_type: str) -> str: return "timestamp" - def read_file(path): if not os.path.exists(path): raise DataContractException( diff --git a/datacontract/lint/resolve.py b/datacontract/lint/resolve.py index b79d25004..e6cd7838a 100644 --- a/datacontract/lint/resolve.py +++ b/datacontract/lint/resolve.py @@ -19,7 +19,7 @@ DeprecatedQuality, ) from datacontract.model.exceptions import DataContractException -from datacontract.model.odcs import is_open_data_contract_standard +from datacontract.model.odcs import is_open_data_contract_standard, is_open_data_product_standard from datacontract.model.run import ResultEnum @@ -285,6 +285,16 @@ def _resolve_data_contract_from_str_v2( ) -> DataContractSpecification | OpenDataContractStandard: yaml_dict = _to_yaml(data_contract_str) + if is_open_data_product_standard(yaml_dict): + logging.info("Cannot import ODPS, as not supported") + raise DataContractException( + type="schema", + result=ResultEnum.failed, + name="Parse ODCS contract", + reason="Cannot parse ODPS product", + engine="datacontract", + ) + if is_open_data_contract_standard(yaml_dict): logging.info("Importing ODCS v3") # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly @@ -346,8 +356,7 @@ def _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_locat def _to_yaml(data_contract_str) -> dict: try: - yaml_dict = yaml.safe_load(data_contract_str) - return yaml_dict + return yaml.safe_load(data_contract_str) except Exception as e: logging.warning(f"Cannot parse YAML. 
Error: {str(e)}") raise DataContractException( @@ -360,6 +369,7 @@ def _to_yaml(data_contract_str) -> dict: def _validate_json_schema(yaml_str, schema_location: str | Path = None): + logging.debug(f"Linting data contract with schema at {schema_location}") schema = fetch_schema(schema_location) try: fastjsonschema.validate(schema, yaml_str, use_default=False) diff --git a/datacontract/lint/schema.py b/datacontract/lint/schema.py index f250d969c..b0e7867aa 100644 --- a/datacontract/lint/schema.py +++ b/datacontract/lint/schema.py @@ -43,6 +43,7 @@ def fetch_schema(location: str | Path = None) -> Dict[str, Any]: location_str = str(location) if location_str.startswith("http://") or location_str.startswith("https://"): + logging.debug(f"Downloading schema from {location_str}") response = requests.get(location_str) schema = response.json() else: @@ -54,6 +55,8 @@ def fetch_schema(location: str | Path = None) -> Dict[str, Any]: engine="datacontract", result=ResultEnum.error, ) + + logging.debug(f"Loading JSON schema locally at {location}") with open(location, "r") as file: schema = json.load(file) diff --git a/datacontract/model/odcs.py b/datacontract/model/odcs.py index 3b040918e..9f7cd6675 100644 --- a/datacontract/model/odcs.py +++ b/datacontract/model/odcs.py @@ -9,3 +9,16 @@ def is_open_data_contract_standard(odcs: dict) -> bool: bool: True if the dictionary is an OpenDataContractStandard, False otherwise. """ return odcs.get("kind") == "DataContract" and odcs.get("apiVersion", "").startswith("v3") + + +def is_open_data_product_standard(odcs: dict) -> bool: + """ + Check if the given dictionary is an open data product standard. + + Args: + odcs (dict): The dictionary to check. + + Returns: + bool: True if the dictionary is an open data product standard, False otherwise. + """ + return odcs.get("kind") == "DataProduct" and odcs.get("apiVersion", "").startswith("v1") diff --git a/datacontract/templates/index.html b/datacontract/templates/index.html index fb4a2bb1d..f7b9e8569 100644 --- a/datacontract/templates/index.html +++ b/datacontract/templates/index.html @@ -185,15 +185,15 @@