Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased


### Added

- Export to DQX: `datacontract export --format dqx`


## [0.10.34] - 2025-08-06

### Added
Expand Down
59 changes: 30 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,7 @@ models:
│ terraform|avro-idl|sql|sql-query|mer │
│ maid|html|go|bigquery|dbml|spark|sql │
│ alchemy|data-caterer|dcs|markdown|ic │
│ eberg|custom|excel] │
│ eberg|custom|excel|dqx] │
│ --output PATH Specify the file path where the │
│ exported data will be saved. If no │
│ path is provided, the output will be │
Expand Down Expand Up @@ -969,35 +969,36 @@ datacontract export --format html --output datacontract.html

Available export options:

| Type | Description | Status |
|----------------------|---------------------------------------------------------|--------|
| `html` | Export to HTML | ✅ |
| `jsonschema` | Export to JSON Schema | ✅ |
| `odcs` | Export to Open Data Contract Standard (ODCS) V3 | ✅ |
| `sodacl` | Export to SodaCL quality checks in YAML format | ✅ |
| `dbt` | Export to dbt models in YAML format | ✅ |
| `dbt-sources` | Export to dbt sources in YAML format | ✅ |
| `dbt-staging-sql` | Export to dbt staging SQL models | ✅ |
| `rdf` | Export data contract to RDF representation in N3 format | ✅ |
| `avro` | Export to AVRO models | ✅ |
| `protobuf` | Export to Protobuf | ✅ |
| `terraform` | Export to terraform resources | ✅ |
| `sql` | Export to SQL DDL | ✅ |
| `sql-query` | Export to SQL Query | ✅ |
| `great-expectations` | Export to Great Expectations Suites in JSON Format | ✅ |
| `bigquery` | Export to BigQuery Schemas | ✅ |
| `go` | Export to Go types | ✅ |
| `pydantic-model` | Export to pydantic models | ✅ |
| `DBML` | Export to a DBML Diagram description | ✅ |
| `spark` | Export to a Spark StructType | ✅ |
| `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
| `data-caterer` | Export to Data Caterer in YAML format | ✅ |
| `dcs` | Export to Data Contract Specification in YAML format | ✅ |
| `markdown` | Export to Markdown | ✅ |
| Type | Description | Status |
|----------------------|---------------------------------------------------------|---------|
| `html` | Export to HTML | ✅ |
| `jsonschema` | Export to JSON Schema | ✅ |
| `odcs` | Export to Open Data Contract Standard (ODCS) V3 | ✅ |
| `sodacl` | Export to SodaCL quality checks in YAML format | ✅ |
| `dbt` | Export to dbt models in YAML format | ✅ |
| `dbt-sources` | Export to dbt sources in YAML format | ✅ |
| `dbt-staging-sql` | Export to dbt staging SQL models | ✅ |
| `rdf` | Export data contract to RDF representation in N3 format | ✅ |
| `avro` | Export to AVRO models | ✅ |
| `protobuf` | Export to Protobuf | ✅ |
| `terraform` | Export to terraform resources | ✅ |
| `sql` | Export to SQL DDL | ✅ |
| `sql-query` | Export to SQL Query | ✅ |
| `great-expectations` | Export to Great Expectations Suites in JSON Format | ✅ |
| `bigquery` | Export to BigQuery Schemas | ✅ |
| `go` | Export to Go types | ✅ |
| `pydantic-model` | Export to pydantic models | ✅ |
| `DBML` | Export to a DBML Diagram description | ✅ |
| `spark` | Export to a Spark StructType | ✅ |
| `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
| `data-caterer` | Export to Data Caterer in YAML format | ✅ |
| `dcs` | Export to Data Contract Specification in YAML format | ✅ |
| `markdown` | Export to Markdown | ✅ |
| `iceberg` | Export to an Iceberg JSON Schema Definition | partial |
| `excel` | Export to ODCS Excel Template | ✅ |
| `custom` | Export to Custom format with Jinja | ✅ |
| Missing something? | Please create an issue on GitHub | TBD |
| `excel` | Export to ODCS Excel Template | ✅ |
| `custom` | Export to Custom format with Jinja | ✅ |
| `dqx` | Export to DQX in YAML format | ✅ |
| Missing something? | Please create an issue on GitHub | TBD |

#### SQL

Expand Down
121 changes: 121 additions & 0 deletions datacontract/export/dqx_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from typing import Any, Dict, List, Union

import yaml

from datacontract.export.exporter import Exporter, _check_models_for_export
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Quality


class DqxKeys:
    """String constants for the keys used in DQX check specifications."""

    CHECK = "check"  # top-level key holding a single check definition
    ARGUMENTS = "arguments"  # keyword arguments passed to the check function
    SPECIFICATION = "specification"  # wrapper key in the quality rule's extra fields
    COL_NAME = "column"  # argument naming a single target column
    COL_NAMES = "for_each_column"  # argument applying the check to several columns
    COLUMNS = "columns"  # argument taking a list of columns (multi-column checks)
    FUNCTION = "function"  # name of the DQX check function to run


class DqxExporter(Exporter):
    """Exporter that renders a data contract's quality checks as DQX YAML."""

    def export(
        self,
        data_contract: DataContractSpecification,
        model: Model,
        server: str,
        sql_server_type: str,
        export_args: Dict[str, Any],
    ) -> str:
        """Export the selected model's quality checks to DQX YAML.

        Args:
            data_contract: The data contract to export from.
            model: The model selector (name or "all").
            server: Unused; part of the common exporter interface.
            sql_server_type: Unused; part of the common exporter interface.
            export_args: Unused; part of the common exporter interface.

        Returns:
            str: DQX quality checks serialized as YAML.
        """
        # Resolve exactly one model; the name itself is not needed here.
        _, selected_model = _check_models_for_export(data_contract, model, self.export_format)
        return to_dqx_yaml(selected_model)


def to_dqx_yaml(model_value: Model) -> str:
    """
    Converts the data contract's quality checks to DQX YAML format.

    Args:
        model_value (Model): The data contract to convert.

    Returns:
        str: YAML representation of the data contract's quality checks.
    """
    rules = extract_quality_rules(model_value)
    # Preserve insertion order and emit block-style YAML with unicode intact.
    return yaml.dump(
        rules,
        sort_keys=False,
        allow_unicode=True,
        default_flow_style=False,
    )


def process_quality_rule(rule: Quality, column_name: str) -> Dict[str, Any]:
    """
    Processes a single quality rule by injecting the column path into its arguments if absent.

    Args:
        rule (Quality): The quality rule to process.
        column_name (str): The full path to the current column.

    Returns:
        dict: The processed quality rule specification.
    """
    specification = rule.model_extra[DqxKeys.SPECIFICATION]
    check = specification[DqxKeys.CHECK]
    # Ensure an arguments mapping exists so the column can be injected.
    arguments = check.setdefault(DqxKeys.ARGUMENTS, {})

    # Only inject when no column target has been specified by the rule author.
    column_keys = (DqxKeys.COL_NAME, DqxKeys.COL_NAMES, DqxKeys.COLUMNS)
    if not any(key in arguments for key in column_keys):
        if check[DqxKeys.FUNCTION] in ("is_unique", "foreign_key"):
            # Multi-column checks take a list of columns.
            arguments[DqxKeys.COLUMNS] = [column_name]
        else:
            arguments[DqxKeys.COL_NAME] = column_name

    return specification


def extract_quality_rules(data: Union[Model, Field, Quality], column_path: str = "") -> List[Dict[str, Any]]:
    """
    Recursively extracts all quality rules from a data contract structure.

    Args:
        data (Union[Model, Field, Quality]): The data contract model, field, or quality rule.
        column_path (str, optional): The current path in the schema hierarchy. Defaults to "".

    Returns:
        List[Dict[str, Any]]: A list of quality rule specifications.
    """
    # A bare quality rule terminates the recursion immediately.
    if isinstance(data, Quality):
        return [process_quality_rule(data, column_path)]

    collected: List[Dict[str, Any]] = []

    if isinstance(data, (Model, Field)):
        for name, child in data.fields.items():
            child_path = build_column_path(column_path, name)

            if child.fields:
                # Struct-like field: descend into its children.
                collected.extend(extract_quality_rules(child, child_path))
            else:
                # Leaf field: collect its own quality rules.
                collected.extend(process_quality_rule(rule, child_path) for rule in child.quality)

    # Rules attached directly at this level (model- or struct-level checks).
    collected.extend(process_quality_rule(rule, column_path) for rule in data.quality)

    return collected


def build_column_path(current_path: str, key: str) -> str:
    """
    Builds the full column path by concatenating parent path with current key.

    Args:
        current_path (str): The current path prefix.
        key (str): The current field's key.

    Returns:
        str: The full path.
    """
    # At the root there is no prefix to join against.
    if not current_path:
        return key
    return f"{current_path}.{key}"
1 change: 1 addition & 0 deletions datacontract/export/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class ExportFormat(str, Enum):
iceberg = "iceberg"
custom = "custom"
excel = "excel"
dqx = "dqx"

@classmethod
def get_supported_formats(cls):
Expand Down
6 changes: 6 additions & 0 deletions datacontract/export/exporter_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,12 @@ def load_module_class(module_path, class_name):
class_name="MarkdownExporter",
)

exporter_factory.register_lazy_exporter(
name=ExportFormat.dqx,
module_path="datacontract.export.dqx_converter",
class_name="DqxExporter",
)

exporter_factory.register_lazy_exporter(
name=ExportFormat.iceberg, module_path="datacontract.export.iceberg_converter", class_name="IcebergExporter"
)
Expand Down
2 changes: 1 addition & 1 deletion datacontract/export/spark_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def to_spark_data_type(field: Field) -> types.DataType:
return types.DateType()
if field_type == "bytes":
return types.BinaryType()
return types.StringType() # default if no condition is met
return types.StringType() # default if no condition is met


def print_schema(dtype: types.DataType) -> str:
Expand Down
Loading
Loading