Skip to content

Commit 89b5c66

Browse files
ctarri04Z01ASYBAjochenchrist
authored
feat: export datacontract to dqx (#846)
Co-authored-by: Ababacar Sy Badiane <[email protected]> Co-authored-by: jochenchrist <[email protected]>
1 parent 44b8b16 commit 89b5c66

File tree

9 files changed

+1085
-30
lines changed

9 files changed

+1085
-30
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1717
- JSON Schema Check: Add globbing support for local JSON files
1818

1919

20+
### Added
21+
22+
- Export to DQX: `datacontract export --format dqx`
23+
24+
2025
## [0.10.34] - 2025-08-06
2126

2227
### Added

README.md

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -923,7 +923,7 @@ models:
923923
│ terraform|avro-idl|sql|sql-query|mer │
924924
│ maid|html|go|bigquery|dbml|spark|sql │
925925
│ alchemy|data-caterer|dcs|markdown|ic │
926-
│ eberg|custom|excel]
926+
│ eberg|custom|excel|dqx]
927927
│ --output PATH Specify the file path where the │
928928
│ exported data will be saved. If no │
929929
│ path is provided, the output will be │
@@ -969,35 +969,36 @@ datacontract export --format html --output datacontract.html
969969

970970
Available export options:
971971

972-
| Type | Description | Status |
973-
|----------------------|---------------------------------------------------------|--------|
974-
| `html` | Export to HTML ||
975-
| `jsonschema` | Export to JSON Schema ||
976-
| `odcs` | Export to Open Data Contract Standard (ODCS) V3 ||
977-
| `sodacl` | Export to SodaCL quality checks in YAML format ||
978-
| `dbt` | Export to dbt models in YAML format ||
979-
| `dbt-sources` | Export to dbt sources in YAML format ||
980-
| `dbt-staging-sql` | Export to dbt staging SQL models ||
981-
| `rdf` | Export data contract to RDF representation in N3 format ||
982-
| `avro` | Export to AVRO models ||
983-
| `protobuf` | Export to Protobuf ||
984-
| `terraform` | Export to terraform resources ||
985-
| `sql` | Export to SQL DDL ||
986-
| `sql-query` | Export to SQL Query ||
987-
| `great-expectations` | Export to Great Expectations Suites in JSON Format ||
988-
| `bigquery` | Export to BigQuery Schemas ||
989-
| `go` | Export to Go types ||
990-
| `pydantic-model` | Export to pydantic models ||
991-
| `DBML` | Export to a DBML Diagram description ||
992-
| `spark` | Export to a Spark StructType ||
993-
| `sqlalchemy` | Export to SQLAlchemy Models ||
994-
| `data-caterer` | Export to Data Caterer in YAML format ||
995-
| `dcs` | Export to Data Contract Specification in YAML format ||
996-
| `markdown` | Export to Markdown ||
972+
| Type | Description | Status |
973+
|----------------------|---------------------------------------------------------|---------|
974+
| `html` | Export to HTML | |
975+
| `jsonschema` | Export to JSON Schema | |
976+
| `odcs` | Export to Open Data Contract Standard (ODCS) V3 | |
977+
| `sodacl` | Export to SodaCL quality checks in YAML format | |
978+
| `dbt` | Export to dbt models in YAML format | |
979+
| `dbt-sources` | Export to dbt sources in YAML format | |
980+
| `dbt-staging-sql` | Export to dbt staging SQL models | |
981+
| `rdf` | Export data contract to RDF representation in N3 format | |
982+
| `avro` | Export to AVRO models | |
983+
| `protobuf` | Export to Protobuf | |
984+
| `terraform` | Export to terraform resources | |
985+
| `sql` | Export to SQL DDL | |
986+
| `sql-query` | Export to SQL Query | |
987+
| `great-expectations` | Export to Great Expectations Suites in JSON Format | |
988+
| `bigquery` | Export to BigQuery Schemas | |
989+
| `go` | Export to Go types | |
990+
| `pydantic-model` | Export to pydantic models | |
991+
| `DBML` | Export to a DBML Diagram description | |
992+
| `spark` | Export to a Spark StructType | |
993+
| `sqlalchemy` | Export to SQLAlchemy Models | |
994+
| `data-caterer` | Export to Data Caterer in YAML format | |
995+
| `dcs` | Export to Data Contract Specification in YAML format | |
996+
| `markdown` | Export to Markdown | |
997997
| `iceberg` | Export to an Iceberg JSON Schema Definition | partial |
998-
| `excel` | Export to ODCS Excel Template ||
999-
| `custom` | Export to Custom format with Jinja ||
1000-
| Missing something? | Please create an issue on GitHub | TBD |
998+
| `excel` | Export to ODCS Excel Template ||
999+
| `custom` | Export to Custom format with Jinja ||
1000+
| `dqx` | Export to DQX in YAML format ||
1001+
| Missing something? | Please create an issue on GitHub | TBD |
10011002

10021003
#### SQL
10031004

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
from typing import Any, Dict, List, Union
2+
3+
import yaml
4+
5+
from datacontract.export.exporter import Exporter, _check_models_for_export
6+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Quality
7+
8+
9+
class DqxKeys:
    """String constants for the keys used in DQX check specifications.

    These are the literal key names expected in a quality rule's
    ``specification`` payload; they are used as dictionary keys at runtime
    and must not be changed.
    """

    CHECK = "check"
    ARGUMENTS = "arguments"
    SPECIFICATION = "specification"
    COL_NAME = "column"
    COL_NAMES = "for_each_column"
    COLUMNS = "columns"
    FUNCTION = "function"
17+
18+
19+
class DqxExporter(Exporter):
    """Exporter implementation for converting data contracts to DQX YAML files."""

    def export(
        self,
        data_contract: DataContractSpecification,
        model: Model,
        server: str,
        sql_server_type: str,
        export_args: Dict[str, Any],
    ) -> str:
        """Export a data contract model's quality checks to DQX YAML.

        Args:
            data_contract: The full data contract specification.
            model: The model (or model selector) to export.
            server: Unused here; part of the common ``Exporter`` interface.
            sql_server_type: Unused here; part of the common ``Exporter`` interface.
            export_args: Unused here; part of the common ``Exporter`` interface.

        Returns:
            str: YAML document containing the model's DQX quality rules.
        """
        # Only the resolved model is needed; the model name is discarded.
        _, model_value = _check_models_for_export(data_contract, model, self.export_format)
        return to_dqx_yaml(model_value)
33+
34+
35+
def to_dqx_yaml(model_value: Model) -> str:
    """Render all quality checks of *model_value* as a DQX YAML document.

    Args:
        model_value (Model): The model whose quality rules should be exported.

    Returns:
        str: YAML representation of the model's quality-rule specifications.
    """
    rules = extract_quality_rules(model_value)
    return yaml.dump(
        rules,
        sort_keys=False,
        allow_unicode=True,
        default_flow_style=False,
    )
47+
48+
49+
def process_quality_rule(rule: Quality, column_name: str) -> Dict[str, Any]:
    """Return the DQX specification of *rule*, injecting the column if absent.

    When the check's arguments do not already name a target column (via
    ``column``, ``for_each_column`` or ``columns``), the current column path
    is injected: as a one-element ``columns`` list for the multi-column
    functions ``is_unique``/``foreign_key``, and as a scalar ``column``
    for every other function. The rule's specification is mutated in place.

    Args:
        rule (Quality): The quality rule to process (extra model fields carry
            the DQX payload).
        column_name (str): The full dotted path to the current column.

    Returns:
        dict: The processed quality rule specification.
    """
    spec = rule.model_extra[DqxKeys.SPECIFICATION]
    check = spec[DqxKeys.CHECK]
    args = check.setdefault(DqxKeys.ARGUMENTS, {})

    column_keys = (DqxKeys.COL_NAME, DqxKeys.COL_NAMES, DqxKeys.COLUMNS)
    if not any(key in args for key in column_keys):
        if check[DqxKeys.FUNCTION] in ("is_unique", "foreign_key"):
            args[DqxKeys.COLUMNS] = [column_name]
        else:
            args[DqxKeys.COL_NAME] = column_name

    return spec
73+
74+
75+
def extract_quality_rules(data: Union[Model, Field, Quality], column_path: str = "") -> List[Dict[str, Any]]:
    """Recursively collect every quality-rule specification beneath *data*.

    Args:
        data (Union[Model, Field, Quality]): A model, a field, or a single
            quality rule to walk.
        column_path (str, optional): Dotted path accumulated so far.
            Defaults to "".

    Returns:
        List[Dict[str, Any]]: All quality-rule specifications found.
    """
    # A bare quality rule resolves immediately against the current path.
    if isinstance(data, Quality):
        return [process_quality_rule(data, column_path)]

    rules: List[Dict[str, Any]] = []

    if isinstance(data, (Model, Field)):
        for name, child in data.fields.items():
            child_path = build_column_path(column_path, name)

            if child.fields:
                # Struct-like field: descend into its nested fields.
                rules.extend(extract_quality_rules(child, child_path))
            else:
                # Leaf field: collect its quality rules directly.
                rules.extend(process_quality_rule(r, child_path) for r in child.quality)

        # Quality rules attached directly to this model/field itself.
        rules.extend(process_quality_rule(r, column_path) for r in data.quality)

    return rules
108+
109+
110+
def build_column_path(current_path: str, key: str) -> str:
    """Join a parent path and a field key with a dot separator.

    Args:
        current_path (str): The path prefix accumulated so far; may be empty.
        key (str): The current field's name.

    Returns:
        str: ``"<current_path>.<key>"``, or just ``key`` when the prefix
        is empty.
    """
    if not current_path:
        return key
    return f"{current_path}.{key}"

datacontract/export/exporter.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class ExportFormat(str, Enum):
4646
iceberg = "iceberg"
4747
custom = "custom"
4848
excel = "excel"
49+
dqx = "dqx"
4950

5051
@classmethod
5152
def get_supported_formats(cls):

datacontract/export/exporter_factory.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,12 @@ def load_module_class(module_path, class_name):
197197
class_name="MarkdownExporter",
198198
)
199199

200+
exporter_factory.register_lazy_exporter(
201+
name=ExportFormat.dqx,
202+
module_path="datacontract.export.dqx_converter",
203+
class_name="DqxExporter",
204+
)
205+
200206
exporter_factory.register_lazy_exporter(
201207
name=ExportFormat.iceberg, module_path="datacontract.export.iceberg_converter", class_name="IcebergExporter"
202208
)

datacontract/export/spark_converter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def to_spark_data_type(field: Field) -> types.DataType:
152152
return types.DateType()
153153
if field_type == "bytes":
154154
return types.BinaryType()
155-
return types.StringType() # default if no condition is met
155+
return types.StringType() # default if no condition is met
156156

157157

158158
def print_schema(dtype: types.DataType) -> str:

0 commit comments

Comments
 (0)