Skip to content

Commit 44e458d

Browse files
fix: separate quoting config for field and model names (#798)
Allow for separate quoting configuration for field and model names via the new `QuotingConfig` class. This allows for finer-grained control over quoting, as required by each database. Co-authored-by: jochenchrist <[email protected]>
1 parent bbfd6c9 commit 44e458d

File tree

1 file changed

+102
-59
lines changed

1 file changed

+102
-59
lines changed

datacontract/engines/data_contract_checks.py

Lines changed: 102 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import re
12
import uuid
3+
from dataclasses import dataclass
24
from typing import List
35
from venv import logger
46

@@ -9,6 +11,12 @@
911
from datacontract.model.run import Check
1012

1113

14+
@dataclass
class QuotingConfig:
    """Flags controlling whether identifiers are double-quoted in generated checks.

    Both flags default to ``False``, i.e. names are emitted unquoted.
    """

    # Wrap field (column) names in double quotes inside check expressions and queries.
    quote_field_name: bool = False
    # Wrap model (table) names in double quotes in the "checks for ..." key and in queries.
    quote_model_name: bool = False
18+
19+
1220
def create_checks(data_contract_spec: DataContractSpecification, server: Server) -> List[Check]:
1321
checks: List[Check] = []
1422
for model_key, model_value in data_contract_spec.models.items():
@@ -26,37 +34,41 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
2634
fields = model_value.fields
2735

2836
check_types = is_check_types(server)
29-
quote_field_name = server_type in ["postgres", "sqlserver"]
37+
38+
quoting_config = QuotingConfig(
39+
quote_field_name=server_type in ["postgres", "sqlserver"],
40+
quote_model_name=server_type in ["postgres", "sqlserver"],
41+
)
3042

3143
for field_name, field in fields.items():
32-
checks.append(check_field_is_present(model_name, field_name, quote_field_name))
44+
checks.append(check_field_is_present(model_name, field_name, quoting_config))
3345
if check_types and field.type is not None:
3446
sql_type = convert_to_sql_type(field, server_type)
35-
checks.append(check_field_type(model_name, field_name, sql_type, quote_field_name))
47+
checks.append(check_field_type(model_name, field_name, sql_type, quoting_config))
3648
if field.required:
37-
checks.append(check_field_required(model_name, field_name, quote_field_name))
49+
checks.append(check_field_required(model_name, field_name, quoting_config))
3850
if field.unique:
39-
checks.append(check_field_unique(model_name, field_name, quote_field_name))
51+
checks.append(check_field_unique(model_name, field_name, quoting_config))
4052
if field.minLength is not None:
41-
checks.append(check_field_min_length(model_name, field_name, field.minLength, quote_field_name))
53+
checks.append(check_field_min_length(model_name, field_name, field.minLength, quoting_config))
4254
if field.maxLength is not None:
43-
checks.append(check_field_max_length(model_name, field_name, field.maxLength, quote_field_name))
55+
checks.append(check_field_max_length(model_name, field_name, field.maxLength, quoting_config))
4456
if field.minimum is not None:
45-
checks.append(check_field_minimum(model_name, field_name, field.minimum, quote_field_name))
57+
checks.append(check_field_minimum(model_name, field_name, field.minimum, quoting_config))
4658
if field.maximum is not None:
47-
checks.append(check_field_maximum(model_name, field_name, field.maximum, quote_field_name))
59+
checks.append(check_field_maximum(model_name, field_name, field.maximum, quoting_config))
4860
if field.exclusiveMinimum is not None:
49-
checks.append(check_field_minimum(model_name, field_name, field.exclusiveMinimum, quote_field_name))
50-
checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMinimum, quote_field_name))
61+
checks.append(check_field_minimum(model_name, field_name, field.exclusiveMinimum, quoting_config))
62+
checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMinimum, quoting_config))
5163
if field.exclusiveMaximum is not None:
52-
checks.append(check_field_maximum(model_name, field_name, field.exclusiveMaximum, quote_field_name))
53-
checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMaximum, quote_field_name))
64+
checks.append(check_field_maximum(model_name, field_name, field.exclusiveMaximum, quoting_config))
65+
checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMaximum, quoting_config))
5466
if field.pattern is not None:
55-
checks.append(check_field_regex(model_name, field_name, field.pattern, quote_field_name))
67+
checks.append(check_field_regex(model_name, field_name, field.pattern, quoting_config))
5668
if field.enum is not None and len(field.enum) > 0:
57-
checks.append(check_field_enum(model_name, field_name, field.enum, quote_field_name))
69+
checks.append(check_field_enum(model_name, field_name, field.enum, quoting_config))
5870
if field.quality is not None and len(field.quality) > 0:
59-
quality_list = check_quality_list(model_name, field_name, field.quality)
71+
quality_list = check_quality_list(model_name, field_name, field.quality, quoting_config)
6072
if (quality_list is not None) and len(quality_list) > 0:
6173
checks.extend(quality_list)
6274
# TODO references: str = None
@@ -70,8 +82,8 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
7082
return checks
7183

7284

73-
def checks_for(model_name, quote_model_name: bool):
    """Build the ``checks for <model>`` key used at the top of a SodaCL check dict.

    Args:
        model_name: Name of the model (table) the checks apply to.
        quote_model_name: When true, the model name is wrapped in double quotes.

    Returns:
        The string ``checks for "<model_name>"`` when quoting is requested,
        otherwise ``checks for <model_name>``.
    """
    if quote_model_name:
        return f'checks for "{model_name}"'
    return f"checks for {model_name}"
7789

@@ -98,11 +110,11 @@ def to_model_name(model_key, model_value, server_type):
98110
return model_key
99111

100112

101-
def check_field_is_present(model_name, field_name, quote_field_name: bool) -> Check:
113+
def check_field_is_present(model_name, field_name, quoting_config: QuotingConfig = QuotingConfig()) -> Check:
102114
check_type = "field_is_present"
103115
check_key = f"{model_name}__{field_name}__{check_type}"
104116
sodacl_check_dict = {
105-
checks_for(model_name, quote_field_name): [
117+
checks_for(model_name, quoting_config.quote_model_name): [
106118
{
107119
"schema": {
108120
"name": check_key,
@@ -127,11 +139,13 @@ def check_field_is_present(model_name, field_name, quote_field_name: bool) -> Ch
127139
)
128140

129141

130-
def check_field_type(model_name: str, field_name: str, expected_type: str, quote_field_name: bool = False):
142+
def check_field_type(
143+
model_name: str, field_name: str, expected_type: str, quoting_config: QuotingConfig = QuotingConfig()
144+
):
131145
check_type = "field_type"
132146
check_key = f"{model_name}__{field_name}__{check_type}"
133147
sodacl_check_dict = {
134-
checks_for(model_name, quote_field_name): [
148+
checks_for(model_name, quoting_config.quote_model_name): [
135149
{
136150
"schema": {
137151
"name": check_key,
@@ -158,16 +172,16 @@ def check_field_type(model_name: str, field_name: str, expected_type: str, quote
158172
)
159173

160174

161-
def check_field_required(model_name: str, field_name: str, quote_field_name: bool = False):
162-
if quote_field_name:
175+
def check_field_required(model_name: str, field_name: str, quoting_config: QuotingConfig = QuotingConfig()):
176+
if quoting_config.quote_field_name:
163177
field_name_for_soda = f'"{field_name}"'
164178
else:
165179
field_name_for_soda = field_name
166180

167181
check_type = "field_required"
168182
check_key = f"{model_name}__{field_name}__{check_type}"
169183
sodacl_check_dict = {
170-
checks_for(model_name, quote_field_name): [
184+
checks_for(model_name, quoting_config.quote_model_name): [
171185
{
172186
f"missing_count({field_name_for_soda}) = 0": {
173187
"name": check_key,
@@ -189,16 +203,16 @@ def check_field_required(model_name: str, field_name: str, quote_field_name: boo
189203
)
190204

191205

192-
def check_field_unique(model_name: str, field_name: str, quote_field_name: bool = False):
193-
if quote_field_name:
206+
def check_field_unique(model_name: str, field_name: str, quoting_config: QuotingConfig = QuotingConfig()):
207+
if quoting_config.quote_field_name:
194208
field_name_for_soda = f'"{field_name}"'
195209
else:
196210
field_name_for_soda = field_name
197211

198212
check_type = "field_unique"
199213
check_key = f"{model_name}__{field_name}__{check_type}"
200214
sodacl_check_dict = {
201-
checks_for(model_name, quote_field_name): [
215+
checks_for(model_name, quoting_config.quote_model_name): [
202216
{
203217
f"duplicate_count({field_name_for_soda}) = 0": {
204218
"name": check_key,
@@ -220,16 +234,18 @@ def check_field_unique(model_name: str, field_name: str, quote_field_name: bool
220234
)
221235

222236

223-
def check_field_min_length(model_name: str, field_name: str, min_length: int, quote_field_name: bool = False):
224-
if quote_field_name:
237+
def check_field_min_length(
238+
model_name: str, field_name: str, min_length: int, quoting_config: QuotingConfig = QuotingConfig()
239+
):
240+
if quoting_config.quote_field_name:
225241
field_name_for_soda = f'"{field_name}"'
226242
else:
227243
field_name_for_soda = field_name
228244

229245
check_type = "field_min_length"
230246
check_key = f"{model_name}__{field_name}__{check_type}"
231247
sodacl_check_dict = {
232-
checks_for(model_name, quote_field_name): [
248+
checks_for(model_name, quoting_config.quote_model_name): [
233249
{
234250
f"invalid_count({field_name_for_soda}) = 0": {
235251
"name": check_key,
@@ -252,16 +268,18 @@ def check_field_min_length(model_name: str, field_name: str, min_length: int, qu
252268
)
253269

254270

255-
def check_field_max_length(model_name: str, field_name: str, max_length: int, quote_field_name: bool = False):
256-
if quote_field_name:
271+
def check_field_max_length(
272+
model_name: str, field_name: str, max_length: int, quoting_config: QuotingConfig = QuotingConfig()
273+
):
274+
if quoting_config.quote_field_name:
257275
field_name_for_soda = f'"{field_name}"'
258276
else:
259277
field_name_for_soda = field_name
260278

261279
check_type = "field_max_length"
262280
check_key = f"{model_name}__{field_name}__{check_type}"
263281
sodacl_check_dict = {
264-
checks_for(model_name, quote_field_name): [
282+
checks_for(model_name, quoting_config.quote_model_name): [
265283
{
266284
f"invalid_count({field_name_for_soda}) = 0": {
267285
"name": check_key,
@@ -284,16 +302,18 @@ def check_field_max_length(model_name: str, field_name: str, max_length: int, qu
284302
)
285303

286304

287-
def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_field_name: bool = False):
288-
if quote_field_name:
305+
def check_field_minimum(
306+
model_name: str, field_name: str, minimum: int, quoting_config: QuotingConfig = QuotingConfig()
307+
):
308+
if quoting_config.quote_field_name:
289309
field_name_for_soda = f'"{field_name}"'
290310
else:
291311
field_name_for_soda = field_name
292312

293313
check_type = "field_minimum"
294314
check_key = f"{model_name}__{field_name}__{check_type}"
295315
sodacl_check_dict = {
296-
checks_for(model_name, quote_field_name): [
316+
checks_for(model_name, quoting_config.quote_model_name): [
297317
{
298318
f"invalid_count({field_name_for_soda}) = 0": {
299319
"name": check_key,
@@ -316,16 +336,18 @@ def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_fi
316336
)
317337

318338

319-
def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_field_name: bool = False):
320-
if quote_field_name:
339+
def check_field_maximum(
340+
model_name: str, field_name: str, maximum: int, quoting_config: QuotingConfig = QuotingConfig()
341+
):
342+
if quoting_config.quote_field_name:
321343
field_name_for_soda = f'"{field_name}"'
322344
else:
323345
field_name_for_soda = field_name
324346

325347
check_type = "field_maximum"
326348
check_key = f"{model_name}__{field_name}__{check_type}"
327349
sodacl_check_dict = {
328-
checks_for(model_name, quote_field_name): [
350+
checks_for(model_name, quoting_config.quote_model_name): [
329351
{
330352
f"invalid_count({field_name_for_soda}) = 0": {
331353
"name": check_key,
@@ -348,16 +370,18 @@ def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_fi
348370
)
349371

350372

351-
def check_field_not_equal(model_name: str, field_name: str, value: int, quote_field_name: bool = False):
352-
if quote_field_name:
373+
def check_field_not_equal(
374+
model_name: str, field_name: str, value: int, quoting_config: QuotingConfig = QuotingConfig()
375+
):
376+
if quoting_config.quote_field_name:
353377
field_name_for_soda = f'"{field_name}"'
354378
else:
355379
field_name_for_soda = field_name
356380

357381
check_type = "field_not_equal"
358382
check_key = f"{model_name}__{field_name}__{check_type}"
359383
sodacl_check_dict = {
360-
checks_for(model_name, quote_field_name): [
384+
checks_for(model_name, quoting_config.quote_model_name): [
361385
{
362386
f"invalid_count({field_name_for_soda}) = 0": {
363387
"name": check_key,
@@ -380,16 +404,16 @@ def check_field_not_equal(model_name: str, field_name: str, value: int, quote_fi
380404
)
381405

382406

383-
def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_name: bool = False):
384-
if quote_field_name:
407+
def check_field_enum(model_name: str, field_name: str, enum: list, quoting_config: QuotingConfig = QuotingConfig()):
408+
if quoting_config.quote_field_name:
385409
field_name_for_soda = f'"{field_name}"'
386410
else:
387411
field_name_for_soda = field_name
388412

389413
check_type = "field_enum"
390414
check_key = f"{model_name}__{field_name}__{check_type}"
391415
sodacl_check_dict = {
392-
checks_for(model_name, quote_field_name): [
416+
checks_for(model_name, quoting_config.quote_model_name): [
393417
{
394418
f"invalid_count({field_name_for_soda}) = 0": {
395419
"name": check_key,
@@ -412,16 +436,16 @@ def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_n
412436
)
413437

414438

415-
def check_field_regex(model_name: str, field_name: str, pattern: str, quote_field_name: bool = False):
416-
if quote_field_name:
439+
def check_field_regex(model_name: str, field_name: str, pattern: str, quoting_config: QuotingConfig = QuotingConfig()):
440+
if quoting_config.quote_field_name:
417441
field_name_for_soda = f'"{field_name}"'
418442
else:
419443
field_name_for_soda = field_name
420444

421445
check_type = "field_regex"
422446
check_key = f"{model_name}__{field_name}__{check_type}"
423447
sodacl_check_dict = {
424-
checks_for(model_name, quote_field_name): [
448+
checks_for(model_name, quoting_config.quote_model_name): [
425449
{
426450
f"invalid_count({field_name_for_soda}) = 0": {
427451
"name": check_key,
@@ -444,7 +468,9 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quote_fiel
444468
)
445469

446470

447-
def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> List[Check]:
471+
def check_quality_list(
472+
model_name, field_name, quality_list: List[Quality], quoting_config: QuotingConfig = QuotingConfig()
473+
) -> List[Check]:
448474
checks: List[Check] = []
449475

450476
count = 0
@@ -457,15 +483,20 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> L
457483
check_key = f"{model_name}__{field_name}__quality_sql_{count}"
458484
check_type = "model_quality_sql"
459485
threshold = to_sodacl_threshold(quality)
460-
query = prepare_query(quality, model_name, field_name)
486+
query = prepare_query(quality, model_name, field_name, quoting_config)
461487
if query is None:
462488
logger.warning(f"Quality check {check_key} has no query")
463489
continue
464490
if threshold is None:
465491
logger.warning(f"Quality check {check_key} has no valid threshold")
466492
continue
493+
494+
if quoting_config.quote_model_name:
495+
model_name_for_soda = f'"{model_name}"'
496+
else:
497+
model_name_for_soda = model_name
467498
sodacl_check_dict = {
468-
f"checks for {model_name}": [
499+
f"checks for {model_name_for_soda}": [
469500
{
470501
f"{check_key} {threshold}": {
471502
f"{check_key} query": query,
@@ -493,22 +524,34 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> L
493524
return checks
494525

495526

496-
def prepare_query(
    quality: Quality, model_name: str, field_name: str | None = None, quoting_config: QuotingConfig = QuotingConfig()
) -> str | None:
    """Substitute model and field placeholders in a quality check's SQL query.

    ``{model}``, ``{schema}`` and ``{table}`` are replaced by the model name;
    ``{field}``, ``{column}`` and ``{property}`` are replaced by the field name
    when one is given. A single or double quote directly surrounding a
    placeholder is consumed by the substitution, so the quoting of the result
    is decided solely by ``quoting_config``.

    Args:
        quality: Quality definition whose ``query`` attribute holds the template.
        model_name: Name substituted for the model placeholders.
        field_name: Name substituted for the field placeholders; when ``None``
            the field placeholders are left untouched.
        quoting_config: Controls whether the substituted names are wrapped in
            double quotes. NOTE: the default instance is created once at
            definition time and shared between calls; do not mutate it.

    Returns:
        The query with placeholders substituted, or ``None`` when the quality
        definition has no query (``None`` or an empty string).
    """
    if quality.query is None or quality.query == "":
        return None

    model_name_for_soda = f'"{model_name}"' if quoting_config.quote_model_name else model_name
    replacements = dict.fromkeys(("model", "schema", "table"), model_name_for_soda)

    if field_name is not None:
        field_name_for_soda = f'"{field_name}"' if quoting_config.quote_field_name else field_name
        replacements.update(dict.fromkeys(("field", "column", "property"), field_name_for_soda))

    # One pass over all placeholders: text inserted for one placeholder is never
    # rescanned, so quotes added by an earlier substitution cannot be stripped
    # by the optional-quote match of a later one.
    placeholder = re.compile(r'["\']?\{(' + "|".join(replacements) + r')\}["\']?')

    # A function replacement inserts the name literally; a plain replacement
    # string would have backslashes interpreted as escapes or group references.
    return placeholder.sub(lambda match: replacements[match.group(1)], quality.query)
514557

0 commit comments

Comments
 (0)