diff --git a/ingestion/examples/sample_data/tests/testCaseResults.json b/ingestion/examples/sample_data/tests/testCaseResults.json index cb880dee626d..af7d54d66d7a 100644 --- a/ingestion/examples/sample_data/tests/testCaseResults.json +++ b/ingestion/examples/sample_data/tests/testCaseResults.json @@ -708,6 +708,26 @@ { "name": "addedRows", "value": "2234" + }, + { + "name": "addedColumns", + "value": "2" + }, + { + "name": "removedColumns", + "value": "1" + }, + { + "name": "changedColumns", + "value": "1" + }, + { + "name": "schemaTable1", + "value": "serviceType='BigQuery' fullyQualifiedTableName='ecommerce_db.shopify.dim_address' schema={'order_id': Integer(_notes=[], precision=0, python_type=), 'quantity': Integer(_notes=[], precision=0, python_type=), 'product_id': Integer(_notes=[], precision=0, python_type=), 'customer_id': Integer(_notes=[], precision=0, python_type=), 'status': String_VaryingAlphanum(_notes=[], collation=None), 'order_date': Date(_notes=[], precision=6, rounds=True), 'price': Decimal(_notes=[], precision=2)}" + }, + { + "name": "schemaTable2", + "value": "serviceType='BigQuery' fullyQualifiedTableName='shopify.production_dim_address' schema={'user_id': Integer(_notes=[], precision=0, python_type=), 'id': Integer(_notes=[], precision=0, python_type=), 'cycle_name': String_VaryingAlphanum(_notes=[], collation=None), 'status': String_VaryingAlphanum(_notes=[], collation=None), 'order_date': Date(_notes=[], precision=6, rounds=True)}" } ] } @@ -748,7 +768,7 @@ [ "+", "facf92d7-05ea-43d2-ba2a-067d63dee60c", - "a8d30187-1409-4606-9259-322a4f6caf74", + "e02e1fac-b650-4db8-8c9d-5fa5edf5d863", "Amber", "Albert", "3170 Warren Orchard Apt. 
834", @@ -796,6 +816,14 @@ { "name": "changedColumns", "value": "1" + }, + { + "name": "schemaTable1", + "value": "serviceType='BigQuery' fullyQualifiedTableName='ecommerce_db.shopify.dim_address' schema={'order_id': Integer(_notes=[], precision=0, python_type=), 'quantity': Integer(_notes=[], precision=0, python_type=), 'product_id': Integer(_notes=[], precision=0, python_type=), 'customer_id': Integer(_notes=[], precision=0, python_type=), 'status': String_VaryingAlphanum(_notes=[], collation=None), 'order_date': Date(_notes=[], precision=6, rounds=True), 'price': Decimal(_notes=[], precision=2)}" + }, + { + "name": "schemaTable2", + "value": "serviceType='BigQuery' fullyQualifiedTableName='shopify.production_dim_address' schema={'user_id': Integer(_notes=[], precision=0, python_type=), 'id': Integer(_notes=[], precision=0, python_type=), 'cycle_name': String_VaryingAlphanum(_notes=[], collation=None), 'status': String_VaryingAlphanum(_notes=[], collation=None), 'order_date': Date(_notes=[], precision=6, rounds=True)}" } ] } diff --git a/ingestion/examples/sample_data/tests/testSuites.json b/ingestion/examples/sample_data/tests/testSuites.json index de3d0e5cdd4d..a2de3c49a47f 100644 --- a/ingestion/examples/sample_data/tests/testSuites.json +++ b/ingestion/examples/sample_data/tests/testSuites.json @@ -419,6 +419,10 @@ { "name": "table2", "value": "sample_data.ecommerce_db.shopify.production_dim_address" + }, + { + "name": "keyColumns", + "value": "[\"address_id\"]" } ], "resolutions": {} diff --git a/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py b/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py index 1d1d137f2ee3..67e77d175a18 100644 --- a/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py +++ b/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py @@ -24,6 +24,7 @@ from data_diff.diff_tables import DiffResultWrapper from data_diff.errors import 
DataDiffMismatchingKeyTypesError from data_diff.utils import ArithAlphanumeric, CaseInsensitiveDict +from pydantic import BaseModel from sqlalchemy import Column as SAColumn from sqlalchemy import literal, select from sqlalchemy.engine import make_url @@ -76,6 +77,26 @@ ] +class SchemaDiffResult(BaseModel): + class Config: + arbitrary_types_allowed = True + + serviceType: str + fullyQualifiedTableName: str + schema: Dict[str, Dict[str, str]] + + +class ColumnDiffResult(BaseModel): + class Config: + arbitrary_types_allowed = True + + removed: List[str] + added: List[str] + changed: List[str] + schemaTable1: SchemaDiffResult + schemaTable2: SchemaDiffResult + + def build_sample_where_clause( table: TableParameter, key_columns: List[str], salt: str, hex_nounce: str ) -> str: @@ -229,12 +250,23 @@ def _run_dimensional_validation(self): return [] def _run(self) -> TestCaseResult: - result = self.get_column_diff() - if result: - return result + column_diff: ColumnDiffResult = self.get_column_diff() threshold = self.get_test_case_param_value( self.test_case.parameterValues, "threshold", int, default=0 ) + if column_diff: + # If there are column differences, we set extra_columns to the common columns for the diff + # Exclude incomparable columns (different data types) from the comparison + common_columns = list( + ( + set(column_diff.schemaTable1.schema.keys()) + & set(column_diff.schemaTable2.schema.keys()) + ) + - set(column_diff.changed) + ) + self.runtime_params.extraColumns = common_columns + self.runtime_params.table1.extra_columns = common_columns + self.runtime_params.table2.extra_columns = common_columns table_diff_iter = self.get_table_diff() if not threshold or self.test_case.computePassedFailedRowCount: @@ -255,6 +287,7 @@ def _run(self) -> TestCaseResult: stats["updated"], stats["exclusive_A"], stats["exclusive_B"], + column_diff, ) count = self._compute_row_count(self.runner, None) # type: ignore test_case_result.passedRows = stats["unchanged"] @@ -268,6 
+301,7 @@ def _run(self) -> TestCaseResult: return self.get_row_diff_test_case_result( threshold, self.calculate_diffs_with_limit(table_diff_iter, threshold), + column_diff, ) def get_incomparable_columns(self) -> List[str]: @@ -508,6 +542,7 @@ def get_row_diff_test_case_result( changed: Optional[int] = None, removed: Optional[int] = None, added: Optional[int] = None, + column_diff: Optional[ColumnDiffResult] = None, ) -> TestCaseResult: """Build a test case result for a row diff test. If the number of differences is less than the threshold, the test will pass, otherwise it will fail. The result will contain the number of added, removed, and changed @@ -523,6 +558,34 @@ def get_row_diff_test_case_result( Returns: TestCaseResult: The result of the row diff test """ + test_case_results = [ + TestResultValue(name="removedRows", value=str(removed)), + TestResultValue(name="addedRows", value=str(added)), + TestResultValue(name="changedRows", value=str(changed)), + TestResultValue(name="diffCount", value=str(total_diffs)), + ] + + if column_diff: + test_case_results.extend( + [ + TestResultValue( + name="removedColumns", value=str(len(column_diff.removed)) + ), + TestResultValue( + name="addedColumns", value=str(len(column_diff.added)) + ), + TestResultValue( + name="changedColumns", value=str(len(column_diff.changed)) + ), + TestResultValue( + name="schemaTable1", value=str(column_diff.schemaTable1) + ), + TestResultValue( + name="schemaTable2", value=str(column_diff.schemaTable2) + ), + ] + ) + return TestCaseResult( timestamp=self.execution_date, # type: ignore testCaseStatus=self.get_test_case_status( @@ -531,12 +594,7 @@ def get_row_diff_test_case_result( result=f"Found {total_diffs} different rows which is more than the threshold of {threshold}", failedRows=total_diffs, validateColumns=False, - testResultValue=[ - TestResultValue(name="removedRows", value=str(removed)), - TestResultValue(name="addedRows", value=str(added)), - TestResultValue(name="changedRows", 
value=str(changed)), - TestResultValue(name="diffCount", value=str(total_diffs)), - ], + testResultValue=test_case_results, ) def _validate_dialects(self): @@ -551,7 +609,7 @@ def _validate_dialects(self): if dialect not in SUPPORTED_DIALECTS: raise UnsupportedDialectError(name, dialect) - def get_column_diff(self) -> Optional[TestCaseResult]: + def get_column_diff(self) -> Optional[ColumnDiffResult]: """Get the column diff between the two tables. If there are no differences, return None.""" removed, added = self.get_changed_added_columns( [ @@ -568,10 +626,32 @@ def get_column_diff(self) -> Optional[TestCaseResult]: ) changed = self.get_incomparable_columns() if removed or added or changed: - return self.column_validation_result( - removed, - added, - changed, + return ColumnDiffResult( + removed=removed, + added=added, + changed=changed, + schemaTable1=SchemaDiffResult( + serviceType=self.runtime_params.table1.database_service_type.name, + fullyQualifiedTableName=self.runtime_params.table1.path, + schema={ + c.name.root: { + "type": c.dataTypeDisplay, + "constraints": c.constraint.value if c.constraint else "", + } + for c in self.runtime_params.table1.columns + }, + ), + schemaTable2=SchemaDiffResult( + serviceType=self.runtime_params.table2.database_service_type.name, + fullyQualifiedTableName=self.runtime_params.table2.path, + schema={ + c.name.root: { + "type": c.dataTypeDisplay, + "constraints": c.constraint.value if c.constraint else "", + } + for c in self.runtime_params.table2.columns + }, + ), ) return None diff --git a/ingestion/tests/integration/data_quality/test_data_diff.py b/ingestion/tests/integration/data_quality/test_data_diff.py index ee269b49c641..879e3e0a2e45 100644 --- a/ingestion/tests/integration/data_quality/test_data_diff.py +++ b/ingestion/tests/integration/data_quality/test_data_diff.py @@ -221,11 +221,25 @@ def __init__(self, *args, **kwargs): "POSTGRES_SERVICE.dvdrental.public.customer_without_first_name", TestCaseResult( 
timestamp=int(datetime.now().timestamp() * 1000), - testCaseStatus=TestCaseStatus.Failed, + testCaseStatus=TestCaseStatus.Success, + failedRows=0, + passedRows=599, testResultValue=[ + TestResultValue(name="removedRows", value="0"), + TestResultValue(name="addedRows", value="0"), + TestResultValue(name="changedRows", value="0"), + TestResultValue(name="diffCount", value="0"), TestResultValue(name="removedColumns", value="1"), TestResultValue(name="addedColumns", value="0"), TestResultValue(name="changedColumns", value="0"), + TestResultValue( + name="schemaTable1", + value="serviceType='Postgres' fullyQualifiedTableName='public.customer' schema={'customer_id': {'type': 'integer', 'constraints': 'PRIMARY_KEY'}, 'store_id': {'type': 'smallint', 'constraints': 'NOT_NULL'}, 'first_name': {'type': 'character varying(45)', 'constraints': 'NOT_NULL'}, 'last_name': {'type': 'character varying(45)', 'constraints': 'NOT_NULL'}, 'email': {'type': 'character varying(50)', 'constraints': 'NULL'}, 'address_id': {'type': 'smallint', 'constraints': 'NOT_NULL'}, 'activebool': {'type': 'boolean', 'constraints': 'NOT_NULL'}, 'create_date': {'type': 'date', 'constraints': 'NOT_NULL'}, 'last_update': {'type': 'timestamp without time zone', 'constraints': 'NULL'}, 'active': {'type': 'integer', 'constraints': 'NULL'}, 'json_field': {'type': 'jsonb', 'constraints': 'NULL'}}", + ), + TestResultValue( + name="schemaTable2", + value="serviceType='Postgres' fullyQualifiedTableName='public.customer_without_first_name' schema={'customer_id': {'type': 'integer', 'constraints': 'NULL'}, 'store_id': {'type': 'smallint', 'constraints': 'NULL'}, 'last_name': {'type': 'character varying(45)', 'constraints': 'NULL'}, 'email': {'type': 'character varying(50)', 'constraints': 'NULL'}, 'address_id': {'type': 'smallint', 'constraints': 'NULL'}, 'activebool': {'type': 'boolean', 'constraints': 'NULL'}, 'create_date': {'type': 'date', 'constraints': 'NULL'}, 'last_update': {'type': 'timestamp without time 
zone', 'constraints': 'NULL'}, 'active': {'type': 'integer', 'constraints': 'NULL'}, 'json_field': {'type': 'jsonb', 'constraints': 'NULL'}}", + ), ], ), ), @@ -303,11 +317,25 @@ def __init__(self, *args, **kwargs): "POSTGRES_SERVICE.dvdrental.public.customer_different_case_columns", TestCaseResult( timestamp=int(datetime.now().timestamp() * 1000), - testCaseStatus=TestCaseStatus.Failed, + testCaseStatus=TestCaseStatus.Success, + failedRows=0, + passedRows=599, testResultValue=[ + TestResultValue(name="removedRows", value="0"), + TestResultValue(name="addedRows", value="0"), + TestResultValue(name="changedRows", value="0"), + TestResultValue(name="diffCount", value="0"), TestResultValue(name="removedColumns", value="1"), TestResultValue(name="addedColumns", value="1"), TestResultValue(name="changedColumns", value="0"), + TestResultValue( + name="schemaTable1", + value="serviceType='Postgres' fullyQualifiedTableName='public.customer' schema={'customer_id': {'type': 'integer', 'constraints': 'PRIMARY_KEY'}, 'store_id': {'type': 'smallint', 'constraints': 'NOT_NULL'}, 'first_name': {'type': 'character varying(45)', 'constraints': 'NOT_NULL'}, 'last_name': {'type': 'character varying(45)', 'constraints': 'NOT_NULL'}, 'email': {'type': 'character varying(50)', 'constraints': 'NULL'}, 'address_id': {'type': 'smallint', 'constraints': 'NOT_NULL'}, 'activebool': {'type': 'boolean', 'constraints': 'NOT_NULL'}, 'create_date': {'type': 'date', 'constraints': 'NOT_NULL'}, 'last_update': {'type': 'timestamp without time zone', 'constraints': 'NULL'}, 'active': {'type': 'integer', 'constraints': 'NULL'}, 'json_field': {'type': 'jsonb', 'constraints': 'NULL'}}", + ), + TestResultValue( + name="schemaTable2", + value="serviceType='Postgres' fullyQualifiedTableName='public.customer_different_case_columns' schema={'customer_id': {'type': 'integer', 'constraints': 'NULL'}, 'store_id': {'type': 'smallint', 'constraints': 'NULL'}, 'First_Name': {'type': 'character varying(45)', 
'constraints': 'NULL'}, 'last_name': {'type': 'character varying(45)', 'constraints': 'NULL'}, 'email': {'type': 'character varying(50)', 'constraints': 'NULL'}, 'address_id': {'type': 'smallint', 'constraints': 'NULL'}, 'activebool': {'type': 'boolean', 'constraints': 'NULL'}, 'create_date': {'type': 'date', 'constraints': 'NULL'}, 'last_update': {'type': 'timestamp without time zone', 'constraints': 'NULL'}, 'active': {'type': 'integer', 'constraints': 'NULL'}, 'json_field': {'type': 'jsonb', 'constraints': 'NULL'}}", + ), ], ), ), @@ -465,14 +493,25 @@ def test_happy_paths( ), TestCaseResult( timestamp=int(datetime.now().timestamp() * 1000), - testCaseStatus=TestCaseStatus.Failed, - result="Tables have 1 different columns:" - "\n Changed columns:" - "\n first_name: VARCHAR -> INT", + testCaseStatus=TestCaseStatus.Success, + failedRows=0, + passedRows=599, testResultValue=[ + TestResultValue(name="removedRows", value="0"), + TestResultValue(name="addedRows", value="0"), + TestResultValue(name="changedRows", value="0"), + TestResultValue(name="diffCount", value="0"), TestResultValue(name="removedColumns", value="0"), TestResultValue(name="addedColumns", value="0"), TestResultValue(name="changedColumns", value="1"), + TestResultValue( + name="schemaTable1", + value="serviceType='Postgres' fullyQualifiedTableName='public.customer' schema={'customer_id': {'type': 'integer', 'constraints': 'PRIMARY_KEY'}, 'store_id': {'type': 'smallint', 'constraints': 'NOT_NULL'}, 'first_name': {'type': 'character varying(45)', 'constraints': 'NOT_NULL'}, 'last_name': {'type': 'character varying(45)', 'constraints': 'NOT_NULL'}, 'email': {'type': 'character varying(50)', 'constraints': 'NULL'}, 'address_id': {'type': 'smallint', 'constraints': 'NOT_NULL'}, 'activebool': {'type': 'boolean', 'constraints': 'NOT_NULL'}, 'create_date': {'type': 'date', 'constraints': 'NOT_NULL'}, 'last_update': {'type': 'timestamp without time zone', 'constraints': 'NULL'}, 'active': {'type': 'integer', 
'constraints': 'NULL'}, 'json_field': {'type': 'jsonb', 'constraints': 'NULL'}}", + ), + TestResultValue( + name="schemaTable2", + value="serviceType='Postgres' fullyQualifiedTableName='public.customer_int_first_name' schema={'customer_id': {'type': 'integer', 'constraints': 'NULL'}, 'store_id': {'type': 'smallint', 'constraints': 'NULL'}, 'last_name': {'type': 'character varying(45)', 'constraints': 'NULL'}, 'email': {'type': 'character varying(50)', 'constraints': 'NULL'}, 'address_id': {'type': 'smallint', 'constraints': 'NULL'}, 'activebool': {'type': 'boolean', 'constraints': 'NULL'}, 'create_date': {'type': 'date', 'constraints': 'NULL'}, 'last_update': {'type': 'timestamp without time zone', 'constraints': 'NULL'}, 'active': {'type': 'integer', 'constraints': 'NULL'}, 'json_field': {'type': 'jsonb', 'constraints': 'NULL'}, 'first_name': {'type': 'integer', 'constraints': 'NULL'}}", + ), ], ), ), diff --git a/ingestion/tests/unit/data_quality/validations/table/sqlalchemy/test_table_diff.py b/ingestion/tests/unit/data_quality/validations/table/sqlalchemy/test_table_diff.py index 487f913f5e25..c2fa27c1079c 100644 --- a/ingestion/tests/unit/data_quality/validations/table/sqlalchemy/test_table_diff.py +++ b/ingestion/tests/unit/data_quality/validations/table/sqlalchemy/test_table_diff.py @@ -3,7 +3,7 @@ from unittest.mock import MagicMock, Mock, patch import pytest -from dirty_equals import Contains, DirtyEquals, HasAttributes +from dirty_equals import Contains, DirtyEquals, HasAttributes, IsList from metadata.data_quality.validations.models import ( TableDiffRuntimeParameters, @@ -12,15 +12,33 @@ from metadata.data_quality.validations.table.sqlalchemy.tableDiff import ( TableDiffValidator, ) -from metadata.generated.schema.entity.data.table import Column, ColumnName, DataType +from metadata.generated.schema.entity.data.table import ( + Column, + ColumnName, + Constraint, + DataType, +) from metadata.generated.schema.entity.services.databaseService import ( 
DatabaseServiceType, ) -from metadata.generated.schema.tests.basic import TestCaseStatus from metadata.generated.schema.tests.testCase import TestCase from metadata.generated.schema.type.basic import Timestamp +def build_column( + name: str, + data_type: DataType = DataType.STRING, + constraint: Constraint = Constraint.NULL, +) -> Column: + """Helper to create Column with all required fields for tests.""" + return Column.model_construct( + name=ColumnName(root=name), + dataType=data_type, + dataTypeDisplay=data_type.value, + constraint=constraint, + ) + + def build_table_parameter( *columns: Column, key_columns: list[str], @@ -42,13 +60,9 @@ def build_table_parameter( @pytest.fixture def table1_parameter() -> TableParameter: return build_table_parameter( - Column.model_construct(name=ColumnName(root="id"), dataType=DataType.STRING), - Column.model_construct( - name=ColumnName(root="first_name"), dataType=DataType.STRING - ), - Column.model_construct( - name=ColumnName(root="last_name"), dataType=DataType.STRING - ), + build_column("id", constraint=Constraint.PRIMARY_KEY), + build_column("first_name"), + build_column("last_name"), key_columns=["id"], extra_columns=["first_name", "last_name"], service_url="postgresql://postgres:postgres@service1:5432/postgres", @@ -58,15 +72,9 @@ def table1_parameter() -> TableParameter: @pytest.fixture def table2_parameter() -> TableParameter: return build_table_parameter( - Column.model_construct( - name=ColumnName(root="table_id"), dataType=DataType.STRING - ), - Column.model_construct( - name=ColumnName(root="first_name"), dataType=DataType.STRING - ), - Column.model_construct( - name=ColumnName(root="last_name"), dataType=DataType.STRING - ), + build_column("table_id", constraint=Constraint.PRIMARY_KEY), + build_column("first_name"), + build_column("last_name"), key_columns=["table_id"], extra_columns=["first_name", "last_name"], service_url="postgresql://postgres:postgres@service2:5432/postgres", @@ -119,86 +127,59 @@ def 
test_it_returns_none_when_no_diff( ( ( build_table_parameter( - Column.model_construct( - name=ColumnName(root="id"), dataType=DataType.STRING - ), - Column.model_construct( - name=ColumnName(root="last_name"), dataType=DataType.STRING - ), + build_column("id", constraint=Constraint.PRIMARY_KEY), + build_column("last_name"), key_columns=["id"], extra_columns=["last_name"], ), build_table_parameter( - Column.model_construct( - name=ColumnName(root="id"), dataType=DataType.STRING - ), - Column.model_construct( - name=ColumnName(root="first_name"), dataType=DataType.STRING - ), + build_column("id", constraint=Constraint.PRIMARY_KEY), + build_column("first_name"), key_columns=["id"], extra_columns=["first_name"], ), HasAttributes( - testCaseStatus=TestCaseStatus.Failed, - result=Contains("Removed columns: last_name") - & Contains("Added columns: first_name") - & ~Contains("Changed"), + removed=IsList(Contains("last_name")), + added=IsList(Contains("first_name")), + changed=[], ), ), ( build_table_parameter( - Column.model_construct( - name=ColumnName(root="id"), dataType=DataType.STRING - ), - Column.model_construct( - name=ColumnName(root="last_name"), dataType=DataType.STRING - ), + build_column("id", constraint=Constraint.PRIMARY_KEY), + build_column("last_name"), key_columns=["id"], extra_columns=["last_name"], ), build_table_parameter( - Column.model_construct( - name=ColumnName(root="table_id"), dataType=DataType.STRING - ), - Column.model_construct( - name=ColumnName(root="first_name"), dataType=DataType.STRING - ), + build_column("table_id", constraint=Constraint.PRIMARY_KEY), + build_column("first_name"), key_columns=["table_id"], extra_columns=["first_name"], ), HasAttributes( - testCaseStatus=TestCaseStatus.Failed, - result=Contains("Removed columns: last_name") - & Contains("Added columns: first_name") - & ~Contains("Changed"), + removed=IsList(Contains("last_name")), + added=IsList(Contains("first_name")), + changed=[], ), ), ( build_table_parameter( - 
Column.model_construct( - name=ColumnName(root="id"), dataType=DataType.STRING - ), - Column.model_construct( - name=ColumnName(root="last_name"), dataType=DataType.STRING - ), + build_column("id", constraint=Constraint.PRIMARY_KEY), + build_column("last_name"), key_columns=["id"], extra_columns=["last_name"], ), build_table_parameter( - Column.model_construct( - name=ColumnName(root="table_id"), dataType=DataType.STRING - ), - Column.model_construct( - name=ColumnName(root="first_name"), dataType=DataType.STRING - ), + build_column("table_id", constraint=Constraint.PRIMARY_KEY), + build_column("first_name"), key_columns=["id"], # The error trying to solve in #22302 extra_columns=["first_name"], ), HasAttributes( - testCaseStatus=TestCaseStatus.Failed, - result=Contains("Removed columns: last_name") - & Contains("Added columns: table_id, first_name") - & ~Contains("Changed"), + removed=IsList(Contains("last_name")), + added=IsList(Contains("table_id"), Contains("first_name")), + changed=[], ), ), ), diff --git a/openmetadata-ui/src/main/resources/ui/src/utils/DataQuality/TestSummaryGraphUtils.ts b/openmetadata-ui/src/main/resources/ui/src/utils/DataQuality/TestSummaryGraphUtils.ts index edb467babcad..21e8d48b3584 100644 --- a/openmetadata-ui/src/main/resources/ui/src/utils/DataQuality/TestSummaryGraphUtils.ts +++ b/openmetadata-ui/src/main/resources/ui/src/utils/DataQuality/TestSummaryGraphUtils.ts @@ -20,6 +20,8 @@ import { } from '../../generated/tests/testCase'; import { getRandomHexColor } from '../DataInsightUtils'; +const EXCLUDED_CHART_FIELDS = ['schemaTable1', 'schemaTable2']; + export type PrepareChartDataType = { testCaseParameterValue: TestCaseParameterValue[]; testCaseResults: TestCaseResult[]; @@ -45,6 +47,9 @@ export const prepareChartData = ({ let showAILearningBanner = false; testCaseResults.forEach((result) => { const values = result.testResultValue?.reduce((acc, curr) => { + if (EXCLUDED_CHART_FIELDS.includes(curr.name ?? 
'')) { + return acc; + } const value = round(parseFloat(curr.value ?? ''), 2) || 0; return { @@ -93,12 +98,16 @@ export const prepareChartData = ({ (result) => result.testResultValue?.length ); + const filteredResultValues = + testCaseResultParams?.testResultValue?.filter( + (info) => !EXCLUDED_CHART_FIELDS.includes(info.name ?? '') + ) ?? []; + return { - information: - testCaseResultParams?.testResultValue?.map((info, i) => ({ - label: info.name ?? '', - color: COLORS[i] ?? getRandomHexColor(), - })) ?? [], + information: filteredResultValues.map((info, i) => ({ + label: info.name ?? '', + color: COLORS[i] ?? getRandomHexColor(), + })), data: dataPoints, showAILearningBanner, }; diff --git a/scripts/validate_change.sh b/scripts/validate_change.sh index a03768518e43..d2d40fa510ed 100755 --- a/scripts/validate_change.sh +++ b/scripts/validate_change.sh @@ -62,14 +62,24 @@ $DIFF ### Instructions: 1. Analyze which files changed and what functionality was affected -2. Determine which tests are relevant: +2. Provide a thorough code review of the changes validating for: + - Code quality + - Adherence to project conventions + - Potential bugs or issues + - Missing tests or documentation + - Security implications + - Performance considerations +3. Determine which tests are relevant: - Unit tests for changed modules - - Integration tests if APIs/services changed + - Integration tests if APIs/services changed (for Python integration tests assume Docker and test DB are available) - E2E tests (via Playwright MCP) if UI/flows changed -3. Run the relevant tests: +4. Run the relevant tests: - For unit/integration: use the project's test runner (npm test, pytest, etc.) - For E2E: use Playwright MCP to validate on http://localhost:8585 -4. Report results in this exact format: +5. Report results in this exact format: + +## Code Review +[Detailed code review comments by files changed] ## Validation Results