@@ -153,28 +153,50 @@ def get_checks_fails(client: Client, job_url: str):
153153 Get tests that did not succeed for the given job URL.
154154 Exclude checks that have status 'error' as they are counted in get_checks_errors.
155155 """
156- columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link"
157- query = f"""SELECT { columns } FROM `gh-data`.checks
158- WHERE task_url LIKE '{ job_url } %'
159- AND test_status IN ('FAIL', 'ERROR')
160- AND check_status!='error'
161- ORDER BY check_name, test_name
162- """
156+ query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
157+ FROM (
158+ SELECT
159+ argMax(check_status, check_start_time) as job_status,
160+ check_name as job_name,
161+ argMax(test_status, check_start_time) as status,
162+ test_name,
163+ report_url as results_link,
164+ task_url
165+ FROM `gh-data`.checks
166+ GROUP BY check_name, test_name, report_url, task_url
167+ )
168+ WHERE task_url LIKE '{ job_url } %'
169+ AND test_status IN ('FAIL', 'ERROR')
170+ AND job_status!='error'
171+ ORDER BY job_name, test_name
172+ """
163173 return client .query_dataframe (query )
164174
165175
166176def get_checks_known_fails (client : Client , job_url : str , known_fails : dict ):
167177 """
168178 Get tests that are known to fail for the given job URL.
169179 """
170- assert len (known_fails ) > 0 , "cannot query the database with empty known fails"
171- columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link"
172- query = f"""SELECT { columns } FROM `gh-data`.checks
173- WHERE task_url LIKE '{ job_url } %'
174- AND test_status='BROKEN'
175- AND test_name IN ({ ',' .join (f"'{ test } '" for test in known_fails .keys ())} )
176- ORDER BY test_name, check_name
177- """
180+ if len (known_fails ) == 0 :
181+ return pd .DataFrame ()
182+
183+ query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
184+ FROM (
185+ SELECT
186+ argMax(check_status, check_start_time) as job_status,
187+ check_name as job_name,
188+ argMax(test_status, check_start_time) as status,
189+ test_name,
190+ report_url as results_link,
191+ task_url
192+ FROM `gh-data`.checks
193+ GROUP BY check_name, test_name, report_url, task_url
194+ )
195+ WHERE task_url LIKE '{ job_url } %'
196+ AND test_status='BROKEN'
197+ AND test_name IN ({ ',' .join (f"'{ test } '" for test in known_fails .keys ())} )
198+ ORDER BY job_name, test_name
199+ """
178200
179201 df = client .query_dataframe (query )
180202
@@ -195,12 +217,22 @@ def get_checks_errors(client: Client, job_url: str):
195217 """
196218 Get checks that have status 'error' for the given job URL.
197219 """
198- columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link"
199- query = f"""SELECT { columns } FROM `gh-data`.checks
200- WHERE task_url LIKE '{ job_url } %'
201- AND check_status=='error'
202- ORDER BY check_name, test_name
203- """
220+ query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
221+ FROM (
222+ SELECT
223+ argMax(check_status, check_start_time) as job_status,
224+ check_name as job_name,
225+ argMax(test_status, check_start_time) as status,
226+ test_name,
227+ report_url as results_link,
228+ task_url
229+ FROM `gh-data`.checks
230+ GROUP BY check_name, test_name, report_url, task_url
231+ )
232+ WHERE task_url LIKE '{ job_url } %'
233+ AND job_status=='error'
234+ ORDER BY job_name, test_name
235+ """
204236 return client .query_dataframe (query )
205237
206238
@@ -233,14 +265,14 @@ def get_regression_fails(client: Client, job_url: str):
233265 architecture as arch,
234266 test_name,
235267 argMax(result, start_time) AS status,
236- job_url,
237268 job_name,
238- report_url as results_link
269+ report_url as results_link,
270+ job_url
239271 FROM `gh-data`.clickhouse_regression_results
240272 GROUP BY architecture, test_name, job_url, job_name, report_url
241273 ORDER BY length(test_name) DESC
242274 )
243- WHERE job_url= '{ job_url } '
275+ WHERE job_url LIKE '{ job_url } % '
244276 AND status IN ('Fail', 'Error')
245277 """
246278 df = client .query_dataframe (query )
@@ -249,6 +281,99 @@ def get_regression_fails(client: Client, job_url: str):
249281 return df
250282
251283
def get_new_fails_this_pr(
    client: Client,
    pr_info: dict,
    checks_fails: pd.DataFrame,
    regression_fails: pd.DataFrame,
):
    """
    Get tests that failed in the PR but passed in the base branch.
    Compares both checks and regression test results.

    Args:
        client: database client; only its ``query_dataframe(query)`` method is used.
        pr_info: PR description; the base commit is read from ``pr_info["base"]["sha"]``.
        checks_fails: failed checks of the PR with at least the columns
            job_name, test_name, test_status and results_link.
        regression_fails: failed regression tests of the PR with at least the
            columns arch, job_name, test_name, status and results_link.

    Returns:
        The PR failures (columns job_name, test_name, test_status, results_link)
        whose (job_name, test_name) pair has a non-failing result on the base
        commit. An empty DataFrame is returned when the PR has no failures.

    Raises:
        ValueError: if ``pr_info`` carries no base SHA.
    """
    base_sha = pr_info.get("base", {}).get("sha")
    if not base_sha:
        raise ValueError("No base SHA found for PR")

    desired_columns = ["job_name", "test_name", "test_status", "results_link"]

    def to_checks_layout(frame: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of a regression frame using the checks column layout."""
        frame = frame.copy()
        # Regression jobs are only unique per architecture, so fold it into the name.
        frame["job_name"] = [
            f"{arch} {job_name}".strip()
            for arch, job_name in zip(frame["arch"], frame["job_name"])
        ]
        frame["test_status"] = frame["status"]
        return frame

    # Only non-empty inputs take part: an empty frame may carry no columns at
    # all, which would make the column selection below raise KeyError.
    pr_frames = []
    if len(checks_fails) > 0:
        pr_frames.append(checks_fails)
    if len(regression_fails) > 0:
        pr_frames.append(to_checks_layout(regression_fails))
    if not pr_frames:
        return pd.DataFrame()

    # Combine both types of fails and select only desired columns
    all_pr_fails = pd.concat(pr_frames, ignore_index=True)[desired_columns]

    # Get all checks from the base branch that didn't fail
    base_checks_query = f"""SELECT job_name, status as test_status, test_name, results_link
    FROM (
        SELECT
            check_name as job_name,
            argMax(test_status, check_start_time) as status,
            test_name,
            report_url as results_link,
            task_url
        FROM `gh-data`.checks
        WHERE commit_sha='{base_sha}'
        GROUP BY check_name, test_name, report_url, task_url
    )
    WHERE test_status NOT IN ('FAIL', 'ERROR')
    ORDER BY job_name, test_name
    """
    base_checks = client.query_dataframe(base_checks_query)

    # Get regression results from base branch that didn't fail
    base_regression_query = f"""SELECT arch, job_name, status, test_name, results_link
    FROM (
        SELECT
            architecture as arch,
            test_name,
            argMax(result, start_time) AS status,
            job_url,
            job_name,
            report_url as results_link
        FROM `gh-data`.clickhouse_regression_results
        WHERE results_link LIKE '%/{base_sha}/%'
        GROUP BY architecture, test_name, job_url, job_name, report_url
        ORDER BY length(test_name) DESC
    )
    WHERE status NOT IN ('Fail', 'Error')
    """
    base_regression = client.query_dataframe(base_regression_query)

    # Combine base results, again ignoring empty (possibly column-less) frames
    base_frames = []
    if len(base_checks) > 0:
        base_frames.append(base_checks)
    if len(base_regression) > 0:
        base_frames.append(
            to_checks_layout(base_regression).drop(columns=["arch", "status"])
        )
    if not base_frames:
        # Nothing passed on the base commit, so no PR failure counts as new.
        return all_pr_fails.iloc[0:0]
    base_results = pd.concat(base_frames, ignore_index=True)

    # Find tests that failed in PR but passed in base
    base_passed_tests = set(zip(base_results["job_name"], base_results["test_name"]))
    is_new_fail = [
        key in base_passed_tests
        for key in zip(all_pr_fails["job_name"], all_pr_fails["test_name"])
    ]

    # Filter PR results to only include new fails
    return all_pr_fails.loc[is_new_fail]
375+
376+
252377def get_cves (pr_number , commit_sha ):
253378 """
254379 Fetch Grype results from S3.
@@ -306,15 +431,15 @@ def get_cves(pr_number, commit_sha):
def url_to_html_link(url: str) -> str:
    """
    Render a URL as an HTML anchor labelled with its last path segment.

    The label is the text after the final "/" with double underscores
    collapsed to a single one; when that segment is empty (the URL ends with
    "/") the label falls back to "results". Both the href and the label are
    HTML-escaped so characters such as '&', '"' or '<' in the URL cannot break
    the surrounding markup. A falsy URL yields an empty string.
    """
    from html import escape

    if not url:
        return ""
    text = url.split("/")[-1].replace("__", "_")
    if not text:
        text = "results"
    return f'<a href="{escape(url, quote=True)}">{escape(text)}</a>'
313438
314439
def format_test_name_for_linewrap(text: str) -> str:
    """
    Wrap the test name in a span that lets the browser break it anywhere.

    The name is HTML-escaped first so that names containing '<', '>' or '&'
    (e.g. parametrized test ids) are shown literally instead of being parsed
    as markup. Non-string values are converted with str() before escaping.
    """
    from html import escape

    return f'<span style="line-break: anywhere;">{escape(str(text), quote=False)}</span>'
318443
319444
320445def format_test_status (text : str ) -> str :
@@ -402,6 +527,7 @@ def main():
402527 "job_statuses" : get_commit_statuses (args .commit_sha ),
403528 "checks_fails" : get_checks_fails (db_client , args .actions_run_url ),
404529 "checks_known_fails" : [],
530+ "pr_new_fails" : [],
405531 "checks_errors" : get_checks_errors (db_client , args .actions_run_url ),
406532 "regression_fails" : get_regression_fails (db_client , args .actions_run_url ),
407533 "docker_images_cves" : (
@@ -436,6 +562,12 @@ def main():
436562 pr_info_html = f"""<a href="https://github.com/{ GITHUB_REPO } /pull/{ pr_info ["number" ]} ">
437563 #{ pr_info .get ("number" )} ({ pr_info .get ("base" , {}).get ('ref' )} <- { pr_info .get ("head" , {}).get ('ref' )} ) { pr_info .get ("title" )}
438564 </a>"""
565+ fail_results ["pr_new_fails" ] = get_new_fails_this_pr (
566+ db_client ,
567+ pr_info ,
568+ fail_results ["checks_fails" ],
569+ fail_results ["regression_fails" ],
570+ )
439571 except Exception as e :
440572 pr_info_html = e
441573
@@ -454,8 +586,10 @@ def main():
454586 "github_repo" : GITHUB_REPO ,
455587 "s3_bucket" : S3_BUCKET ,
456588 "pr_info_html" : pr_info_html ,
589+ "pr_number" : args .pr_number ,
457590 "workflow_id" : args .actions_run_url .split ("/" )[- 1 ],
458591 "commit_sha" : args .commit_sha ,
592+ "base_sha" : "" if args .pr_number == 0 else pr_info .get ("base" , {}).get ("sha" ),
459593 "date" : f"{ datetime .utcnow ().strftime ('%Y-%m-%d %H:%M:%S' )} UTC" ,
460594 "is_preview" : args .mark_preview ,
461595 "counts" : {
@@ -469,6 +603,7 @@ def main():
469603 if not args .known_fails
470604 else len (fail_results ["checks_known_fails" ])
471605 ),
606+ "pr_new_fails" : len (fail_results ["pr_new_fails" ]),
472607 },
473608 "ci_jobs_status_html" : format_results_as_html_table (
474609 fail_results ["job_statuses" ]
@@ -490,6 +625,7 @@ def main():
490625 if not args .known_fails
491626 else format_results_as_html_table (fail_results ["checks_known_fails" ])
492627 ),
628+ "new_fails_html" : format_results_as_html_table (fail_results ["pr_new_fails" ]),
493629 }
494630
495631 # Render the template with the context
0 commit comments