Skip to content

Commit 1cfd0d1

Browse files
committed
cmd/watchflakes: report consistent failures at top
Currently, watchflakes skips consistent failures, including ones at the top, i.e. failures on the latest few commits. Consistent failures at tip mean that builds on this builder were broken recently. Currently we rely on a human watching the dashboard to report such failures. This CL lets watchflakes do so. If a new issue is opened for such failures, the title includes "[consistent failure]" to increase awareness. Fixes golang/go#58819. Change-Id: I2ea89a5d8edee0b9087f03b58e4ff834c03e826d Reviewed-on: https://go-review.googlesource.com/c/build/+/601439 Reviewed-by: Carlos Amedee <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent c640e78 commit 1cfd0d1

File tree

2 files changed

+37
-4
lines changed

2 files changed

+37
-4
lines changed

cmd/watchflakes/luci.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ type BuildResult struct {
108108
StepLogURL string // textual log of the (last) failed step, if any
109109
StepLogText string
110110
Failures []*Failure
111+
Top bool // whether this is a consistent failure at the top (tip)
111112
}
112113

113114
type Commit struct {

cmd/watchflakes/main.go

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ const timeLimit = 45 * 24 * time.Hour
3636

3737
const maxFailPerBuild = 3
3838

39+
const tooManyToBeFlakes = 4
40+
3941
var (
4042
build = flag.String("build", "", "a particular build ID or URL to analyze (mainly for debugging)")
4143
md = flag.Bool("md", false, "print Markdown output suitable for GitHub issues")
@@ -228,6 +230,20 @@ Repeat:
228230
}
229231
}
230232
}
233+
for _, issue := range issues {
234+
if issue.Number == 0 && len(issue.Post) >= tooManyToBeFlakes && issue.Post[0].Top {
235+
// New issue. Check if it is failing consistently at top.
236+
top := 0
237+
for _, fp := range issue.Post {
238+
if fp.Top {
239+
top++
240+
}
241+
}
242+
if top >= tooManyToBeFlakes {
243+
issue.Title += " [consistent failure]"
244+
}
245+
}
246+
}
231247

232248
if query != nil {
233249
format := (*FailurePost).Text
@@ -324,13 +340,15 @@ func skipBrokenCommits(boards []*Dashboard) {
324340
// skipBrokenBuilders identifies builders that were consistently broken
325341
// (at least tooManyToBeFlakes failures in a row) and then turned ok.
326342
// It changes those consistent failures to SKIP.
343+
//
344+
// It does not skip consistent failures at the top (latest few commits).
345+
// Instead, it sets Top to true on them.
327346
func skipBrokenBuilders(boards []*Dashboard) {
328-
const tooManyToBeFlakes = 4
329-
330347
for _, dash := range boards {
331348
for _, rs := range dash.Results {
332-
bad := 100 // squash failures at the top of the dashboard, which may turn out to be consistent
349+
bad := 0
333350
badStart := 0
351+
top := true
334352
skip := func(i int) { // skip the i-th result
335353
if rs[i] != nil {
336354
fmt.Printf("skip: builder %s was broken at %s (%s %s)\n", rs[i].Builder, shortHash(rs[i].Commit), dash.Repo, dash.GoBranch)
@@ -343,10 +361,24 @@ func skipBrokenBuilders(boards []*Dashboard) {
343361
}
344362
switch r.Status {
345363
case bbpb.Status_SUCCESS:
364+
if top && bad < tooManyToBeFlakes {
365+
// Skip the run at the top.
366+
// Too few to tell if it is flaky or consistent.
367+
// It may also get fixed soon.
368+
for j := 0; j < i; j++ {
369+
skip(j)
370+
}
371+
}
372+
top = false
346373
bad = 0
347374
continue
348375
case bbpb.Status_FAILURE:
349376
bad++
377+
if top {
378+
// Set Top to true, but don't skip.
379+
r.Top = true
380+
continue
381+
}
350382
default: // ignore other status
351383
continue
352384
}
@@ -368,7 +400,7 @@ func skipBrokenBuilders(boards []*Dashboard) {
368400
// even if there are just a few of them. Otherwise we get
369401
// spurious flakes when there's one bad entry before the
370402
// cutoff and lots after the cutoff.
371-
if bad > 0 && badStart > 0 {
403+
if bad > 0 {
372404
for j := badStart; j < len(rs); j++ {
373405
skip(j)
374406
}

0 commit comments

Comments
 (0)