Skip to content

Commit b01e492

Browse files
committed
cmd/coordinator: fix health checking of ppc64 machines
Fixes golang/go#34990
Updates golang/go#21189

Change-Id: I7f546564dd5149de6915dc631cafe3164e0e7a70
Reviewed-on: https://go-review.googlesource.com/c/build/+/202017
Reviewed-by: Bryan C. Mills <[email protected]>
Run-TryBot: Bryan C. Mills <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
1 parent 92a45e4 commit b01e492

File tree

2 files changed

+31
-8
lines changed

2 files changed

+31
-8
lines changed

cmd/coordinator/reverse.go

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,22 @@ type reverseBuildletPool struct {
8181
// we bound how many can be running at once. Fortunately there aren't many left.
8282
oldInUse map[*buildlet.Client]bool
8383

84+
// hostLastGood tracks when buildlets were last seen to be
85+
// healthy. It's only used by the health reporting code (in
86+
// status.go). The reason it's a map on reverseBuildletPool
87+
// rather than a field on each reverseBuildlet is because we
88+
// also want to track the last known health time of buildlets
89+
// that aren't currently connected.
90+
//
91+
// Each buildlet's health is recorded in the map twice, under
92+
// two different keys: 1) its reported host name, and 2) its
93+
// hostType + ":" + its reported host name. It's recorded both
94+
// ways so the status code can check for both globally-unique
95+
// hostnames that change host types (e.g. our Macs), as well
96+
// as hostnames that aren't globally unique and are expected
97+
// to be found with different hostTypes (e.g. our ppc64le
98+
// machines as both POWER8 and POWER9 host types, but with the
99+
// same names).
84100
hostLastGood map[string]time.Time
85101
}
86102

@@ -208,6 +224,14 @@ func (p *reverseBuildletPool) healthCheckBuildletLoop(b *reverseBuildlet) {
208224
}
209225
}
210226

227+
// recordHealthy records that b is healthy right now by writing the current
228+
// time into p.hostLastGood under two keys: b.hostname and b.hostType+":"+b.hostname. NOTE(review): the call sites shown in this diff appear to run with p.mu held; confirm before calling it without the lock.
229+
func (p *reverseBuildletPool) recordHealthy(b *reverseBuildlet) {
230+
t := time.Now() // taken once so both map entries carry the same timestamp
231+
p.hostLastGood[b.hostname] = t
232+
p.hostLastGood[b.hostType+":"+b.hostname] = t // hostType-qualified key, for hostnames shared across host types
233+
}
234+
211235
func (p *reverseBuildletPool) healthCheckBuildlet(b *reverseBuildlet) bool {
212236
if b.client.IsBroken() {
213237
return false
@@ -217,7 +241,7 @@ func (p *reverseBuildletPool) healthCheckBuildlet(b *reverseBuildlet) bool {
217241
panic("previous health check still running")
218242
}
219243
if b.inUse {
220-
p.hostLastGood[b.hostname] = time.Now()
244+
p.recordHealthy(b)
221245
p.mu.Unlock()
222246
return true // skip busy buildlets
223247
}
@@ -257,9 +281,8 @@ func (p *reverseBuildletPool) healthCheckBuildlet(b *reverseBuildlet) bool {
257281
}
258282
b.inUse = false
259283
b.inHealthCheck = false
260-
now := time.Now()
261-
b.inUseTime = now
262-
p.hostLastGood[b.hostname] = now
284+
b.inUseTime = time.Now()
285+
p.recordHealthy(b)
263286
go p.noteBuildletAvailable(b.hostType)
264287
return true
265288
}
@@ -479,7 +502,7 @@ func (p *reverseBuildletPool) addBuildlet(b *reverseBuildlet) {
479502
defer p.noteBuildletAvailable(b.hostType)
480503
defer p.mu.Unlock()
481504
p.buildlets = append(p.buildlets, b)
482-
p.hostLastGood[b.hostname] = time.Now()
505+
p.recordHealthy(b)
483506
go p.healthCheckBuildletLoop(b)
484507
}
485508

cmd/coordinator/status.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ func newPacketHealthChecker() *healthChecker {
427427
func newOSUPPC64Checker() *healthChecker {
428428
var hosts []string
429429
for i := 1; i <= expectedHosts("host-linux-ppc64-osu"); i++ {
430-
name := fmt.Sprintf("go-be-%v", i)
430+
name := fmt.Sprintf("host-linux-ppc64-osu:go-be-%v", i)
431431
hosts = append(hosts, name)
432432
}
433433
return &healthChecker{
@@ -441,7 +441,7 @@ func newOSUPPC64Checker() *healthChecker {
441441
func newOSUPPC64leChecker() *healthChecker {
442442
var hosts []string
443443
for i := 1; i <= expectedHosts("host-linux-ppc64le-osu"); i++ {
444-
name := fmt.Sprintf("power_%02d", i)
444+
name := fmt.Sprintf("host-linux-ppc64le-osu:power_%02d", i)
445445
hosts = append(hosts, name)
446446
}
447447
return &healthChecker{
@@ -455,7 +455,7 @@ func newOSUPPC64leChecker() *healthChecker {
455455
func newOSUPPC64lePower9Checker() *healthChecker {
456456
var hosts []string
457457
for i := 1; i <= expectedHosts("host-linux-ppc64le-power9-osu"); i++ {
458-
name := fmt.Sprintf("power_%02d", i)
458+
name := fmt.Sprintf("host-linux-ppc64le-power9-osu:power_%02d", i)
459459
hosts = append(hosts, name)
460460
}
461461
return &healthChecker{

0 commit comments

Comments
 (0)