Skip to content

Commit c340b22

Browse files
authored
Merge pull request #2250 from JoelSpeed/mhc-targets
✨ Add Health Check logic to MachineHealthCheck Reconciler
2 parents 349f460 + 067f1e0 commit c340b22

11 files changed

+1409
-19
lines changed

api/v1alpha3/machinehealthcheck_types.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ type MachineHealthCheckSpec struct {
4444
// "selector" are not healthy.
4545
// +optional
4646
MaxUnhealthy *intstr.IntOrString `json:"maxUnhealthy,omitempty"`
47+
48+
// Machines older than this duration without a node will be considered to have
49+
// failed and will be remediated.
50+
// +optional
51+
NodeStartupTimeout *metav1.Duration `json:"nodeStartupTimeout,omitempty"`
4752
}
4853

4954
// ANCHOR_END: MachineHealthCheckSpec
@@ -73,11 +78,11 @@ type UnhealthyCondition struct {
7378
type MachineHealthCheckStatus struct {
7479
// total number of machines counted by this machine health check
7580
// +kubebuilder:validation:Minimum=0
76-
ExpectedMachines int32 `json:"expectedMachines"`
81+
ExpectedMachines int32 `json:"expectedMachines,omitempty"`
7782

7883
// total number of healthy machines counted by this machine health check
7984
// +kubebuilder:validation:Minimum=0
80-
CurrentHealthy int32 `json:"currentHealthy"`
85+
CurrentHealthy int32 `json:"currentHealthy,omitempty"`
8186
}
8287

8388
// ANCHOR_END: MachineHealthCheckStatus

api/v1alpha3/machinehealthcheck_webhook.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package v1alpha3
1818

1919
import (
2020
"fmt"
21+
"time"
2122

2223
apierrors "k8s.io/apimachinery/pkg/api/errors"
2324
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -28,6 +29,14 @@ import (
2829
"sigs.k8s.io/controller-runtime/pkg/webhook"
2930
)
3031

32+
var (
33+
// Default time allowed for a node to start up. Can be made longer as part of
34+
// spec if required for particular provider.
35+
// 10 minutes should allow the instance to start and the node to join the
36+
// cluster on most providers.
37+
defaultNodeStartupTimeout = metav1.Duration{Duration: 10 * time.Minute}
38+
)
39+
3140
func (m *MachineHealthCheck) SetupWebhookWithManager(mgr ctrl.Manager) error {
3241
return ctrl.NewWebhookManagedBy(mgr).
3342
For(m).
@@ -46,6 +55,10 @@ func (m *MachineHealthCheck) Default() {
4655
defaultMaxUnhealthy := intstr.FromString("100%")
4756
m.Spec.MaxUnhealthy = &defaultMaxUnhealthy
4857
}
58+
59+
if m.Spec.NodeStartupTimeout == nil {
60+
m.Spec.NodeStartupTimeout = &defaultNodeStartupTimeout
61+
}
4962
}
5063

5164
// ValidateCreate implements webhook.Validator so a webhook will be registered for the type
@@ -86,6 +99,13 @@ func (m *MachineHealthCheck) validate(old *MachineHealthCheck) error {
8699
)
87100
}
88101

102+
if m.Spec.NodeStartupTimeout != nil && m.Spec.NodeStartupTimeout.Seconds() < 30 {
103+
allErrs = append(
104+
allErrs,
105+
field.Invalid(field.NewPath("spec", "nodeStartupTimeout"), m.Spec.NodeStartupTimeout, "must be at least 30s"),
106+
)
107+
}
108+
89109
if len(allErrs) == 0 {
90110
return nil
91111
}

api/v1alpha3/machinehealthcheck_webhook_test.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package v1alpha3
1818

1919
import (
2020
"testing"
21+
"time"
2122

2223
. "github.com/onsi/gomega"
2324
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -30,6 +31,8 @@ func TestMachineHealthCheckDefault(t *testing.T) {
3031
mhc.Default()
3132

3233
g.Expect(mhc.Spec.MaxUnhealthy.String()).To(Equal("100%"))
34+
g.Expect(mhc.Spec.NodeStartupTimeout).ToNot(BeNil())
35+
g.Expect(*mhc.Spec.NodeStartupTimeout).To(Equal(metav1.Duration{Duration: 10 * time.Minute}))
3336
}
3437

3538
func TestMachineHealthCheckLabelSelectorAsSelectorValidation(t *testing.T) {
@@ -115,3 +118,66 @@ func TestMachineHealthCheckClusterNameImmutable(t *testing.T) {
115118
})
116119
}
117120
}
121+
122+
func TestMachineHealthCheckNodeStartupTimeout(t *testing.T) {
123+
zero := metav1.Duration{Duration: 0}
124+
twentyNineSeconds := metav1.Duration{Duration: 29 * time.Second}
125+
thirtySeconds := metav1.Duration{Duration: 30 * time.Second}
126+
oneMinute := metav1.Duration{Duration: 1 * time.Minute}
127+
minusOneMinute := metav1.Duration{Duration: -1 * time.Minute}
128+
129+
tests := []struct {
130+
name string
131+
timeout *metav1.Duration
132+
expectErr bool
133+
}{
134+
{
135+
name: "when the nodeStartupTimeout is not given",
136+
timeout: nil,
137+
expectErr: false,
138+
},
139+
{
140+
name: "when the nodeStartupTimeout is greater than 30s",
141+
timeout: &oneMinute,
142+
expectErr: false,
143+
},
144+
{
145+
name: "when the nodeStartupTimeout is 30s",
146+
timeout: &thirtySeconds,
147+
expectErr: false,
148+
},
149+
{
150+
name: "when the nodeStartupTimeout is 29s",
151+
timeout: &twentyNineSeconds,
152+
expectErr: true,
153+
},
154+
{
155+
name: "when the nodeStartupTimeout is less than 0",
156+
timeout: &minusOneMinute,
157+
expectErr: true,
158+
},
159+
{
160+
name: "when the nodeStartupTimeout is 0",
161+
timeout: &zero,
162+
expectErr: true,
163+
},
164+
}
165+
166+
for _, tt := range tests {
167+
g := NewWithT(t)
168+
169+
mhc := &MachineHealthCheck{
170+
Spec: MachineHealthCheckSpec{
171+
NodeStartupTimeout: tt.timeout,
172+
},
173+
}
174+
175+
if tt.expectErr {
176+
g.Expect(mhc.ValidateCreate()).NotTo(Succeed())
177+
g.Expect(mhc.ValidateUpdate(mhc)).NotTo(Succeed())
178+
} else {
179+
g.Expect(mhc.ValidateCreate()).To(Succeed())
180+
g.Expect(mhc.ValidateUpdate(mhc)).To(Succeed())
181+
}
182+
}
183+
}

api/v1alpha3/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ spec:
6767
description: Any further remediation is only allowed if at most "MaxUnhealthy"
6868
machines selected by "selector" are not healthy.
6969
x-kubernetes-int-or-string: true
70+
nodeStartupTimeout:
71+
description: Machines older than this duration without a node will
72+
be considered to have failed and will be remediated.
73+
type: string
7074
selector:
7175
description: Label selector to match machines whose health will be
7276
exercised
@@ -158,9 +162,6 @@ spec:
158162
format: int32
159163
minimum: 0
160164
type: integer
161-
required:
162-
- currentHealthy
163-
- expectedMachines
164165
type: object
165166
type: object
166167
served: true

controllers/machine_helpers.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import (
2020
"context"
2121

2222
"github.com/pkg/errors"
23+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
24+
"k8s.io/apimachinery/pkg/labels"
2325
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
2426
"sigs.k8s.io/controller-runtime/pkg/client"
2527
)
@@ -47,3 +49,20 @@ func getActiveMachinesInCluster(ctx context.Context, c client.Client, namespace,
4749
}
4850
return machines, nil
4951
}
52+
53+
// hasMatchingLabels verifies that the Label Selector matches the given Labels
54+
func hasMatchingLabels(matchSelector metav1.LabelSelector, matchLabels map[string]string) bool {
55+
// This should never fail, validating webhook should catch this first
56+
selector, err := metav1.LabelSelectorAsSelector(&matchSelector)
57+
if err != nil {
58+
return false
59+
}
60+
// If a nil or empty selector creeps in, it should match nothing, not everything.
61+
if selector.Empty() {
62+
return false
63+
}
64+
if !selector.Matches(labels.Set(matchLabels)) {
65+
return false
66+
}
67+
return true
68+
}

controllers/machine_helpers_test.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,74 @@ func Test_getActiveMachinesInCluster(t *testing.T) {
127127
})
128128
}
129129
}
130+
131+
func TestMachineHealthCheckHasMatchingLabels(t *testing.T) {
132+
testCases := []struct {
133+
name string
134+
selector metav1.LabelSelector
135+
labels map[string]string
136+
expected bool
137+
}{
138+
{
139+
name: "selector matches labels",
140+
141+
selector: metav1.LabelSelector{
142+
MatchLabels: map[string]string{
143+
"foo": "bar",
144+
},
145+
},
146+
147+
labels: map[string]string{
148+
"foo": "bar",
149+
},
150+
151+
expected: true,
152+
},
153+
{
154+
name: "selector does not match labels",
155+
156+
selector: metav1.LabelSelector{
157+
MatchLabels: map[string]string{
158+
"foo": "bar",
159+
},
160+
},
161+
162+
labels: map[string]string{
163+
"no": "match",
164+
},
165+
expected: false,
166+
},
167+
{
168+
name: "selector is empty",
169+
selector: metav1.LabelSelector{},
170+
labels: map[string]string{},
171+
expected: false,
172+
},
173+
{
174+
name: "seelctor is invalid",
175+
selector: metav1.LabelSelector{
176+
MatchLabels: map[string]string{
177+
"foo": "bar",
178+
},
179+
MatchExpressions: []metav1.LabelSelectorRequirement{
180+
{
181+
Operator: "bad-operator",
182+
},
183+
},
184+
},
185+
labels: map[string]string{
186+
"foo": "bar",
187+
},
188+
expected: false,
189+
},
190+
}
191+
192+
for _, tc := range testCases {
193+
t.Run(tc.name, func(t *testing.T) {
194+
g := NewWithT(t)
195+
196+
got := hasMatchingLabels(tc.selector, tc.labels)
197+
g.Expect(got).To(Equal(tc.expected))
198+
})
199+
}
200+
}

0 commit comments

Comments
 (0)