Skip to content

Commit f4ae6de

Browse files
committed
Prometheus add nodes gauge for SQS mode
1 parent e981e08 commit f4ae6de

File tree

7 files changed

+415
-19
lines changed

7 files changed

+415
-19
lines changed

cmd/node-termination-handler.go

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,20 @@ func main() {
120120
log.Fatal().Err(err).Msg("Unable to instantiate a node for various kubernetes node functions,")
121121
}
122122

123-
metrics, err := observability.InitMetrics(nthConfig.EnablePrometheus, nthConfig.PrometheusPort)
123+
cfg := aws.NewConfig().WithRegion(nthConfig.AWSRegion).WithEndpoint(nthConfig.AWSEndpoint).WithSTSRegionalEndpoint(endpoints.RegionalSTSEndpoint)
124+
sess := session.Must(session.NewSessionWithOptions(session.Options{
125+
Config: *cfg,
126+
SharedConfigState: session.SharedConfigEnable,
127+
}))
128+
creds, err := sess.Config.Credentials.Get()
129+
if err != nil {
130+
log.Fatal().Err(err).Msg("Unable to get AWS credentials")
131+
}
132+
log.Debug().Msgf("AWS Credentials retrieved from provider: %s", creds.ProviderName)
133+
134+
ec2 := ec2.New(sess)
135+
136+
metrics, err := observability.InitMetrics(nthConfig, node, ec2)
124137
if err != nil {
125138
nthConfig.Print()
126139
log.Fatal().Err(err).Msg("Unable to instantiate observability metrics,")
@@ -204,17 +217,6 @@ func main() {
204217
}
205218
}
206219
if nthConfig.EnableSQSTerminationDraining {
207-
cfg := aws.NewConfig().WithRegion(nthConfig.AWSRegion).WithEndpoint(nthConfig.AWSEndpoint).WithSTSRegionalEndpoint(endpoints.RegionalSTSEndpoint)
208-
sess := session.Must(session.NewSessionWithOptions(session.Options{
209-
Config: *cfg,
210-
SharedConfigState: session.SharedConfigEnable,
211-
}))
212-
creds, err := sess.Config.Credentials.Get()
213-
if err != nil {
214-
log.Fatal().Err(err).Msg("Unable to get AWS credentials")
215-
}
216-
log.Debug().Msgf("AWS Credentials retrieved from provider: %s", creds.ProviderName)
217-
218220
completeLifecycleActionDelay := time.Duration(nthConfig.CompleteLifecycleActionDelaySeconds) * time.Second
219221
sqsMonitor := sqsevent.SQSMonitor{
220222
CheckIfManaged: nthConfig.CheckTagBeforeDraining,
@@ -224,7 +226,7 @@ func main() {
224226
CancelChan: cancelChan,
225227
SQS: sqsevent.GetSqsClient(sess),
226228
ASG: autoscaling.New(sess),
227-
EC2: ec2.New(sess),
229+
EC2: ec2,
228230
BeforeCompleteLifecycleAction: func() { <-time.After(completeLifecycleActionDelay) },
229231
}
230232
monitoringFns[sqsEvents] = sqsMonitor

pkg/ec2helper/ec2helper.go

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Copyright 2016-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"). You may
4+
// not use this file except in compliance with the License. A copy of the
5+
// License is located at
6+
//
7+
// http://aws.amazon.com/apache2.0/
8+
//
9+
// or in the "license" file accompanying this file. This file is distributed
10+
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11+
// express or implied. See the License for the specific language governing
12+
// permissions and limitations under the License.
13+
14+
package ec2helper
15+
16+
import (
17+
"fmt"
18+
19+
"github.com/aws/aws-sdk-go/aws"
20+
"github.com/aws/aws-sdk-go/service/ec2"
21+
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
22+
)
23+
24+
// IEC2Helper describes the instance lookup operation that EC2Helper provides,
// so that consumers can depend on the interface rather than the concrete type.
type IEC2Helper interface {
25+
// GetInstanceIdsMapByTagKey takes a tag key and returns a map keyed by
// instance ID, or an error.
GetInstanceIdsMapByTagKey(tag string) (map[string]bool, error)
26+
}
27+
28+
// EC2Helper wraps an EC2 API client and exposes instance lookup helpers on top
// of it.
type EC2Helper struct {
29+
// ec2ServiceClient is the EC2 API client used to issue DescribeInstances
// requests.
ec2ServiceClient ec2iface.EC2API
30+
}
31+
32+
func New(ec2 ec2iface.EC2API) EC2Helper {
33+
return EC2Helper{
34+
ec2ServiceClient: ec2,
35+
}
36+
}
37+
38+
func (h EC2Helper) GetInstanceIdsByTagKey(tag string) ([]string, error) {
39+
ids := []string{}
40+
nextToken := ""
41+
42+
for {
43+
result, err := h.ec2ServiceClient.DescribeInstances(&ec2.DescribeInstancesInput{
44+
Filters: []*ec2.Filter{
45+
{
46+
Name: aws.String("tag-key"),
47+
Values: []*string{aws.String(tag)},
48+
},
49+
},
50+
NextToken: &nextToken,
51+
})
52+
53+
if err != nil {
54+
return ids, err
55+
}
56+
57+
if result == nil || len(result.Reservations) == 0 ||
58+
len(result.Reservations[0].Instances) == 0 {
59+
return ids, fmt.Errorf("failed to describe instances")
60+
}
61+
62+
for _, reservation := range result.Reservations {
63+
for _, instance := range reservation.Instances {
64+
if instance.InstanceId == nil {
65+
continue
66+
}
67+
ids = append(ids, *instance.InstanceId)
68+
}
69+
}
70+
71+
if result.NextToken == nil {
72+
break
73+
}
74+
nextToken = *result.NextToken
75+
}
76+
77+
return ids, nil
78+
}
79+
80+
func (h EC2Helper) GetInstanceIdsMapByTagKey(tag string) (map[string]bool, error) {
81+
idMap := map[string]bool{}
82+
ids, err := h.GetInstanceIdsByTagKey(tag)
83+
if err != nil {
84+
return idMap, err
85+
}
86+
87+
for _, id := range ids {
88+
idMap[id] = true
89+
}
90+
91+
return idMap, nil
92+
}

pkg/ec2helper/ec2helper_test.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// Copyright 2016-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"). You may
4+
// not use this file except in compliance with the License. A copy of the
5+
// License is located at
6+
//
7+
// http://aws.amazon.com/apache2.0/
8+
//
9+
// or in the "license" file accompanying this file. This file is distributed
10+
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11+
// express or implied. See the License for the specific language governing
12+
// permissions and limitations under the License.
13+
14+
package ec2helper_test
15+
16+
import (
17+
"testing"
18+
19+
"github.com/aws/aws-node-termination-handler/pkg/ec2helper"
20+
h "github.com/aws/aws-node-termination-handler/pkg/test"
21+
"github.com/aws/aws-sdk-go/aws"
22+
"github.com/aws/aws-sdk-go/service/ec2"
23+
)
24+
25+
// instanceId1 is the first instance ID placed in the mocked DescribeInstances
// response.
const instanceId1 = "i-1"

// instanceId2 is the second instance ID placed in the mocked DescribeInstances
// response.
const instanceId2 = "i-2"
29+
30+
func TestGetInstanceIdsByTagKey(t *testing.T) {
31+
ec2Mock := h.MockedEC2{
32+
DescribeInstancesResp: getDescribeInstancesResp(),
33+
}
34+
ec2Helper := ec2helper.New(ec2Mock)
35+
instanceIds, err := ec2Helper.GetInstanceIdsByTagKey("myNTHManagedTag")
36+
h.Ok(t, err)
37+
38+
h.Equals(t, 2, len(instanceIds))
39+
h.Equals(t, instanceId1, instanceIds[0])
40+
h.Equals(t, instanceId2, instanceIds[1])
41+
}
42+
43+
func TestGetInstanceIdsMapByTagKey(t *testing.T) {
44+
ec2Mock := h.MockedEC2{
45+
DescribeInstancesResp: getDescribeInstancesResp(),
46+
}
47+
ec2Helper := ec2helper.New(ec2Mock)
48+
instanceIdsMap, err := ec2Helper.GetInstanceIdsMapByTagKey("myNTHManagedTag")
49+
h.Ok(t, err)
50+
51+
_, exist := instanceIdsMap[instanceId1]
52+
h.Equals(t, true, exist)
53+
_, exist = instanceIdsMap[instanceId2]
54+
h.Equals(t, true, exist)
55+
_, exist = instanceIdsMap["non-existent instance id"]
56+
h.Equals(t, false, exist)
57+
}
58+
59+
func getDescribeInstancesResp() ec2.DescribeInstancesOutput {
60+
return ec2.DescribeInstancesOutput{
61+
Reservations: []*ec2.Reservation{
62+
{
63+
Instances: []*ec2.Instance{
64+
{
65+
InstanceId: aws.String(instanceId1),
66+
},
67+
{
68+
InstanceId: aws.String(instanceId2),
69+
},
70+
},
71+
},
72+
},
73+
}
74+
}

pkg/node/node.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,32 @@ func (n Node) fetchKubernetesNode(nodeName string) (*corev1.Node, error) {
635635
return &matchingNodes.Items[0], nil
636636
}
637637

638+
// fetchKubernetesNode will send an http request to the k8s api server and return list of AWS EC2 instance id
639+
func (n Node) FetchKubernetesNodeInstanceIds() ([]string, error) {
640+
ids := []string{}
641+
642+
if n.nthConfig.DryRun {
643+
return ids, nil
644+
}
645+
matchingNodes, err := n.drainHelper.Client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
646+
if err != nil {
647+
log.Warn().Msgf("Unable to list Nodes")
648+
return ids, err
649+
}
650+
651+
if matchingNodes == nil || len(matchingNodes.Items) == 0 {
652+
return ids, nil
653+
}
654+
655+
for _, node := range matchingNodes.Items {
656+
// sample providerID: aws:///us-west-2a/i-0abcd1234efgh5678
657+
parts := strings.Split(node.Spec.ProviderID, "/")
658+
ids = append(ids, parts[len(parts)-1])
659+
}
660+
661+
return ids, nil
662+
}
663+
638664
func (n Node) fetchAllPods(nodeName string) (*corev1.PodList, error) {
639665
if n.nthConfig.DryRun {
640666
log.Info().Msgf("Would have retrieved running pod list on node %s, but dry-run flag was set", nodeName)

pkg/node/node_test.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ package node_test
1515

1616
import (
1717
"context"
18+
"fmt"
1819
"strconv"
1920
"strings"
2021
"testing"
@@ -35,7 +36,11 @@ import (
3536
)
3637

3738
// Size of the fakeRecorder buffer
38-
const recorderBufferSize = 10
39+
const (
	// recorderBufferSize is the size of the fakeRecorder buffer.
	recorderBufferSize = 10

	// instanceId1 and instanceId2 are the EC2 instance IDs embedded in the
	// provider IDs of the fake nodes.
	instanceId1 = "i-0abcd1234efgh5678"
	instanceId2 = "i-0wxyz5678ijkl1234"
)
3944

4045
var nodeName = "NAME"
4146

@@ -379,6 +384,31 @@ func TestUncordonIfRebootedTimeParseFailure(t *testing.T) {
379384
h.Assert(t, err != nil, "Failed to return error on UncordonIfReboted failure to parse time")
380385
}
381386

387+
func TestFetchKubernetesNodeInstanceIds(t *testing.T) {
388+
client := fake.NewSimpleClientset(
389+
&v1.Node{
390+
ObjectMeta: metav1.ObjectMeta{Name: "node-1"},
391+
Spec: v1.NodeSpec{ProviderID: fmt.Sprintf("aws:///us-west-2a/%s", instanceId1)},
392+
},
393+
&v1.Node{
394+
ObjectMeta: metav1.ObjectMeta{Name: "node-2"},
395+
Spec: v1.NodeSpec{ProviderID: fmt.Sprintf("aws:///us-west-2a/%s", instanceId2)},
396+
},
397+
)
398+
399+
_, err := client.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{})
400+
h.Ok(t, err)
401+
402+
node, err := newNode(config.Config{}, client)
403+
h.Ok(t, err)
404+
405+
instanceIds, err := node.FetchKubernetesNodeInstanceIds()
406+
h.Ok(t, err)
407+
h.Equals(t, 2, len(instanceIds))
408+
h.Equals(t, instanceId1, instanceIds[0])
409+
h.Equals(t, instanceId2, instanceIds[1])
410+
}
411+
382412
func TestFilterOutDaemonSetPods(t *testing.T) {
383413
tNode, err := newNode(config.Config{IgnoreDaemonSets: true}, fake.NewSimpleClientset())
384414
h.Ok(t, err)

0 commit comments

Comments
 (0)