Skip to content

Commit 486acdf

Browse files
authored
Add support for EKS accelerated AMIs based on AL2023 (#7996)
add support for EKS accelerated AMIs based on AL2023
1 parent d70fff1 commit 486acdf

File tree

8 files changed: 104 additions and 77 deletions

8 files changed

+104
-77
lines changed

pkg/ami/api.go

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,14 +19,16 @@ import (
1919
// Variations of image classes
2020
const (
2121
ImageClassGeneral = iota
22-
ImageClassGPU
22+
ImageClassNvidia
23+
ImageClassNeuron
2324
ImageClassARM
2425
)
2526

2627
// ImageClasses is a list of image class names
2728
var ImageClasses = []string{
2829
"ImageClassGeneral",
29-
"ImageClassGPU",
30+
"ImageClassNvidia",
31+
"ImageClassNeuron",
3032
"ImageClassARM",
3133
}
3234

pkg/ami/auto_resolver.go

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -25,11 +25,14 @@ func MakeImageSearchPatterns(version string) map[string]map[int]string {
2525
return map[string]map[int]string{
2626
api.NodeImageFamilyAmazonLinux2023: {
2727
ImageClassGeneral: fmt.Sprintf("amazon-eks-node-al2023-x86_64-standard-%s-v*", version),
28+
ImageClassNvidia: fmt.Sprintf("amazon-eks-node-al2023-x86_64-nvidia-*-%s-v*", version),
29+
ImageClassNeuron: fmt.Sprintf("amazon-eks-node-al2023-x86_64-neuron-%s-v*", version),
2830
ImageClassARM: fmt.Sprintf("amazon-eks-node-al2023-arm64-standard-%s-v*", version),
2931
},
3032
api.NodeImageFamilyAmazonLinux2: {
3133
ImageClassGeneral: fmt.Sprintf("amazon-eks-node-%s-v*", version),
32-
ImageClassGPU: fmt.Sprintf("amazon-eks-gpu-node-%s-*", version),
34+
ImageClassNvidia: fmt.Sprintf("amazon-eks-gpu-node-%s-*", version),
35+
ImageClassNeuron: fmt.Sprintf("amazon-eks-gpu-node-%s-*", version),
3336
ImageClassARM: fmt.Sprintf("amazon-eks-arm64-node-%s-*", version),
3437
},
3538
api.NodeImageFamilyUbuntuPro2204: {
@@ -90,16 +93,22 @@ func (r *AutoResolver) Resolve(ctx context.Context, region, version, instanceTyp
9093

9194
imageClasses := MakeImageSearchPatterns(version)[imageFamily]
9295
namePattern := imageClasses[ImageClassGeneral]
93-
if instanceutils.IsGPUInstanceType(instanceType) {
96+
var ok bool
97+
switch {
98+
case instanceutils.IsNvidiaInstanceType(instanceType):
99+
namePattern, ok = imageClasses[ImageClassNvidia]
100+
if !ok {
101+
logger.Critical("image family %s doesn't support Nvidia GPU image class", imageFamily)
102+
return "", NewErrFailedResolution(region, version, instanceType, imageFamily)
103+
}
104+
case instanceutils.IsNeuronInstanceType(instanceType):
94105
var ok bool
95-
namePattern, ok = imageClasses[ImageClassGPU]
106+
namePattern, ok = imageClasses[ImageClassNeuron]
96107
if !ok {
97-
logger.Critical("image family %s doesn't support GPU image class", imageFamily)
108+
logger.Critical("image family %s doesn't support Neuron GPU image class", imageFamily)
98109
return "", NewErrFailedResolution(region, version, instanceType, imageFamily)
99110
}
100-
}
101-
102-
if instanceutils.IsARMInstanceType(instanceType) {
111+
case instanceutils.IsARMInstanceType(instanceType):
103112
var ok bool
104113
namePattern, ok = imageClasses[ImageClassARM]
105114
if !ok {

pkg/ami/ssm_resolver.go

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -55,8 +55,8 @@ func MakeSSMParameterName(version, instanceType, imageFamily string) (string, er
5555

5656
switch imageFamily {
5757
case api.NodeImageFamilyAmazonLinux2023:
58-
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/%s/standard/recommended/%s",
59-
version, utils.ToKebabCase(imageFamily), instanceEC2ArchName(instanceType), fieldName), nil
58+
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/%s/%s/recommended/%s",
59+
version, utils.ToKebabCase(imageFamily), instanceEC2ArchName(instanceType), imageType(imageFamily, instanceType, version), fieldName), nil
6060
case api.NodeImageFamilyAmazonLinux2:
6161
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/recommended/%s", version, imageType(imageFamily, instanceType, version), fieldName), nil
6262
case api.NodeImageFamilyWindowsServer2019CoreContainer,
@@ -102,6 +102,10 @@ func MakeManagedSSMParameterName(version string, amiType ekstypes.AMITypes) stri
102102
switch amiType {
103103
case ekstypes.AMITypesAl2023X8664Standard:
104104
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/x86_64/standard/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023))
105+
case ekstypes.AMITypesAl2023X8664Nvidia:
106+
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/x86_64/nvidia/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023))
107+
case ekstypes.AMITypesAl2023X8664Neuron:
108+
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/x86_64/neuron/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023))
105109
case ekstypes.AMITypesAl2023Arm64Standard:
106110
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/arm64/standard/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023))
107111
case ekstypes.AMITypesAl2X8664:
@@ -138,6 +142,14 @@ func ubuntuArchName(instanceType string) string {
138142
func imageType(imageFamily, instanceType, version string) string {
139143
family := utils.ToKebabCase(imageFamily)
140144
switch imageFamily {
145+
case api.NodeImageFamilyAmazonLinux2023:
146+
if instanceutils.IsNvidiaInstanceType(instanceType) {
147+
return "nvidia"
148+
}
149+
if instanceutils.IsNeuronInstanceType(instanceType) {
150+
return "neuron"
151+
}
152+
return "standard"
141153
case api.NodeImageFamilyBottlerocket:
142154
if instanceutils.IsNvidiaInstanceType(instanceType) {
143155
return fmt.Sprintf("%s-%s", version, "nvidia")

pkg/apis/eksctl.io/v1alpha5/defaults.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -135,7 +135,7 @@ func SetManagedNodeGroupDefaults(ng *ManagedNodeGroup, meta *ClusterMeta, contro
135135
// When using custom AMIs, we want the user to explicitly specify AMI family.
136136
// Thus, we only set up default AMI family when no custom AMI is being used.
137137
if ng.AMIFamily == "" && ng.AMI == "" {
138-
if isMinVer, _ := utils.IsMinVersion(Version1_30, meta.Version); isMinVer && !instanceutils.IsGPUInstanceType(ng.InstanceType) &&
138+
if isMinVer, _ := utils.IsMinVersion(Version1_30, meta.Version); isMinVer &&
139139
!instanceutils.IsARMGPUInstanceType(ng.InstanceType) {
140140
ng.AMIFamily = NodeImageFamilyAmazonLinux2023
141141
} else {

pkg/apis/eksctl.io/v1alpha5/gpu_validation_test.go

Lines changed: 12 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -40,22 +40,16 @@ var _ = Describe("GPU instance support", func() {
4040
assertValidationError(e, api.ValidateManagedNodeGroup(0, mng))
4141
},
4242
Entry("AL2023 INF", gpuInstanceEntry{
43-
amiFamily: api.NodeImageFamilyAmazonLinux2023,
44-
gpuInstanceType: "inf1.xlarge",
45-
expectUnsupportedErr: true,
46-
instanceTypeName: "Inferentia",
43+
amiFamily: api.NodeImageFamilyAmazonLinux2023,
44+
gpuInstanceType: "inf1.xlarge",
4745
}),
4846
Entry("AL2023 TRN", gpuInstanceEntry{
49-
amiFamily: api.NodeImageFamilyAmazonLinux2023,
50-
gpuInstanceType: "trn1.2xlarge",
51-
expectUnsupportedErr: true,
52-
instanceTypeName: "Trainium",
47+
amiFamily: api.NodeImageFamilyAmazonLinux2023,
48+
gpuInstanceType: "trn1.2xlarge",
5349
}),
5450
Entry("AL2023 NVIDIA", gpuInstanceEntry{
55-
amiFamily: api.NodeImageFamilyAmazonLinux2023,
56-
gpuInstanceType: "g4dn.xlarge",
57-
expectUnsupportedErr: true,
58-
instanceTypeName: "GPU",
51+
amiFamily: api.NodeImageFamilyAmazonLinux2023,
52+
gpuInstanceType: "g4dn.xlarge",
5953
}),
6054
Entry("AL2", gpuInstanceEntry{
6155
gpuInstanceType: "asdf",
@@ -107,22 +101,16 @@ var _ = Describe("GPU instance support", func() {
107101

108102
},
109103
Entry("AL2023 INF", gpuInstanceEntry{
110-
amiFamily: api.NodeImageFamilyAmazonLinux2023,
111-
gpuInstanceType: "inf1.xlarge",
112-
expectUnsupportedErr: true,
113-
instanceTypeName: "Inferentia",
104+
amiFamily: api.NodeImageFamilyAmazonLinux2023,
105+
gpuInstanceType: "inf1.xlarge",
114106
}),
115107
Entry("AL2023 TRN", gpuInstanceEntry{
116-
amiFamily: api.NodeImageFamilyAmazonLinux2023,
117-
gpuInstanceType: "trn1.2xlarge",
118-
expectUnsupportedErr: true,
119-
instanceTypeName: "Trainium",
108+
amiFamily: api.NodeImageFamilyAmazonLinux2023,
109+
gpuInstanceType: "trn1.2xlarge",
120110
}),
121111
Entry("AL2023 NVIDIA", gpuInstanceEntry{
122-
amiFamily: api.NodeImageFamilyAmazonLinux2023,
123-
gpuInstanceType: "g4dn.xlarge",
124-
expectUnsupportedErr: true,
125-
instanceTypeName: "GPU",
112+
amiFamily: api.NodeImageFamilyAmazonLinux2023,
113+
gpuInstanceType: "g4dn.xlarge",
126114
}),
127115
Entry("AL2", gpuInstanceEntry{
128116
gpuInstanceType: "g4dn.xlarge",

pkg/apis/eksctl.io/v1alpha5/validation.go

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -661,12 +661,10 @@ func validateNodeGroupBase(np NodePool, path string, controlPlaneOnOutposts bool
661661

662662
instanceType := SelectInstanceType(np)
663663

664-
if ng.AMIFamily == NodeImageFamilyAmazonLinux2023 && instanceutils.IsNvidiaInstanceType(instanceType) {
665-
return ErrUnsupportedInstanceTypes("GPU", NodeImageFamilyAmazonLinux2023,
666-
fmt.Sprintf("EKS accelerated AMIs based on %s will be available at a later date", NodeImageFamilyAmazonLinux2023))
667-
}
668-
669-
if ng.AMIFamily != NodeImageFamilyAmazonLinux2 && ng.AMIFamily != NodeImageFamilyBottlerocket && ng.AMIFamily != "" {
664+
if ng.AMIFamily != NodeImageFamilyAmazonLinux2023 &&
665+
ng.AMIFamily != NodeImageFamilyAmazonLinux2 &&
666+
ng.AMIFamily != NodeImageFamilyBottlerocket &&
667+
ng.AMIFamily != "" {
670668
if instanceutils.IsNvidiaInstanceType(instanceType) {
671669
logger.Warning(GPUDriversWarning(ng.AMIFamily))
672670
}
@@ -676,12 +674,14 @@ func validateNodeGroupBase(np NodePool, path string, controlPlaneOnOutposts bool
676674
}
677675
}
678676

679-
if ng.AMIFamily != NodeImageFamilyAmazonLinux2 && ng.AMIFamily != "" {
680-
// Only AL2 supports Inferentia hosts.
677+
if ng.AMIFamily != NodeImageFamilyAmazonLinux2 &&
678+
ng.AMIFamily != NodeImageFamilyAmazonLinux2023 &&
679+
ng.AMIFamily != "" {
680+
// Only AL2 and AL2023 support Inferentia hosts.
681681
if instanceutils.IsInferentiaInstanceType(instanceType) {
682682
return ErrUnsupportedInstanceTypes("Inferentia", ng.AMIFamily, fmt.Sprintf("please use %s instead", NodeImageFamilyAmazonLinux2))
683683
}
684-
// Only AL2 supports Trainium hosts.
684+
// Only AL2 and AL2023 support Trainium hosts.
685685
if instanceutils.IsTrainiumInstanceType(instanceType) {
686686
return ErrUnsupportedInstanceTypes("Trainium", ng.AMIFamily, fmt.Sprintf("please use %s instead", NodeImageFamilyAmazonLinux2))
687687
}

pkg/cfn/builder/managed_nodegroup.go

Lines changed: 31 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -263,41 +263,45 @@ func validateLaunchTemplate(launchTemplateData *ec2types.ResponseLaunchTemplateD
263263

264264
func getAMIType(ng *api.ManagedNodeGroup, instanceType string) ekstypes.AMITypes {
265265
amiTypeMapping := map[string]struct {
266-
X86x64 ekstypes.AMITypes
267-
X86GPU ekstypes.AMITypes
268-
ARM ekstypes.AMITypes
269-
ARMGPU ekstypes.AMITypes
266+
X86x64 ekstypes.AMITypes
267+
X86Nvidia ekstypes.AMITypes
268+
X86Neuron ekstypes.AMITypes
269+
ARM ekstypes.AMITypes
270+
ARMGPU ekstypes.AMITypes
270271
}{
271272
api.NodeImageFamilyAmazonLinux2023: {
272-
X86x64: ekstypes.AMITypesAl2023X8664Standard,
273-
ARM: ekstypes.AMITypesAl2023Arm64Standard,
273+
X86x64: ekstypes.AMITypesAl2023X8664Standard,
274+
X86Nvidia: ekstypes.AMITypesAl2023X8664Nvidia,
275+
X86Neuron: ekstypes.AMITypesAl2023X8664Neuron,
276+
ARM: ekstypes.AMITypesAl2023Arm64Standard,
274277
},
275278
api.NodeImageFamilyAmazonLinux2: {
276-
X86x64: ekstypes.AMITypesAl2X8664,
277-
X86GPU: ekstypes.AMITypesAl2X8664Gpu,
278-
ARM: ekstypes.AMITypesAl2Arm64,
279+
X86x64: ekstypes.AMITypesAl2X8664,
280+
X86Nvidia: ekstypes.AMITypesAl2X8664Gpu,
281+
X86Neuron: ekstypes.AMITypesAl2X8664Gpu,
282+
ARM: ekstypes.AMITypesAl2Arm64,
279283
},
280284
api.NodeImageFamilyBottlerocket: {
281-
X86x64: ekstypes.AMITypesBottlerocketX8664,
282-
X86GPU: ekstypes.AMITypesBottlerocketX8664Nvidia,
283-
ARM: ekstypes.AMITypesBottlerocketArm64,
284-
ARMGPU: ekstypes.AMITypesBottlerocketArm64Nvidia,
285+
X86x64: ekstypes.AMITypesBottlerocketX8664,
286+
X86Nvidia: ekstypes.AMITypesBottlerocketX8664Nvidia,
287+
ARM: ekstypes.AMITypesBottlerocketArm64,
288+
ARMGPU: ekstypes.AMITypesBottlerocketArm64Nvidia,
285289
},
286290
api.NodeImageFamilyWindowsServer2019FullContainer: {
287-
X86x64: ekstypes.AMITypesWindowsFull2019X8664,
288-
X86GPU: ekstypes.AMITypesWindowsFull2019X8664,
291+
X86x64: ekstypes.AMITypesWindowsFull2019X8664,
292+
X86Nvidia: ekstypes.AMITypesWindowsFull2019X8664,
289293
},
290294
api.NodeImageFamilyWindowsServer2019CoreContainer: {
291-
X86x64: ekstypes.AMITypesWindowsCore2019X8664,
292-
X86GPU: ekstypes.AMITypesWindowsCore2019X8664,
295+
X86x64: ekstypes.AMITypesWindowsCore2019X8664,
296+
X86Nvidia: ekstypes.AMITypesWindowsCore2019X8664,
293297
},
294298
api.NodeImageFamilyWindowsServer2022FullContainer: {
295-
X86x64: ekstypes.AMITypesWindowsFull2022X8664,
296-
X86GPU: ekstypes.AMITypesWindowsFull2022X8664,
299+
X86x64: ekstypes.AMITypesWindowsFull2022X8664,
300+
X86Nvidia: ekstypes.AMITypesWindowsFull2022X8664,
297301
},
298302
api.NodeImageFamilyWindowsServer2022CoreContainer: {
299-
X86x64: ekstypes.AMITypesWindowsCore2022X8664,
300-
X86GPU: ekstypes.AMITypesWindowsCore2022X8664,
303+
X86x64: ekstypes.AMITypesWindowsCore2022X8664,
304+
X86Nvidia: ekstypes.AMITypesWindowsCore2022X8664,
301305
},
302306
}
303307

@@ -307,13 +311,14 @@ func getAMIType(ng *api.ManagedNodeGroup, instanceType string) ekstypes.AMITypes
307311
}
308312

309313
switch {
310-
case instanceutils.IsGPUInstanceType(instanceType):
311-
if instanceutils.IsARMInstanceType(instanceType) {
312-
return amiType.ARMGPU
313-
}
314-
return amiType.X86GPU
314+
case instanceutils.IsARMGPUInstanceType(instanceType):
315+
return amiType.ARMGPU
315316
case instanceutils.IsARMInstanceType(instanceType):
316317
return amiType.ARM
318+
case instanceutils.IsNvidiaInstanceType(instanceType):
319+
return amiType.X86Nvidia
320+
case instanceutils.IsNeuronInstanceType(instanceType):
321+
return amiType.X86Neuron
317322
default:
318323
return amiType.X86x64
319324
}

pkg/cfn/builder/managed_nodegroup_ami_type_test.go

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -77,23 +77,24 @@ var _ = DescribeTable("Managed Nodegroup AMI type", func(e amiTypeEntry) {
7777
expectedAMIType: "AL2_x86_64",
7878
}),
7979

80-
Entry("AMI type", amiTypeEntry{
80+
Entry("default Nvidia GPU instance type", amiTypeEntry{
8181
nodeGroup: &api.ManagedNodeGroup{
8282
NodeGroupBase: &api.NodeGroupBase{
83-
Name: "test",
83+
Name: "test",
84+
InstanceType: "p2.xlarge",
8485
},
8586
},
86-
expectedAMIType: "AL2023_x86_64_STANDARD",
87+
expectedAMIType: "AL2023_x86_64_NVIDIA",
8788
}),
8889

89-
Entry("default GPU instance type", amiTypeEntry{
90+
Entry("default Neuron GPU instance type", amiTypeEntry{
9091
nodeGroup: &api.ManagedNodeGroup{
9192
NodeGroupBase: &api.NodeGroupBase{
9293
Name: "test",
93-
InstanceType: "p2.xlarge",
94+
InstanceType: "inf1.2xlarge",
9495
},
9596
},
96-
expectedAMIType: "AL2_x86_64_GPU",
97+
expectedAMIType: "AL2023_x86_64_NEURON",
9798
}),
9899

99100
Entry("AL2 GPU instance type", amiTypeEntry{
@@ -107,6 +108,16 @@ var _ = DescribeTable("Managed Nodegroup AMI type", func(e amiTypeEntry) {
107108
expectedAMIType: "AL2_x86_64_GPU",
108109
}),
109110

111+
Entry("default ARM instance type", amiTypeEntry{
112+
nodeGroup: &api.ManagedNodeGroup{
113+
NodeGroupBase: &api.NodeGroupBase{
114+
Name: "test",
115+
InstanceType: "a1.2xlarge",
116+
},
117+
},
118+
expectedAMIType: "AL2023_ARM_64_STANDARD",
119+
}),
120+
110121
Entry("AL2 ARM instance type", amiTypeEntry{
111122
nodeGroup: &api.ManagedNodeGroup{
112123
NodeGroupBase: &api.NodeGroupBase{

0 commit comments

Comments
 (0)