Skip to content

Commit d8c2cb6

Browse files
committed
Add missing fields to launch template validators
This patch adds the missing fields to the launch template validators. For the head node, these are the `BlockDeviceMappings` and `KeyName`, while for the compute resources these are the `BlockDeviceMappings`, `InstanceMetadataOptions` and `CapacityReservationSpecification`.
1 parent 02a8e1a commit d8c2cb6

File tree

9 files changed

+756
-66
lines changed

9 files changed

+756
-66
lines changed

cli/src/pcluster/config/cluster_config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,10 @@ def __init__(self, name: str, capacity_type: str = None):
11121112
_capacity_type = CapacityType[capacity_type.upper()] if capacity_type else None
11131113
self.capacity_type = Resource.init_param(_capacity_type, default=CapacityType.ONDEMAND)
11141114

1115+
def is_spot(self):
1116+
"""Return True if the queue has SPOT capacity."""
1117+
return self.capacity_type == CapacityType.SPOT
1118+
11151119
def _register_validators(self, context: ValidatorContext = None): # noqa: D102 #pylint: disable=unused-argument
11161120
self._register_validator(NameValidator, name=self.name)
11171121

@@ -1193,6 +1197,7 @@ def _register_validators(self, context: ValidatorContext = None): # noqa: D102
11931197
self._register_validator(
11941198
HeadNodeLaunchTemplateValidator,
11951199
head_node=self.head_node,
1200+
os=self.image.os,
11961201
ami_id=self.head_node_ami,
11971202
tags=self.get_cluster_tags(),
11981203
)
@@ -2470,6 +2475,7 @@ def _register_validators(self, context: ValidatorContext = None):
24702475
ComputeResourceLaunchTemplateValidator,
24712476
queue=queue,
24722477
ami_id=queue_image,
2478+
os=self.image.os,
24732479
tags=self.get_cluster_tags(),
24742480
)
24752481
ami_volume_size = AWSApi.instance().ec2.describe_image(queue_image).volume_size
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
from abc import ABC, abstractmethod
2+
3+
from pcluster.constants import OS_MAPPING
4+
5+
6+
class _LaunchTemplateBuilder(ABC):
7+
"""Abstract class with methods with the common logic for launch template builders."""
8+
9+
def get_block_device_mappings(self, root_volume, image_os):
10+
"""Return a list of block device mappings."""
11+
block_device_mappings = []
12+
for _, (device_name_index, virtual_name_index) in enumerate(zip(list(map(chr, range(97, 121))), range(0, 24))):
13+
device_name = "/dev/xvdb{0}".format(device_name_index)
14+
virtual_name = "ephemeral{0}".format(virtual_name_index)
15+
block_device_mappings.append(self._block_device_mapping_for_virt(device_name, virtual_name))
16+
17+
block_device_mappings.append(
18+
self._block_device_mapping_for_ebs(OS_MAPPING[image_os]["root-device"], root_volume)
19+
)
20+
return block_device_mappings
21+
22+
def get_instance_market_options(self, queue, compute_resource):
23+
"""Return the instance market options for spot instances."""
24+
instance_market_options = None
25+
if queue.is_spot():
26+
instance_market_options = self._instance_market_option(
27+
market_type="spot",
28+
spot_instance_type="one-time",
29+
instance_interruption_behavior="terminate",
30+
max_price=None if compute_resource.spot_price is None else str(compute_resource.spot_price),
31+
)
32+
return instance_market_options
33+
34+
def get_capacity_reservation(self, queue, compute_resource):
35+
"""Return the capacity reservation if a target is defined at the queue or compute resource level."""
36+
capacity_reservation = None
37+
cr_target = compute_resource.capacity_reservation_target or queue.capacity_reservation_target
38+
if cr_target:
39+
capacity_reservation = self._capacity_reservation(cr_target)
40+
return capacity_reservation
41+
42+
@abstractmethod
43+
def _block_device_mapping_for_virt(self, device_name, virtual_name):
44+
pass
45+
46+
@abstractmethod
47+
def _block_device_mapping_for_ebs(self, device_name, volume):
48+
pass
49+
50+
@abstractmethod
51+
def _instance_market_option(self, market_type, spot_instance_type, instance_interruption_behavior, max_price):
52+
pass
53+
54+
@abstractmethod
55+
def _capacity_reservation(self, cr_target):
56+
pass

cli/src/pcluster/templates/cdk_builder_utils.py

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
BaseComputeResource,
2727
BaseQueue,
2828
HeadNode,
29-
LocalStorage,
3029
SharedStorageType,
3130
SlurmClusterConfig,
3231
SlurmQueue,
@@ -35,11 +34,11 @@
3534
COOKBOOK_PACKAGES_VERSIONS,
3635
CW_LOGS_RETENTION_DAYS_DEFAULT,
3736
IAM_ROLE_PATH,
38-
OS_MAPPING,
3937
PCLUSTER_CLUSTER_NAME_TAG,
4038
PCLUSTER_DYNAMODB_PREFIX,
4139
PCLUSTER_NODE_TYPE_TAG,
4240
)
41+
from pcluster.launch_template_utils import _LaunchTemplateBuilder
4342
from pcluster.models.s3_bucket import S3Bucket, parse_bucket_url
4443
from pcluster.utils import (
4544
get_attr,
@@ -52,34 +51,6 @@
5251
PCLUSTER_LAMBDA_PREFIX = "pcluster-"
5352

5453

55-
def get_block_device_mappings(local_storage: LocalStorage, os: str):
56-
"""Return block device mapping."""
57-
block_device_mappings = []
58-
for _, (device_name_index, virtual_name_index) in enumerate(zip(list(map(chr, range(97, 121))), range(0, 24))):
59-
device_name = "/dev/xvdb{0}".format(device_name_index)
60-
virtual_name = "ephemeral{0}".format(virtual_name_index)
61-
block_device_mappings.append(
62-
ec2.CfnLaunchTemplate.BlockDeviceMappingProperty(device_name=device_name, virtual_name=virtual_name)
63-
)
64-
65-
root_volume = local_storage.root_volume
66-
67-
block_device_mappings.append(
68-
ec2.CfnLaunchTemplate.BlockDeviceMappingProperty(
69-
device_name=OS_MAPPING[os]["root-device"],
70-
ebs=ec2.CfnLaunchTemplate.EbsProperty(
71-
volume_size=root_volume.size,
72-
encrypted=root_volume.encrypted,
73-
volume_type=root_volume.volume_type,
74-
iops=root_volume.iops,
75-
throughput=root_volume.throughput,
76-
delete_on_termination=root_volume.delete_on_termination,
77-
),
78-
)
79-
)
80-
return block_device_mappings
81-
82-
8354
def create_hash_suffix(string_to_hash: str):
8455
"""Create 16digit hash string."""
8556
return (
@@ -814,3 +785,41 @@ def _stack_unique_id(self):
814785

815786
def _format_arn(self, **kwargs):
816787
return Stack.of(self).format_arn(**kwargs)
788+
789+
790+
class CdkLaunchTemplateBuilder(_LaunchTemplateBuilder):
791+
"""Concrete class for building a CDK launch template."""
792+
793+
def _block_device_mapping_for_ebs(self, device_name, volume):
794+
return ec2.CfnLaunchTemplate.BlockDeviceMappingProperty(
795+
device_name=device_name,
796+
ebs=ec2.CfnLaunchTemplate.EbsProperty(
797+
volume_size=volume.size,
798+
encrypted=volume.encrypted,
799+
volume_type=volume.volume_type,
800+
iops=volume.iops,
801+
throughput=volume.throughput,
802+
delete_on_termination=volume.delete_on_termination,
803+
),
804+
)
805+
806+
def _block_device_mapping_for_virt(self, device_name, virtual_name):
807+
return ec2.CfnLaunchTemplate.BlockDeviceMappingProperty(device_name=device_name, virtual_name=virtual_name)
808+
809+
def _instance_market_option(self, market_type, spot_instance_type, instance_interruption_behavior, max_price):
810+
return ec2.CfnLaunchTemplate.InstanceMarketOptionsProperty(
811+
market_type=market_type,
812+
spot_options=ec2.CfnLaunchTemplate.SpotOptionsProperty(
813+
spot_instance_type=spot_instance_type,
814+
instance_interruption_behavior=instance_interruption_behavior,
815+
max_price=max_price,
816+
),
817+
)
818+
819+
def _capacity_reservation(self, cr_target):
820+
return ec2.CfnLaunchTemplate.CapacityReservationSpecificationProperty(
821+
capacity_reservation_target=ec2.CfnLaunchTemplate.CapacityReservationTargetProperty(
822+
capacity_reservation_id=cr_target.capacity_reservation_id,
823+
capacity_reservation_resource_group_arn=cr_target.capacity_reservation_resource_group_arn,
824+
)
825+
)

cli/src/pcluster/templates/cluster_stack.py

Lines changed: 15 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
from pcluster.config.cluster_config import (
4646
AwsBatchClusterConfig,
4747
BaseSharedFsx,
48-
CapacityType,
4948
ExistingFsxOntap,
5049
ExistingFsxOpenZfs,
5150
SchedulerPluginQueue,
@@ -73,6 +72,7 @@
7372
from pcluster.models.s3_bucket import S3Bucket
7473
from pcluster.templates.awsbatch_builder import AwsBatchConstruct
7574
from pcluster.templates.cdk_builder_utils import (
75+
CdkLaunchTemplateBuilder,
7676
ComputeNodeIamResources,
7777
HeadNodeIamResources,
7878
PclusterLambdaConstruct,
@@ -81,7 +81,6 @@
8181
convert_deletion_policy,
8282
create_hash_suffix,
8383
generate_launch_template_version_cfn_parameter_hash,
84-
get_block_device_mappings,
8584
get_cloud_watch_logs_policy_statement,
8685
get_cloud_watch_logs_retention_days,
8786
get_common_user_data_env,
@@ -119,6 +118,7 @@ def __init__(
119118
) -> None:
120119
super().__init__(scope, construct_id, **kwargs)
121120
self._stack_name = stack_name
121+
self._launch_template_builder = CdkLaunchTemplateBuilder()
122122
self.config = cluster_config
123123
self.bucket = bucket
124124
self.timestamp = datetime.utcnow().strftime("%Y%m%d%H%M%S")
@@ -874,7 +874,9 @@ def _add_head_node(self):
874874
"HeadNodeLaunchTemplate",
875875
launch_template_data=ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
876876
instance_type=head_node.instance_type,
877-
block_device_mappings=get_block_device_mappings(head_node.local_storage, self.config.image.os),
877+
block_device_mappings=self._launch_template_builder.get_block_device_mappings(
878+
head_node.local_storage.root_volume, self.config.image.os
879+
),
878880
key_name=head_node.ssh.key_name,
879881
network_interfaces=head_lt_nw_interfaces,
880882
image_id=self.config.head_node_ami,
@@ -1317,6 +1319,7 @@ def __init__(
13171319
self._dynamodb_table = dynamodb_table
13181320
self._compute_node_instance_profiles = compute_node_instance_profiles
13191321
self._head_eni = head_eni
1322+
self._launch_template_builder = CdkLaunchTemplateBuilder()
13201323
self._add_resources()
13211324

13221325
# -- Utility methods --------------------------------------------------------------------------------------------- #
@@ -1443,41 +1446,19 @@ def _add_compute_resource_launch_template(
14431446
)
14441447
)
14451448

1446-
instance_market_options = None
1447-
if queue.capacity_type == CapacityType.SPOT:
1448-
instance_market_options = ec2.CfnLaunchTemplate.InstanceMarketOptionsProperty(
1449-
market_type="spot",
1450-
spot_options=ec2.CfnLaunchTemplate.SpotOptionsProperty(
1451-
spot_instance_type="one-time",
1452-
instance_interruption_behavior="terminate",
1453-
max_price=None if compute_resource.spot_price is None else str(compute_resource.spot_price),
1454-
),
1455-
)
1456-
14571449
conditional_template_properties = {}
1458-
14591450
if compute_resource.is_ebs_optimized:
14601451
conditional_template_properties.update({"ebs_optimized": True})
14611452
if isinstance(compute_resource, SlurmComputeResource):
14621453
conditional_template_properties.update({"instance_type": compute_resource.instance_type})
14631454

1464-
capacity_reservation_specification = None
1465-
cr_target = compute_resource.capacity_reservation_target or queue.capacity_reservation_target
1466-
if cr_target:
1467-
capacity_reservation_specification = ec2.CfnLaunchTemplate.CapacityReservationSpecificationProperty(
1468-
capacity_reservation_target=ec2.CfnLaunchTemplate.CapacityReservationTargetProperty(
1469-
capacity_reservation_id=cr_target.capacity_reservation_id,
1470-
capacity_reservation_resource_group_arn=cr_target.capacity_reservation_resource_group_arn,
1471-
)
1472-
)
1473-
14741455
return ec2.CfnLaunchTemplate(
14751456
self,
14761457
f"LaunchTemplate{create_hash_suffix(queue.name + compute_resource.name)}",
14771458
launch_template_name=f"{self.stack_name}-{queue.name}-{compute_resource.name}",
14781459
launch_template_data=ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
1479-
block_device_mappings=get_block_device_mappings(
1480-
queue.compute_settings.local_storage, self._config.image.os
1460+
block_device_mappings=self._launch_template_builder.get_block_device_mappings(
1461+
queue.compute_settings.local_storage.root_volume, self._config.image.os
14811462
),
14821463
# key_name=,
14831464
network_interfaces=compute_lt_nw_interfaces,
@@ -1486,9 +1467,14 @@ def _add_compute_resource_launch_template(
14861467
iam_instance_profile=ec2.CfnLaunchTemplate.IamInstanceProfileProperty(
14871468
name=instance_profiles[queue.name]
14881469
),
1489-
instance_market_options=instance_market_options,
1470+
instance_market_options=self._launch_template_builder.get_instance_market_options(
1471+
queue, compute_resource
1472+
),
14901473
instance_initiated_shutdown_behavior="terminate",
1491-
capacity_reservation_specification=capacity_reservation_specification,
1474+
capacity_reservation_specification=self._launch_template_builder.get_capacity_reservation(
1475+
queue,
1476+
compute_resource,
1477+
),
14921478
metadata_options=ec2.CfnLaunchTemplate.MetadataOptionsProperty(
14931479
http_tokens=get_http_tokens_setting(self._config.imds.imds_support)
14941480
),

cli/src/pcluster/utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,3 +361,8 @@ def yaml_no_duplicates_constructor(loader, node, deep=False):
361361
def get_http_tokens_setting(imds_support):
362362
"""Get http tokens settings for supported IMDS version."""
363363
return "required" if imds_support == "v2.0" else "optional"
364+
365+
366+
def remove_none_values(original_dictionary):
367+
"""Return a dictionary without entries with None value."""
368+
return {key: value for key, value in original_dictionary.items() if value is not None}

0 commit comments

Comments
 (0)