NUM_USERS_TO_CREATE = 5
NUM_USERS_TO_TEST = 3

# Upper bound on the number of failed AD stacks retained for debugging;
# the limit exists to contain costs.
MAX_QUARANTINED_STACKS = 5

# Common name prefix of the directory (AD) stacks created by these tests;
# the directory type is appended to it when a stack name is generated.
AD_STACK_PREFIX = "integ-tests-MultiUserInfraStack"

# Tag key attached to stacks that are being retained (for integration
# testing or to debug a creation failure).
DO_NOT_DELETE_TAG_KEY = "DO-NOT-DELETE"
43+
3844
3945def get_infra_stack_outputs (stack_name ):
4046 cfn = boto3 .client ("cloudformation" )
@@ -117,7 +123,7 @@ def add_tag_to_stack(stack_name, key, value):
117123 stack = cfn .Stack (stack_name )
118124 add_tag = True
119125 for tag in stack .tags :
120- if tag .get ("Key" ) == "DO-NOT-DELETE" :
126+ if tag .get ("Key" ) == DO_NOT_DELETE_TAG_KEY :
121127 add_tag = False
122128 break
123129 if add_tag :
@@ -189,7 +195,7 @@ def _get_stack_parameters(directory_type, vpc_stack, keypair):
189195
190196def _create_directory_stack (cfn_stacks_factory , request , directory_type , region , vpc_stack : CfnVpcStack ):
191197 directory_stack_name = generate_stack_name (
192- f"integ-tests-MultiUserInfraStack { directory_type } " , request .config .getoption ("stackname_suffix" )
198+ f"{ AD_STACK_PREFIX } { directory_type } " , request .config .getoption ("stackname_suffix" )
193199 )
194200
195201 if directory_type not in ("MicrosoftAD" , "SimpleAD" ):
@@ -203,7 +209,7 @@ def _create_directory_stack(cfn_stacks_factory, request, directory_type, region,
203209 stack_parameters = _get_stack_parameters (directory_type , vpc_stack , request .config .getoption ("key_name" ))
204210 tags = [{"Key" : "parallelcluster:integ-tests-ad-stack" , "Value" : directory_type }]
205211 if request .config .getoption ("retain_ad_stack" ):
206- tags .append ({"Key" : "DO-NOT-DELETE" , "Value" : "Retained for integration testing" })
212+ tags .append ({"Key" : DO_NOT_DELETE_TAG_KEY , "Value" : "Retained for integration testing" })
207213
208214 directory_stack = CfnStack (
209215 name = directory_stack_name ,
@@ -213,11 +219,30 @@ def _create_directory_stack(cfn_stacks_factory, request, directory_type, region,
213219 capabilities = ["CAPABILITY_IAM" , "CAPABILITY_NAMED_IAM" , "CAPABILITY_AUTO_EXPAND" ],
214220 tags = tags ,
215221 )
216- cfn_stacks_factory .create_stack (directory_stack )
222+ try :
223+ cfn_stacks_factory .create_stack (directory_stack , stack_is_under_test = True )
224+ except Exception as e :
225+ logging .error ("Failed to create stack %s" , directory_stack_name )
226+ # We want to retain the stack in case of failure in order to debug it.
227+ # We retain a limited number of stacks to contain the costs.
228+ n_retained_ad_stacks = get_retained_ad_stacks_count ()
229+ if n_retained_ad_stacks < MAX_QUARANTINED_STACKS :
230+ logging .warn ("Retaining failed stack %s to debug failure" , directory_stack_name )
231+ add_tag_to_stack (directory_stack .name , DO_NOT_DELETE_TAG_KEY , "Retained to debug failure" )
232+ else :
233+ logging .warn ("Cannot retain failed stack %s for debugging because there are already %d retained (max: %d)" ,
234+ directory_stack_name , n_retained_ad_stacks , MAX_QUARANTINED_STACKS )
235+ raise e
217236 logging .info ("Creation of stack %s complete" , directory_stack_name )
218237
219238 return directory_stack
220239
def get_retained_ad_stacks_count():
    """Return how many failed AD stacks are currently retained for debugging.

    A stack is counted when all of the following hold:
      * its status is ``CREATE_FAILED``;
      * its name contains ``AD_STACK_PREFIX``;
      * it carries a tag whose key is ``DO_NOT_DELETE_TAG_KEY``.

    The stacks are read through ``describe_stacks`` rather than ``list_stacks``
    because the summaries returned by ``list_stacks`` do not include tags, which
    would make the tag check below always fail and the count always be zero.
    The call is paginated so that accounts with many stacks are fully scanned.

    :return: number of retained failed AD stacks in the client's default region.
    """
    cfn = boto3.client("cloudformation")
    retained_count = 0
    for page in cfn.get_paginator("describe_stacks").paginate():
        for stack in page.get("Stacks", []):
            if stack.get("StackStatus") != "CREATE_FAILED":
                continue
            if AD_STACK_PREFIX not in stack.get("StackName", ""):
                continue
            # "Tags" may be missing or empty for untagged stacks.
            tags = stack.get("Tags") or []
            if any(tag.get("Key") == DO_NOT_DELETE_TAG_KEY for tag in tags):
                retained_count += 1
    return retained_count
221246
222247@retry (wait_fixed = seconds (20 ), stop_max_delay = seconds (700 ))
223248def _check_ssm_success (ssm_client , command_id , instance_id ):
@@ -243,7 +268,7 @@ def _directory_factory(
243268 directory_stack_name = created_directory_stacks .get (region , {}).get ("directory" )
244269 logging .info ("Using directory stack named %s created by another test" , directory_stack_name )
245270 else :
246- stack_prefix = f"integ-tests-MultiUserInfraStack { directory_type } "
271+ stack_prefix = f"{ AD_STACK_PREFIX } { directory_type } "
247272 directory_stack_name = find_stack_by_tag ("parallelcluster:integ-tests-ad-stack" , region , stack_prefix )
248273
249274 if not directory_stack_name :
@@ -257,7 +282,7 @@ def _directory_factory(
257282 directory_stack_name = directory_stack .name
258283 created_directory_stacks [region ]["directory" ] = directory_stack_name
259284 if request .config .getoption ("retain_ad_stack" ):
260- add_tag_to_stack (vpc_stack .name , "DO-NOT-DELETE" , "Retained for integration testing" )
285+ add_tag_to_stack (vpc_stack .name , DO_NOT_DELETE_TAG_KEY , "Retained for integration testing" )
261286 return directory_stack_name
262287
263288 yield _directory_factory
0 commit comments