Skip to content

Commit b44d7d7

Browse files
Bug 1943378: Fix InstanceCreate volume cleanup
With this change, the volume cleanup is triggered by any early failure of InstanceCreate. That is to say: if InstanceCreate creates a volume, that volume will be deleted if any of the next steps fail and cause the server never to be created.
1 parent 0ff90e6 commit b44d7d7

File tree

1 file changed

+29
-20
lines changed

1 file changed

+29
-20
lines changed

pkg/cloud/openstack/clients/machineservice.go

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,17 @@ func GetSecurityGroups(is *InstanceService, sg_param []openstackconfigv1.Securit
462462
// If ServerGroupName is nonempty and no server group exists with that name,
463463
// then InstanceCreate creates a server group with that name.
464464
func (is *InstanceService) InstanceCreate(clusterName string, name string, clusterSpec *openstackconfigv1.OpenstackClusterProviderSpec, config *openstackconfigv1.OpenstackProviderSpec, cmd string, keyName string, configClient configclient.ConfigV1Interface) (instance *Instance, err error) {
465+
// server is only non-nil in case of successful server creation.
466+
//
467+
// There are multiple preparation steps in this method, some of which
468+
// create resources (e.g. a volume in case of boot-from-volume with
469+
// "image" source), and all of which have a chance to fail.
470+
//
471+
// This variable is guaranteed to remain nil until server creation is
472+
// successful. Deferred cleanup functions can restore the initial state
473+
// in case of failure, by only acting if "server" is nil.
474+
var server *servers.Server
475+
465476
if config == nil {
466477
return nil, fmt.Errorf("create Options need be specified to create instace")
467478
}
@@ -475,8 +486,6 @@ func (is *InstanceService) InstanceCreate(clusterName string, name string, clust
475486
}
476487
}
477488

478-
var cleanupOperationsInCaseOfServerCreationFailure []func() error
479-
480489
// Set default Tags
481490
machineTags := []string{
482491
"cluster-api-provider-openstack",
@@ -713,7 +722,7 @@ func (is *InstanceService) InstanceCreate(clusterName string, name string, clust
713722
volumeName := name
714723

715724
// if source type is "image" then we have to create a volume from the image first
716-
klog.Infof("Creating bootable volume with name %q from image %v.", volumeName, config.RootVolume.SourceUUID)
725+
klog.Infof("Creating bootable volume with name %q from image %q.", volumeName, config.RootVolume.SourceUUID)
717726

718727
// Deleting any volumes with the same name, as they may
719728
// be leftovers from a previous failed try.
@@ -750,21 +759,23 @@ func (is *InstanceService) InstanceCreate(clusterName string, name string, clust
750759
if err != nil {
751760
return nil, fmt.Errorf("Create bootable volume err: %v", err)
752761
}
753-
754-
cleanupOperationsInCaseOfServerCreationFailure = append(cleanupOperationsInCaseOfServerCreationFailure, func() error {
755-
return volumes.Delete(is.volumeClient, volume.ID, nil).ExtractErr()
756-
})
757762
volumeID = volume.ID
758763

759-
err = volumes.WaitForStatus(is.volumeClient, volumeID, "available", 300)
760-
if err != nil {
761-
klog.Infof("Bootable volume %v creation failed. Removing...", volumeID)
762-
err = volumes.Delete(is.volumeClient, volumeID, volumes.DeleteOpts{}).ExtractErr()
763-
if err != nil {
764-
return nil, fmt.Errorf("Bootable volume deletion err: %v", err)
764+
defer func() {
765+
// If the server is created in Nova, the lifetime of the associated volume will be
766+
// bound to it. This deferred cleanup function tackles the case where a volume was created
767+
// but never attached to a server. In that case, the volume must be removed manually.
768+
if server == nil {
769+
if err := volumes.Delete(is.volumeClient, volumeID, nil).ExtractErr(); err != nil {
770+
klog.Infof("Failed to delete stale volume %q", volumeID)
771+
} else {
772+
klog.Infof("Deleted stale volume %q", volumeID)
773+
}
765774
}
775+
}()
766776

767-
return nil, fmt.Errorf("Bootable volume %v is not available err: %v", volumeID, err)
777+
if err := volumes.WaitForStatus(is.volumeClient, volumeID, "available", 300); err != nil {
778+
return nil, fmt.Errorf("bootable volume is not available: %v", err)
768779
}
769780

770781
klog.Infof("Bootable volume %v was created successfully.", volumeID)
@@ -839,16 +850,14 @@ func (is *InstanceService) InstanceCreate(clusterName string, name string, clust
839850
}
840851
}
841852

842-
server, err := servers.Create(is.computeClient, keypairs.CreateOptsExt{
853+
server, err = servers.Create(is.computeClient, keypairs.CreateOptsExt{
843854
CreateOptsBuilder: serverCreateOpts,
844855
KeyName: keyName,
845856
}).Extract()
846857
if err != nil {
847-
for _, cleanup := range cleanupOperationsInCaseOfServerCreationFailure {
848-
if e := cleanup(); e != nil {
849-
err = fmt.Errorf("%w. Additionally: %v", err, e)
850-
}
851-
}
858+
// Gophercloud does not guarantee "server" to be nil when "err"
859+
// is not nil. Resetting to nil to trigger cleanup functions.
860+
server = nil
852861
return nil, err
853862
}
854863

0 commit comments

Comments
 (0)