6262LOCK_TYPE_RUNNING = "running"
6363lockRunning = None
6464
65+ # Process "lock" indicating that the GC process has been activated but may
66+ # not yet be running; holding it stops a second process from being started.
67+ LOCK_TYPE_GC_ACTIVE = "gc_active"
68+ lockActive = None
69+
6570# Default coalesce error rate limit, in messages per minute. A zero value
6671# disables throttling, and a negative value disables error reporting.
6772DEFAULT_COALESCE_ERR_RATE = 1.0 / 60
@@ -1456,6 +1461,7 @@ def findCoalesceable(self):
14561461 for vdi in self .vdis .values ():
14571462 if vdi .isCoalesceable () and vdi not in self ._failedCoalesceTargets :
14581463 candidates .append (vdi )
1464+ Util .log ("%s is coalescable" % vdi .uuid )
14591465
14601466 # pick one in the tallest tree
14611467 treeHeight = dict ()
@@ -1748,6 +1754,7 @@ def _coalesce(self, vdi):
17481754
17491755 vdi .parent ._reloadChildren (vdi )
17501756 self .journaler .remove (vdi .JRN_RELINK , vdi .uuid )
1757+ self .deleteVDI (vdi )
17511758
17521759 def _coalesceLeaf (self , vdi ):
17531760 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
@@ -1786,7 +1793,6 @@ def _snapshotCoalesce(self, vdi):
17861793 Util .log ("Coalescing parent %s" % tempSnap )
17871794 util .fistpoint .activate ("LVHDRT_coaleaf_delay_2" , self .uuid )
17881795 self ._coalesce (tempSnap )
1789- self .deleteVDI (tempSnap )
17901796 if not vdi .isLeafCoalesceable ():
17911797 Util .log ("The VDI tree appears to have been altered since" )
17921798 return False
@@ -2532,48 +2538,59 @@ def normalizeType(type):
25322538 return type
25332539
def _gcLoop(sr, dryRun, quietPeriod=5 * 60):
    """Run garbage collection and coalescing on an SR until no work remains.

    The function first takes the module-level ``lockActive`` lock without
    blocking; if that fails another GC instance is already active and the
    function returns immediately. It then waits for a quiet period before
    entering the work loop.

    Each loop iteration:
      * stops if the SR is no longer plugged on this host;
      * rescans the SR and stops if ``sr.hasWork()`` is false;
      * takes ``lockRunning`` without blocking (returning if unavailable);
      * stops if GC is not enabled for the SR;
      * cleans up coalesce journals, rescans and updates block info;
      * garbage-collects if ``sr.findGarbage()`` returns anything;
      * performs at most one coalesce or one leaf-coalesce, then starts the
        next iteration.

    ``lockRunning`` is released at the end of every iteration and
    ``lockActive`` is released when the function exits, on every path taken
    after it was acquired (including exceptions).

    Args:
        sr: the SR object to work on.
        dryRun: passed through unchanged to ``sr.garbageCollect``,
            ``sr.coalesce`` and ``sr.coalesceLeaf``.
        quietPeriod: number of seconds to sleep after becoming active and
            before doing any work. Defaults to five minutes; a value of
            zero or less skips the sleep.
    """
    if not lockActive.acquireNoblock():
        Util.log("Another GC instance already active, exiting")
        return
    try:
        Util.log("GC active, about to go quiet")
        # time.sleep() rejects negative values, so only sleep for a
        # strictly positive quiet period.
        if quietPeriod > 0:
            time.sleep(quietPeriod)
        Util.log("GC active, quiet period ended")

        while True:
            if not sr.xapi.isPluggedHere():
                Util.log("SR no longer attached, exiting")
                break
            sr.scanLocked()
            if not sr.hasWork():
                Util.log("No work, exiting")
                break

            if not lockRunning.acquireNoblock():
                Util.log("Unable to acquire GC running lock.")
                return
            try:
                if not sr.gcEnabled():
                    break
                sr.cleanupCoalesceJournals()
                sr.scanLocked()
                sr.updateBlockInfo()

                howmany = len(sr.findGarbage())
                if howmany > 0:
                    Util.log("Found %d orphaned vdis" % howmany)
                    sr.garbageCollect(dryRun)
                    sr.xapi.srUpdate()

                # Only one coalesce operation is performed per iteration;
                # "continue" restarts the loop so the SR is rescanned and
                # the exit conditions are re-evaluated before the next one.
                candidate = sr.findCoalesceable()
                if candidate:
                    util.fistpoint.activate(
                        "LVHDRT_finding_a_suitable_pair", sr.uuid)
                    sr.coalesce(candidate, dryRun)
                    sr.xapi.srUpdate()
                    continue

                candidate = sr.findLeafCoalesceable()
                if candidate:
                    sr.coalesceLeaf(candidate, dryRun)
                    sr.xapi.srUpdate()
                    continue

            finally:
                # Runs on break, continue, return and exceptions alike, so
                # the running lock is never held across iterations.
                lockRunning.release()
    finally:
        # This message is logged on every exit path once lockActive has
        # been acquired, not only when the SR reported no work.
        Util.log("GC process exiting, no work left")
        lockActive.release()
25772594
25782595def _gc (session , srUuid , dryRun ):
25792596 init (srUuid )
@@ -2621,7 +2638,10 @@ def _abort(srUuid, soft=False):
def init(srUuid):
    """Create the module-level GC locks for the given SR if not yet set.

    ``lockRunning`` and ``lockActive`` are each created at most once: when
    the corresponding global already holds a truthy value it is kept as is.

    Args:
        srUuid: UUID of the SR, passed to ``lock.Lock`` together with the
            lock type constant.
    """
    global lockRunning, lockActive
    # "x = x or new" keeps an existing (truthy) lock and only builds a new
    # one when the global is still unset.
    lockRunning = lockRunning or lock.Lock(LOCK_TYPE_RUNNING, srUuid)
    lockActive = lockActive or lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
26252645
26262646def usage ():
26272647 output = """Garbage collect and/or coalesce VHDs in a VHD-based SR
@@ -2741,8 +2761,8 @@ def get_state(srUuid):
27412761 locking.
27422762 """
27432763 init (srUuid )
2744- if lockRunning .acquireNoblock ():
2745- lockRunning .release ()
2764+ if lockActive .acquireNoblock ():
2765+ lockActive .release ()
27462766 return False
27472767 return True
27482768
@@ -2752,7 +2772,7 @@ def should_preempt(session, srUuid):
27522772 if len (entries ) == 0 :
27532773 return False
27542774 elif len (entries ) > 1 :
2755- raise util .SMException ("More than one coalesce entry: " + entries )
2775+ raise util .SMException ("More than one coalesce entry: " + str ( entries ) )
27562776 sr .scan ()
27572777 coalescedUuid = entries .popitem ()[0 ]
27582778 garbage = sr .findGarbage ()
0 commit comments