Skip to content

Commit 9d82cec

Browse files
MarkSymsCtx authored and germanop committed
[CA-183960][CA-196882]: GC in batch mode and immediate deletes in SRs
FileSR performs VDI.delete inline. Inline delete VDIs for LVHD SRs. GC waits five minutes after start before initiating work loop. Use two locks in gcLoop, one to prevent multiple GC processes starting and one to interlink with abort requests while work is actually occurring. Inline delete orphaned nodes in coalesce. After handling garbage in the GC loop, proceed to coalesce without returning to scan and potentially finding more garbage, which then prevents coalesce. Fix bug in _kickGC where the running GC could be stopped without a new one being started. Signed-off-by: Mark Syms <[email protected]> Signed-off-by: Stefano Panella <[email protected]> Reviewed-by: Germano Percossi <[email protected]> GitHub: closes #295
1 parent 37947ab commit 9d82cec

File tree

3 files changed

+70
-52
lines changed

3 files changed

+70
-52
lines changed

drivers/FileSR.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -567,14 +567,7 @@ def delete(self, sr_uuid, vdi_uuid):
567567
if self.attached:
568568
raise xs_errors.XenError('VDIInUse')
569569

570-
if self.vdi_type == vhdutil.VDI_TYPE_VHD:
571-
try:
572-
util.ioretry(lambda: self._mark_hidden(self.path))
573-
except util.CommandException, inst:
574-
raise xs_errors.XenError('VDIDelete',
575-
opterr='error %d' % inst.code)
576-
else:
577-
os.unlink(self.path)
570+
os.unlink(self.path)
578571

579572
self.sr.deleted_vdi(vdi_uuid)
580573
self._db_forget()

drivers/LVHDSR.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1365,8 +1365,6 @@ def _kickGC(self):
13651365
if e.code != errno.ETIMEDOUT:
13661366
raise
13671367
util.SMlog('failed to abort the GC')
1368-
finally:
1369-
return
13701368
else:
13711369
util.SMlog("A GC instance already running, not kicking")
13721370
return
@@ -1806,6 +1804,13 @@ def delete(self, sr_uuid, vdi_uuid):
18061804
if self.sr.lvActivator.get(self.uuid, False):
18071805
self.sr.lvActivator.deactivate(self.uuid, False)
18081806

1807+
try:
1808+
self.sr.lvmCache.remove(self.lvname)
1809+
except SR.SRException, e:
1810+
util.SMlog(
1811+
"Failed to remove the volume (maybe is leaf coalescing) "
1812+
"for %s err:%d" % (self.uuid, e.errno))
1813+
18091814
self.sr._updateStats(self.sr.uuid, -self.size)
18101815
self.sr._kickGC()
18111816

drivers/cleanup.py

Lines changed: 62 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@
6262
LOCK_TYPE_RUNNING = "running"
6363
lockRunning = None
6464

65+
# process "lock" to indicate that the GC process has been activated but may not
66+
# yet be running, stops a second process from being started.
67+
LOCK_TYPE_GC_ACTIVE = "gc_active"
68+
lockActive = None
69+
6570
# Default coalesce error rate limit, in messages per minute. A zero value
6671
# disables throttling, and a negative value disables error reporting.
6772
DEFAULT_COALESCE_ERR_RATE = 1.0/60
@@ -1456,6 +1461,7 @@ def findCoalesceable(self):
14561461
for vdi in self.vdis.values():
14571462
if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets:
14581463
candidates.append(vdi)
1464+
Util.log("%s is coalescable" % vdi.uuid)
14591465

14601466
# pick one in the tallest tree
14611467
treeHeight = dict()
@@ -1748,6 +1754,7 @@ def _coalesce(self, vdi):
17481754

17491755
vdi.parent._reloadChildren(vdi)
17501756
self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)
1757+
self.deleteVDI(vdi)
17511758

17521759
def _coalesceLeaf(self, vdi):
17531760
"""Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
@@ -1786,7 +1793,6 @@ def _snapshotCoalesce(self, vdi):
17861793
Util.log("Coalescing parent %s" % tempSnap)
17871794
util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid)
17881795
self._coalesce(tempSnap)
1789-
self.deleteVDI(tempSnap)
17901796
if not vdi.isLeafCoalesceable():
17911797
Util.log("The VDI tree appears to have been altered since")
17921798
return False
@@ -2532,48 +2538,59 @@ def normalizeType(type):
25322538
return type
25332539

25342540
def _gcLoop(sr, dryRun):
2535-
failedCandidates = []
2536-
while True:
2537-
if not sr.xapi.isPluggedHere():
2538-
Util.log("SR no longer attached, exiting")
2539-
break
2540-
sr.scanLocked()
2541-
if not sr.hasWork():
2542-
Util.log("No work, exiting")
2543-
break
2544-
2545-
if not lockRunning.acquireNoblock():
2546-
Util.log("Another instance already running, exiting")
2547-
break
2548-
try:
2549-
if not sr.gcEnabled():
2541+
if not lockActive.acquireNoblock():
2542+
Util.log("Another GC instance already active, exiting")
2543+
return
2544+
try:
2545+
# TODO: make the delay configurable
2546+
Util.log("GC active, about to go quiet")
2547+
time.sleep(5 * 60)
2548+
Util.log("GC active, quiet period ended")
2549+
2550+
while True:
2551+
if not sr.xapi.isPluggedHere():
2552+
Util.log("SR no longer attached, exiting")
25502553
break
2551-
sr.cleanupCoalesceJournals()
25522554
sr.scanLocked()
2553-
sr.updateBlockInfo()
2554-
2555-
if len(sr.findGarbage()) > 0:
2556-
sr.garbageCollect(dryRun)
2557-
sr.xapi.srUpdate()
2558-
continue
2555+
if not sr.hasWork():
2556+
Util.log("No work, exiting")
2557+
break
25592558

2560-
candidate = sr.findCoalesceable()
2561-
if candidate:
2562-
util.fistpoint.activate("LVHDRT_finding_a_suitable_pair",sr.uuid)
2563-
sr.coalesce(candidate, dryRun)
2564-
sr.xapi.srUpdate()
2565-
continue
2559+
if not lockRunning.acquireNoblock():
2560+
Util.log("Unable to acquire GC running lock.")
2561+
return
2562+
try:
2563+
if not sr.gcEnabled():
2564+
break
2565+
sr.cleanupCoalesceJournals()
2566+
sr.scanLocked()
2567+
sr.updateBlockInfo()
2568+
2569+
howmany = len(sr.findGarbage())
2570+
if howmany > 0:
2571+
Util.log("Found %d orphaned vdis" % howmany)
2572+
sr.garbageCollect(dryRun)
2573+
sr.xapi.srUpdate()
2574+
2575+
candidate = sr.findCoalesceable()
2576+
if candidate:
2577+
util.fistpoint.activate(
2578+
"LVHDRT_finding_a_suitable_pair", sr.uuid)
2579+
sr.coalesce(candidate, dryRun)
2580+
sr.xapi.srUpdate()
2581+
continue
25662582

2567-
candidate = sr.findLeafCoalesceable()
2568-
if candidate:
2569-
sr.coalesceLeaf(candidate, dryRun)
2570-
sr.xapi.srUpdate()
2571-
continue
2583+
candidate = sr.findLeafCoalesceable()
2584+
if candidate:
2585+
sr.coalesceLeaf(candidate, dryRun)
2586+
sr.xapi.srUpdate()
2587+
continue
25722588

2573-
Util.log("No work left")
2574-
sr.cleanup()
2575-
finally:
2576-
lockRunning.release()
2589+
finally:
2590+
lockRunning.release()
2591+
finally:
2592+
Util.log("GC process exiting, no work left")
2593+
lockActive.release()
25772594

25782595
def _gc(session, srUuid, dryRun):
25792596
init(srUuid)
@@ -2621,7 +2638,10 @@ def _abort(srUuid, soft=False):
26212638
def init(srUuid):
26222639
global lockRunning
26232640
if not lockRunning:
2624-
lockRunning = lock.Lock(LOCK_TYPE_RUNNING, srUuid)
2641+
lockRunning = lock.Lock(LOCK_TYPE_RUNNING, srUuid)
2642+
global lockActive
2643+
if not lockActive:
2644+
lockActive = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
26252645

26262646
def usage():
26272647
output = """Garbage collect and/or coalesce VHDs in a VHD-based SR
@@ -2741,8 +2761,8 @@ def get_state(srUuid):
27412761
locking.
27422762
"""
27432763
init(srUuid)
2744-
if lockRunning.acquireNoblock():
2745-
lockRunning.release()
2764+
if lockActive.acquireNoblock():
2765+
lockActive.release()
27462766
return False
27472767
return True
27482768

@@ -2752,7 +2772,7 @@ def should_preempt(session, srUuid):
27522772
if len(entries) == 0:
27532773
return False
27542774
elif len(entries) > 1:
2755-
raise util.SMException("More than one coalesce entry: " + entries)
2775+
raise util.SMException("More than one coalesce entry: " + str(entries))
27562776
sr.scan()
27572777
coalescedUuid = entries.popitem()[0]
27582778
garbage = sr.findGarbage()

0 commit comments

Comments
 (0)