|
28 | 28 | // b. Sweep any unswept spans. There will only be unswept spans if
|
29 | 29 | // this GC cycle was forced before the expected time.
|
30 | 30 | //
|
31 |
| -// 2. GC performs the "mark 1" sub-phase. In this sub-phase, Ps are |
32 |
| -// allowed to locally cache parts of the work queue. |
| 31 | +// 2. GC performs the mark phase. |
33 | 32 | //
|
34 | 33 | // a. Prepare for the mark phase by setting gcphase to _GCmark
|
35 | 34 | // (from _GCoff), enabling the write barrier, enabling mutator
|
|
54 | 53 | // object to black and shading all pointers found in the object
|
55 | 54 | // (which in turn may add those pointers to the work queue).
|
56 | 55 | //
|
57 |
| -// 3. Once the global work queue is empty (but local work queue caches |
58 |
| -// may still contain work), GC performs the "mark 2" sub-phase. |
| 56 | +// e. Because GC work is spread across local caches, GC uses a |
| 57 | +// distributed termination algorithm to detect when there are no |
| 58 | +// more root marking jobs or grey objects (see gcMarkDone). At this |
| 59 | +// point, GC transitions to mark termination. |
59 | 60 | //
|
60 |
| -// a. GC stops all workers, disables local work queue caches, |
61 |
| -// flushes each P's local work queue cache to the global work queue |
62 |
| -// cache, and reenables workers. |
63 |
| -// |
64 |
| -// b. GC again drains the work queue, as in 2d above. |
65 |
| -// |
66 |
| -// 4. Once the work queue is empty, GC performs mark termination. |
| 61 | +// 3. GC performs mark termination. |
67 | 62 | //
|
68 | 63 | // a. Stop the world.
|
69 | 64 | //
|
70 | 65 | // b. Set gcphase to _GCmarktermination, and disable workers and
|
71 | 66 | // assists.
|
72 | 67 | //
|
73 |
| -// c. Drain any remaining work from the work queue (typically there |
74 |
| -// will be none). |
| 68 | +// c. Perform housekeeping like flushing mcaches. |
75 | 69 | //
|
76 |
| -// d. Perform other housekeeping like flushing mcaches. |
77 |
| -// |
78 |
| -// 5. GC performs the sweep phase. |
| 70 | +// 4. GC performs the sweep phase. |
79 | 71 | //
|
80 | 72 | // a. Prepare for the sweep phase by setting gcphase to _GCoff,
|
81 | 73 | // setting up sweep state and disabling the write barrier.
|
|
86 | 78 | // c. GC does concurrent sweeping in the background and in response
|
87 | 79 | // to allocation. See description below.
|
88 | 80 | //
|
89 |
| -// 6. When sufficient allocation has taken place, replay the sequence |
| 81 | +// 5. When sufficient allocation has taken place, replay the sequence |
90 | 82 | // starting with 1 above. See discussion of GC rate below.
|
91 | 83 |
|
92 | 84 | // Concurrent sweep.
|
@@ -996,8 +988,7 @@ var work struct {
|
996 | 988 | // startSema protects the transition from "off" to mark or
|
997 | 989 | // mark termination.
|
998 | 990 | startSema uint32
|
999 |
| - // markDoneSema protects transitions from mark 1 to mark 2 and |
1000 |
| - // from mark 2 to mark termination. |
| 991 | + // markDoneSema protects transitions from mark to mark termination. |
1001 | 992 | markDoneSema uint32
|
1002 | 993 |
|
1003 | 994 | bgMarkReady note // signal background mark worker has started
|
@@ -1385,128 +1376,121 @@ func gcStart(mode gcMode, trigger gcTrigger) {
|
1385 | 1376 | semrelease(&work.startSema)
|
1386 | 1377 | }
|
1387 | 1378 |
|
1388 |
| -// gcMarkDone transitions the GC from mark 1 to mark 2 and from mark 2 |
1389 |
| -// to mark termination. |
| 1379 | +// gcMarkDoneFlushed counts the number of P's with flushed work. |
| 1380 | +// |
| 1381 | +// Ideally this would be a captured local in gcMarkDone, but forEachP |
| 1382 | +// escapes its callback closure, so it can't capture anything. |
1390 | 1383 | //
|
1391 |
| -// This should be called when all mark work has been drained. In mark |
1392 |
| -// 1, this includes all root marking jobs, global work buffers, and |
1393 |
| -// active work buffers in assists and background workers; however, |
1394 |
| -// work may still be cached in per-P work buffers. In mark 2, per-P |
1395 |
| -// caches are disabled. |
| 1384 | +// This is protected by markDoneSema. |
| 1385 | +var gcMarkDoneFlushed uint32 |
| 1386 | + |
| 1387 | +// gcMarkDone transitions the GC from mark to mark termination if all |
| 1388 | +// reachable objects have been marked (that is, there are no grey |
| 1389 | +// objects and can be no more in the future). Otherwise, it flushes |
| 1390 | +// all local work to the global queues where it can be discovered by |
| 1391 | +// other workers. |
| 1392 | +// |
| 1393 | +// This should be called when all local mark work has been drained and |
| 1394 | +// there are no remaining workers. Specifically, when |
| 1395 | +// |
| 1396 | +// work.nwait == work.nproc && !gcMarkWorkAvailable(p) |
1396 | 1397 | //
|
1397 | 1398 | // The calling context must be preemptible.
|
1398 | 1399 | //
|
1399 |
| -// Note that it is explicitly okay to have write barriers in this |
1400 |
| -// function because completion of concurrent mark is best-effort |
1401 |
| -// anyway. Any work created by write barriers here will be cleaned up |
1402 |
| -// by mark termination. |
| 1400 | +// Flushing local work is important because idle Ps may have local |
| 1401 | +// work queued. This is the only way to make that work visible and |
| 1402 | +// drive GC to completion. |
| 1403 | +// |
| 1404 | +// It is explicitly okay to have write barriers in this function. If |
| 1405 | +// it does transition to mark termination, then all reachable objects |
| 1406 | +// have been marked, so the write barrier cannot shade any more |
| 1407 | +// objects. |
1403 | 1408 | func gcMarkDone() {
|
1404 |
| -top: |
| 1409 | + // Ensure only one thread is running the ragged barrier at a |
| 1410 | + // time. |
1405 | 1411 | semacquire(&work.markDoneSema)
|
1406 | 1412 |
|
| 1413 | +top: |
1407 | 1414 | // Re-check transition condition under transition lock.
|
| 1415 | + // |
| 1416 | + // It's critical that this checks the global work queues are |
| 1417 | + // empty before performing the ragged barrier. Otherwise, |
| 1418 | + // there could be global work that a P could take after the P |
| 1419 | + // has passed the ragged barrier. |
1408 | 1420 | if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) {
|
1409 | 1421 | semrelease(&work.markDoneSema)
|
1410 | 1422 | return
|
1411 | 1423 | }
|
1412 | 1424 |
|
1413 |
| - // Disallow starting new workers so that any remaining workers |
1414 |
| - // in the current mark phase will drain out. |
1415 |
| - // |
1416 |
| - // TODO(austin): Should dedicated workers keep an eye on this |
1417 |
| - // and exit gcDrain promptly? |
1418 |
| - atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff) |
1419 |
| - prevFractionalGoal := gcController.fractionalUtilizationGoal |
1420 |
| - gcController.fractionalUtilizationGoal = 0 |
1421 |
| - |
1422 |
| - if !gcBlackenPromptly { |
1423 |
| - // Transition from mark 1 to mark 2. |
1424 |
| - // |
1425 |
| - // The global work list is empty, but there can still be work |
1426 |
| - // sitting in the per-P work caches. |
1427 |
| - // Flush and disable work caches. |
1428 |
| - |
1429 |
| - // Disallow caching workbufs and indicate that we're in mark 2. |
1430 |
| - gcBlackenPromptly = true |
1431 |
| - |
1432 |
| - // Prevent completion of mark 2 until we've flushed |
1433 |
| - // cached workbufs. |
1434 |
| - atomic.Xadd(&work.nwait, -1) |
1435 |
| - |
1436 |
| - // GC is set up for mark 2. Let Gs blocked on the |
1437 |
| - // transition lock go while we flush caches. |
1438 |
| - semrelease(&work.markDoneSema) |
1439 |
| - |
1440 |
| - systemstack(func() { |
1441 |
| - // Flush all currently cached workbufs and |
1442 |
| - // ensure all Ps see gcBlackenPromptly. This |
1443 |
| - // also blocks until any remaining mark 1 |
1444 |
| - // workers have exited their loop so we can |
1445 |
| - // start new mark 2 workers. |
1446 |
| - forEachP(func(_p_ *p) { |
1447 |
| - wbBufFlush1(_p_) |
1448 |
| - _p_.gcw.dispose() |
1449 |
| - }) |
| 1425 | + // Flush all local buffers and collect flushedWork flags. |
| 1426 | + gcMarkDoneFlushed = 0 |
| 1427 | + systemstack(func() { |
| 1428 | + forEachP(func(_p_ *p) { |
| 1429 | + // Flush the write barrier buffer, since this may add |
| 1430 | + // work to the gcWork. |
| 1431 | + wbBufFlush1(_p_) |
| 1432 | + // Flush the gcWork, since this may create global work |
| 1433 | + // and set the flushedWork flag. |
| 1434 | + // |
| 1435 | + // TODO(austin): Break up these workbufs to |
| 1436 | + // better distribute work. |
| 1437 | + _p_.gcw.dispose() |
| 1438 | + // Collect the flushedWork flag. |
| 1439 | + if _p_.gcw.flushedWork { |
| 1440 | + atomic.Xadd(&gcMarkDoneFlushed, 1) |
| 1441 | + _p_.gcw.flushedWork = false |
| 1442 | + } |
1450 | 1443 | })
|
| 1444 | + }) |
1451 | 1445 |
|
1452 |
| - // Check that roots are marked. We should be able to |
1453 |
| - // do this before the forEachP, but based on issue |
1454 |
| - // #16083 there may be a (harmless) race where we can |
1455 |
| - // enter mark 2 while some workers are still scanning |
1456 |
| - // stacks. The forEachP ensures these scans are done. |
1457 |
| - // |
1458 |
| - // TODO(austin): Figure out the race and fix this |
1459 |
| - // properly. |
1460 |
| - gcMarkRootCheck() |
1461 |
| - |
1462 |
| - // Now we can start up mark 2 workers. |
1463 |
| - atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff) |
1464 |
| - gcController.fractionalUtilizationGoal = prevFractionalGoal |
| 1446 | + if gcMarkDoneFlushed != 0 { |
| 1447 | + // More grey objects were discovered since the |
| 1448 | + // previous termination check, so there may be more |
| 1449 | + // work to do. Keep going. It's possible the |
| 1450 | + // transition condition became true again during the |
| 1451 | + // ragged barrier, so re-check it. |
| 1452 | + goto top |
| 1453 | + } |
1465 | 1454 |
|
1466 |
| - incnwait := atomic.Xadd(&work.nwait, +1) |
1467 |
| - if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { |
1468 |
| - // This loop will make progress because |
1469 |
| - // gcBlackenPromptly is now true, so it won't |
1470 |
| - // take this same "if" branch. |
1471 |
| - goto top |
1472 |
| - } |
1473 |
| - } else { |
1474 |
| - // Transition to mark termination. |
1475 |
| - now := nanotime() |
1476 |
| - work.tMarkTerm = now |
1477 |
| - work.pauseStart = now |
1478 |
| - getg().m.preemptoff = "gcing" |
1479 |
| - if trace.enabled { |
1480 |
| - traceGCSTWStart(0) |
1481 |
| - } |
1482 |
| - systemstack(stopTheWorldWithSema) |
1483 |
| - // The gcphase is _GCmark, it will transition to _GCmarktermination |
1484 |
| - // below. The important thing is that the wb remains active until |
1485 |
| - // all marking is complete. This includes writes made by the GC. |
| 1455 | + // There was no global work, no local work, and no Ps |
| 1456 | + // communicated work since we took markDoneSema. Therefore |
| 1457 | + // there are no grey objects and no more objects can be |
| 1458 | + // shaded. Transition to mark termination. |
| 1459 | + now := nanotime() |
| 1460 | + work.tMarkTerm = now |
| 1461 | + work.pauseStart = now |
| 1462 | + getg().m.preemptoff = "gcing" |
| 1463 | + if trace.enabled { |
| 1464 | + traceGCSTWStart(0) |
| 1465 | + } |
| 1466 | + systemstack(stopTheWorldWithSema) |
| 1467 | + // The gcphase is _GCmark, it will transition to _GCmarktermination |
| 1468 | + // below. The important thing is that the wb remains active until |
| 1469 | + // all marking is complete. This includes writes made by the GC. |
1486 | 1470 |
|
1487 |
| - // Record that one root marking pass has completed. |
1488 |
| - work.markrootDone = true |
| 1471 | + // Record that one root marking pass has completed. |
| 1472 | + work.markrootDone = true |
1489 | 1473 |
|
1490 |
| - // Disable assists and background workers. We must do |
1491 |
| - // this before waking blocked assists. |
1492 |
| - atomic.Store(&gcBlackenEnabled, 0) |
| 1474 | + // Disable assists and background workers. We must do |
| 1475 | + // this before waking blocked assists. |
| 1476 | + atomic.Store(&gcBlackenEnabled, 0) |
1493 | 1477 |
|
1494 |
| - // Wake all blocked assists. These will run when we |
1495 |
| - // start the world again. |
1496 |
| - gcWakeAllAssists() |
| 1478 | + // Wake all blocked assists. These will run when we |
| 1479 | + // start the world again. |
| 1480 | + gcWakeAllAssists() |
1497 | 1481 |
|
1498 |
| - // Likewise, release the transition lock. Blocked |
1499 |
| - // workers and assists will run when we start the |
1500 |
| - // world again. |
1501 |
| - semrelease(&work.markDoneSema) |
| 1482 | + // Likewise, release the transition lock. Blocked |
| 1483 | + // workers and assists will run when we start the |
| 1484 | + // world again. |
| 1485 | + semrelease(&work.markDoneSema) |
1502 | 1486 |
|
1503 |
| - // endCycle depends on all gcWork cache stats being |
1504 |
| - // flushed. This is ensured by mark 2. |
1505 |
| - nextTriggerRatio := gcController.endCycle() |
| 1487 | + // endCycle depends on all gcWork cache stats being flushed. |
| 1488 | +// The termination algorithm above ensured this, up to |
| 1489 | +// allocations made since the ragged barrier. |
| 1490 | +	nextTriggerRatio := gcController.endCycle() |
1506 | 1491 |
|
1507 |
| - // Perform mark termination. This will restart the world. |
1508 |
| - gcMarkTermination(nextTriggerRatio) |
1509 |
| - } |
| 1492 | + // Perform mark termination. This will restart the world. |
| 1493 | + gcMarkTermination(nextTriggerRatio) |
1510 | 1494 | }
|
1511 | 1495 |
|
1512 | 1496 | func gcMarkTermination(nextTriggerRatio float64) {
|
@@ -1940,23 +1924,23 @@ func gcMark(start_time int64) {
|
1940 | 1924 | if work.full == 0 && work.nDataRoots+work.nBSSRoots+work.nSpanRoots+work.nStackRoots == 0 {
|
1941 | 1925 | // There's no work on the work queue and no root jobs
|
1942 | 1926 | // that can produce work, so don't bother entering the
|
1943 |
| - // getfull() barrier. |
1944 |
| - // |
1945 |
| - // This will be the situation the vast majority of the |
1946 |
| - // time after concurrent mark. However, we still need |
1947 |
| - // a fallback for STW GC and because there are some |
1948 |
| - // known races that occasionally leave work around for |
1949 |
| - // mark termination. |
1950 |
| - // |
1951 |
| - // We're still hedging our bets here: if we do |
1952 |
| - // accidentally produce some work, we'll still process |
1953 |
| - // it, just not necessarily in parallel. |
| 1927 | + // getfull() barrier. There will be flushCacheRoots |
| 1928 | + // work, but that doesn't gray anything. |
1954 | 1929 | //
|
1955 |
| - // TODO(austin): Fix the races and and remove |
1956 |
| - // work draining from mark termination so we don't |
1957 |
| - // need the fallback path. |
| 1930 | + // This should always be the situation after |
| 1931 | + // concurrent mark. |
1958 | 1932 | work.helperDrainBlock = false
|
1959 | 1933 | } else {
|
| 1934 | + // There's marking work to do. This is the case during |
| 1935 | + // STW GC and in checkmark mode. Instruct GC workers |
| 1936 | + // to block in getfull until all GC workers are in getfull. |
| 1937 | + // |
| 1938 | + // TODO(austin): Move STW and checkmark marking out of |
| 1939 | + // mark termination and eliminate this code path. |
| 1940 | + if !useCheckmark && debug.gcstoptheworld == 0 && debug.gcrescanstacks == 0 { |
| 1941 | + print("runtime: full=", hex(work.full), " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n") |
| 1942 | + panic("non-empty mark queue after concurrent mark") |
| 1943 | + } |
1960 | 1944 | work.helperDrainBlock = true
|
1961 | 1945 | }
|
1962 | 1946 |
|
@@ -1991,16 +1975,35 @@ func gcMark(start_time int64) {
|
1991 | 1975 | // Record that at least one root marking pass has completed.
|
1992 | 1976 | work.markrootDone = true
|
1993 | 1977 |
|
1994 |
| - // Double-check that all gcWork caches are empty. This should |
1995 |
| - // be ensured by mark 2 before we enter mark termination. |
| 1978 | + // Clear out buffers and double-check that all gcWork caches |
| 1979 | + // are empty. This should be ensured by gcMarkDone before we |
| 1980 | + // enter mark termination. |
| 1981 | + // |
| 1982 | + // TODO: We could clear out buffers just before mark if this |
| 1983 | + // has a non-negligible impact on STW time. |
1996 | 1984 | for _, p := range allp {
|
| 1985 | + // The write barrier may have buffered pointers since |
| 1986 | + // the gcMarkDone barrier. However, since the barrier |
| 1987 | + // ensured all reachable objects were marked, all of |
| 1988 | + // these must be pointers to black objects. Hence we |
| 1989 | + // can just discard the write barrier buffer. |
| 1990 | + if debug.gccheckmark > 0 { |
| 1991 | + // For debugging, flush the buffer and make |
| 1992 | + // sure it really was all marked. |
| 1993 | + wbBufFlush1(p) |
| 1994 | + } else { |
| 1995 | + p.wbBuf.reset() |
| 1996 | + } |
| 1997 | + |
1997 | 1998 | gcw := &p.gcw
|
1998 | 1999 | if !gcw.empty() {
|
1999 | 2000 | throw("P has cached GC work at end of mark termination")
|
2000 | 2001 | }
|
2001 |
| - if gcw.scanWork != 0 || gcw.bytesMarked != 0 { |
2002 |
| - throw("P has unflushed stats at end of mark termination") |
2003 |
| - } |
| 2002 | + // There may still be cached empty buffers, which we |
| 2003 | + // need to flush since we're going to free them. Also, |
| 2004 | + // there may be non-zero stats because we allocated |
| 2005 | + // black after the gcMarkDone barrier. |
| 2006 | + gcw.dispose() |
2004 | 2007 | }
|
2005 | 2008 |
|
2006 | 2009 | cachestats()
|
|
0 commit comments