From 39b9cd15902103e4d8b8d5847bdcdc004188b298 Mon Sep 17 00:00:00 2001 From: tom doron Date: Wed, 17 Mar 2021 18:16:59 -0700 Subject: [PATCH 1/2] add lifecycle metrics motivation: startup/shutdown metrics are important for real-life services, for example spike in start metrics indicates a crash-loop changes: * add start and shutdown counters to ComponentLifecycle * add start and shutdown timers to report duration of startup and shutdown operations --- Sources/Lifecycle/Lifecycle.swift | 17 ++- .../ComponentLifecycleTests+XCTest.swift | 1 + .../ComponentLifecycleTests.swift | 108 ++++++++++++++++++ 3 files changed, 123 insertions(+), 3 deletions(-) diff --git a/Sources/Lifecycle/Lifecycle.swift b/Sources/Lifecycle/Lifecycle.swift index 2af5ae7..5b969a5 100644 --- a/Sources/Lifecycle/Lifecycle.swift +++ b/Sources/Lifecycle/Lifecycle.swift @@ -18,9 +18,9 @@ import Darwin import Glibc #endif import Backtrace +import CoreMetrics import Dispatch import Logging -import Metrics // MARK: - LifecycleTask @@ -473,9 +473,12 @@ public class ComponentLifecycle: LifecycleTask { guard case .idle = self.state else { preconditionFailure("invalid state, \(self.state)") } - self.log("starting") self.state = .starting(queue) } + + self.log("starting") + Counter(label: "\(self.label).lifecycle.start").increment() + if tasks.count == 0 { self.log(level: .notice, "no tasks provided") } @@ -517,7 +520,9 @@ public class ComponentLifecycle: LifecycleTask { return callback(index, nil) } self.log("starting tasks [\(tasks[index].label)]") + let startTime = DispatchTime.now() start { error in + Timer(label: "\(self.label).\(tasks[index].label).lifecycle.start").recordNanoseconds(DispatchTime.now().uptimeNanoseconds - startTime.uptimeNanoseconds) if let error = error { self.log(level: .error, "failed to start [\(tasks[index].label)]: \(error)") return callback(index, error) @@ -532,9 +537,12 @@ public class ComponentLifecycle: LifecycleTask { private func _shutdown(on queue: DispatchQueue, tasks: [LifecycleTask], callback: @escaping () -> Void) { self.stateLock.withLock { - log("shutting down") self.state = .shuttingDown(queue) } + + self.log("shutting down") + Counter(label: "\(self.label).lifecycle.shutdown").increment() + self.shutdownTask(on: queue, tasks: tasks.reversed(), index: 0, errors: nil) { errors in self.stateLock.withLock { guard case .shuttingDown = self.state else { @@ -555,8 +563,11 @@ public class ComponentLifecycle: LifecycleTask { if index >= tasks.count { return callback(errors) } + self.log("stopping tasks [\(tasks[index].label)]") + let startTime = DispatchTime.now() shutdown { error in + Timer(label: "\(self.label).\(tasks[index].label).lifecycle.shutdown").recordNanoseconds(DispatchTime.now().uptimeNanoseconds - startTime.uptimeNanoseconds) var errors = errors if let error = error { if errors == nil { diff --git a/Tests/LifecycleTests/ComponentLifecycleTests+XCTest.swift b/Tests/LifecycleTests/ComponentLifecycleTests+XCTest.swift index eb95048..5faa6e9 100644 --- a/Tests/LifecycleTests/ComponentLifecycleTests+XCTest.swift +++ b/Tests/LifecycleTests/ComponentLifecycleTests+XCTest.swift @@ -66,6 +66,7 @@ extension ComponentLifecycleTests { ("testStatefulNIO", testStatefulNIO), ("testStatefulNIOStartFailure", testStatefulNIOStartFailure), ("testStatefulNIOShutdownFailure", testStatefulNIOShutdownFailure), + ("testMetrics", testMetrics), ] } } diff --git a/Tests/LifecycleTests/ComponentLifecycleTests.swift b/Tests/LifecycleTests/ComponentLifecycleTests.swift index c4fb3fe..be239ec 100644 --- a/Tests/LifecycleTests/ComponentLifecycleTests.swift +++ b/Tests/LifecycleTests/ComponentLifecycleTests.swift @@ -14,6 +14,7 @@ @testable import Lifecycle import LifecycleNIOCompat +import Metrics import NIO import XCTest @@ -1247,4 +1248,111 @@ final class ComponentLifecycleTests: XCTestCase { XCTAssertFalse(item.shutdown, "expected item to be shutdown") } + + func testMetrics() { + let metrics = TestMetrics() + MetricsSystem.bootstrap(metrics) + + let items = (0 ..< 3).map { _ in GoodItem(id: UUID().uuidString, startDelay: 0.1, shutdownDelay: 0.1) } + let lifecycle = ComponentLifecycle(label: "test") + lifecycle.register(items) + lifecycle.start { startError in + XCTAssertNil(startError, "not expecting error") + lifecycle.shutdown { shutdownErrors in + XCTAssertNil(shutdownErrors, "not expecting error") + } + } + lifecycle.wait() + XCTAssertEqual(metrics.counters["\(lifecycle.label).lifecycle.start"]?.value, 1, "expected start counter to be 1") + XCTAssertEqual(metrics.counters["\(lifecycle.label).lifecycle.shutdown"]?.value, 1, "expected shutdown counter to be 1") + items.forEach { XCTAssertGreaterThan(metrics.timers["\(lifecycle.label).\($0.label).lifecycle.shutdown"]?.value ?? 0, 0, "expected start timer to be non-zero") } + items.forEach { XCTAssertGreaterThan(metrics.timers["\(lifecycle.label).\($0.label).lifecycle.shutdown"]?.value ?? 0, 0, "expected shutdown timer to be non-zero") } + } +} + +class TestMetrics: MetricsFactory, RecorderHandler { + var counters = [String: TestCounter]() + var timers = [String: TestTimer]() + let lock = Lock() + + public init() {} + + public func makeCounter(label: String, dimensions: [(String, String)]) -> CounterHandler { + let counter = TestCounter(label: label) + self.lock.withLock { + self.counters[label] = counter + } + return counter + } + + public func makeRecorder(label: String, dimensions: [(String, String)], aggregate: Bool) -> RecorderHandler { + return self + } + + public func makeTimer(label: String, dimensions: [(String, String)]) -> TimerHandler { + let timer = TestTimer(label: label) + self.lock.withLock { + self.timers[label] = timer + } + return timer + } + + public func destroyCounter(_: CounterHandler) {} + public func destroyRecorder(_: RecorderHandler) {} + public func destroyTimer(_: TimerHandler) {} + + public func record(_: Int64) {} + public func record(_: Double) {} + + class TestCounter: CounterHandler { + let label: String + var _value: Int64 + let lock = Lock() + + init(label: String) { + self.label = label + self._value = 0 + } + + public func increment(by: Int64) { + self.lock.withLock { + self._value += by + } + } + + public func reset() { + self.lock.withLock { + self._value = 0 + } + } + + public var value: Int64 { + return self.lock.withLock { + return self._value + } + } + } + + class TestTimer: TimerHandler { + let label: String + var _value: Int64 + let lock = Lock() + + init(label: String) { + self.label = label + self._value = 0 + } + + public func recordNanoseconds(_ value: Int64) { + self.lock.withLock { + self._value = value + } + } + + public var value: Int64 { + return self.lock.withLock { + return self._value + } + } + } } From 079008bf2dbd2a56e60e066506487e0a8770639a Mon Sep 17 00:00:00 2001 From: tom doron Date: Thu, 18 Mar 2021 09:58:16 -0700 Subject: [PATCH 2/2] fixup --- Tests/LifecycleTests/ComponentLifecycleTests.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/LifecycleTests/ComponentLifecycleTests.swift b/Tests/LifecycleTests/ComponentLifecycleTests.swift index be239ec..1624ca7 100644 --- a/Tests/LifecycleTests/ComponentLifecycleTests.swift +++ b/Tests/LifecycleTests/ComponentLifecycleTests.swift @@ -1265,7 +1265,7 @@ final class ComponentLifecycleTests: XCTestCase { lifecycle.wait() XCTAssertEqual(metrics.counters["\(lifecycle.label).lifecycle.start"]?.value, 1, "expected start counter to be 1") XCTAssertEqual(metrics.counters["\(lifecycle.label).lifecycle.shutdown"]?.value, 1, "expected shutdown counter to be 1") - items.forEach { XCTAssertGreaterThan(metrics.timers["\(lifecycle.label).\($0.label).lifecycle.shutdown"]?.value ?? 0, 0, "expected start timer to be non-zero") } + items.forEach { XCTAssertGreaterThan(metrics.timers["\(lifecycle.label).\($0.label).lifecycle.start"]?.value ?? 0, 0, "expected start timer to be non-zero") } items.forEach { XCTAssertGreaterThan(metrics.timers["\(lifecycle.label).\($0.label).lifecycle.shutdown"]?.value ?? 0, 0, "expected shutdown timer to be non-zero") } } }