[Prometheus] Add & instrument Lambda environment metrics (#94)

gregfurman · web-flow · commit 34b22f50460f · 2025-03-25T16:06:50.000+02:00
diff --git a/prometheus/localstack_prometheus/expose.py b/prometheus/localstack_prometheus/expose.py
@@ -4,8 +4,6 @@
 
 def retrieve_metrics(request: http.Request):
-    """Expose the Prometheus metrics"""
-    _generate_latest_metrics, content_type = choose_encoder(
-        request.headers.get("Content-Type", "")
-    )
-    data = _generate_latest_metrics()
+    """Expose the Prometheus metrics in the exposition format the client asked for.
+
+    :param request: incoming scrape request; only its ``Accept`` header is read
+    :return: 200 response whose body is the encoded metrics and whose mimetype is the
+        content type reported by the chosen encoder
+    """
+    # Imported here so this function does not depend on the module's import block.
+    from prometheus_client import REGISTRY
+
+    # Exposition formats are negotiated on the Accept header. Content-Type describes a request
+    # body, which a metrics scrape does not carry, so it never selected anything but the default.
+    _generate_latest_metrics, content_type = choose_encoder(request.headers.get("Accept", ""))
+    # Pass the registry explicitly: the OpenMetrics encoder requires it as an argument.
+    data = _generate_latest_metrics(REGISTRY)
     return http.Response(response=data, status=200, mimetype=content_type)
diff --git a/prometheus/localstack_prometheus/extension.py b/prometheus/localstack_prometheus/extension.py
@@ -9,7 +9,10 @@
 
 from localstack_prometheus.expose import retrieve_metrics
 from localstack_prometheus.handler import RequestMetricsHandler, ResponseMetricsHandler
-from localstack_prometheus.instruments.patch import apply_poller_tracking_patches
+from localstack_prometheus.instruments.patch import (
+    apply_lambda_tracking_patches,
+    apply_poller_tracking_patches,
+)
 
 LOG = logging.getLogger(__name__)
 
@@ -18,6 +21,7 @@ class PrometheusMetricsExtension(Extension):
     name = "prometheus"
 
     def on_extension_load(self):
+        apply_lambda_tracking_patches()
         apply_poller_tracking_patches()
         LOG.debug("PrometheusMetricsExtension: extension is loaded")
 
diff --git a/prometheus/localstack_prometheus/handler.py b/prometheus/localstack_prometheus/handler.py
@@ -6,7 +6,7 @@
 from localstack.http import Response
 
 from localstack_prometheus.metrics.core import (
-    LOCALSTACK_IN_FLIGHT_REQUESTS_GAUGE,
+    LOCALSTACK_IN_FLIGHT_REQUESTS,
     LOCALSTACK_REQUEST_PROCESSING_DURATION_SECONDS,
 )
 
@@ -22,9 +22,7 @@ class RequestMetricsHandler(Handler):
     Handler that records the start time of incoming requests
     """
 
-    def __call__(
-        self, chain: HandlerChain, context: TimedRequestContext, response: Response
-    ):
+    def __call__(self, chain: HandlerChain, context: TimedRequestContext, response: Response):
         # Record the start time
         context.start_time = time.perf_counter()
 
@@ -33,27 +31,21 @@ def __call__(
             return
 
         service, operation = context.service_operation
-        LOCALSTACK_IN_FLIGHT_REQUESTS_GAUGE.labels(
-            service=service, operation=operation
-        ).inc()
+        LOCALSTACK_IN_FLIGHT_REQUESTS.labels(service=service, operation=operation).inc()
 
 
 class ResponseMetricsHandler(Handler):
     """
     Handler that records metrics when a response is ready
     """
 
-    def __call__(
-        self, chain: HandlerChain, context: TimedRequestContext, response: Response
-    ):
+    def __call__(self, chain: HandlerChain, context: TimedRequestContext, response: Response):
         # Do not record metrics if no service operation information is found
         if not context.service_operation:
             return
 
         service, operation = context.service_operation
-        LOCALSTACK_IN_FLIGHT_REQUESTS_GAUGE.labels(
-            service=service, operation=operation
-        ).dec()
+        LOCALSTACK_IN_FLIGHT_REQUESTS.labels(service=service, operation=operation).dec()
 
         # Do not record if response is None
         if response is None:
diff --git a/prometheus/localstack_prometheus/instruments/lambda_.py b/prometheus/localstack_prometheus/instruments/lambda_.py
@@ -0,0 +1,81 @@
+import contextlib
+from typing import ContextManager
+
+from localstack.services.lambda_.invocation.assignment import AssignmentService
+from localstack.services.lambda_.invocation.docker_runtime_executor import (
+    DockerRuntimeExecutor,
+)
+from localstack.services.lambda_.invocation.execution_environment import (
+    ExecutionEnvironment,
+)
+from localstack.services.lambda_.invocation.lambda_models import (
+    FunctionVersion,
+    InitializationType,
+)
+
+from localstack_prometheus.metrics.lambda_ import (
+    LOCALSTACK_LAMBDA_ENVIRONMENT_ACTIVE,
+    LOCALSTACK_LAMBDA_ENVIRONMENT_CONTAINERS_RUNNING,
+    LOCALSTACK_LAMBDA_ENVIRONMENT_START_TOTAL,
+)
+
+
+def count_version_environments(
+    assignment_service: AssignmentService, version_manager_id: str, prov_type: InitializationType
+) -> int:
+    """Count environments of a specific provisioning type for a specific version manager.
+
+    :param assignment_service: service whose ``environments`` mapping is inspected
+    :param version_manager_id: key into ``assignment_service.environments``
+    :param prov_type: initialization type an environment must have to be counted
+    :return: number of matching environments; 0 if the version manager has no entry
+    """
+    # This is evaluated at metrics-collection time, presumably on a different thread than the
+    # one adding or removing environments. Copy the values first so a concurrent change cannot
+    # abort the count with "dictionary changed size during iteration".
+    envs = list(assignment_service.environments.get(version_manager_id, {}).values())
+    return sum(env.initialization_type == prov_type for env in envs)
+
+
+def count_service_environments(
+    assignment_service: AssignmentService, prov_type: InitializationType
+) -> int:
+    """Count environments of a specific provisioning type across all function versions.
+
+    :param assignment_service: service whose ``environments`` mapping is inspected
+    :param prov_type: initialization type an environment must have to be counted
+    :return: sum of the per-version-manager counts
+    """
+    # Take a snapshot of the keys: the mapping is read at metrics-collection time and may
+    # presumably gain or lose version managers while this loop is running.
+    version_manager_ids = list(assignment_service.environments)
+    return sum(
+        count_version_environments(assignment_service, version_manager_id, prov_type)
+        for version_manager_id in version_manager_ids
+    )
+
+
+def init_assignment_service_with_metrics(fn, self: AssignmentService, *args, **kwargs):
+    """Run the original ``AssignmentService.__init__`` and bind the active-environment gauges.
+
+    :param fn: the original (unpatched) ``__init__``
+    :param self: the instance being initialised
+    :param args: extra positional arguments, forwarded unchanged to ``fn``
+    :param kwargs: extra keyword arguments, forwarded unchanged to ``fn``
+    """
+    # Forward everything so the wrapper keeps working if the constructor takes arguments.
+    fn(self, *args, **kwargs)
+    # Initialise these once, with all subsequent calls being evaluated at collection time.
+    for provisioning_type in ("provisioned-concurrency", "on-demand"):
+        gauge = LOCALSTACK_LAMBDA_ENVIRONMENT_ACTIVE.labels(provisioning_type=provisioning_type)
+        gauge.set_function(
+            # Bind the loop value as a default; a plain closure would only see the last one.
+            lambda prov_type=provisioning_type: count_service_environments(self, prov_type)
+        )
+
+
+def tracked_docker_start(fn, self: DockerRuntimeExecutor, env_vars: dict[str, str]):
+    """Call the original ``DockerRuntimeExecutor.start`` and count the container as running.
+
+    The gauge is incremented only after ``fn`` returns, so a start that raises is not counted.
+
+    :param fn: the original (unpatched) ``start``
+    :param self: the executor instance ``start`` was called on
+    :param env_vars: passed through to ``fn`` unchanged
+    :return: whatever ``fn`` returns
+    """
+    result = fn(self, env_vars)
+    LOCALSTACK_LAMBDA_ENVIRONMENT_CONTAINERS_RUNNING.inc()
+    # Hand the wrapped result back so the patch stays transparent to callers.
+    return result
+
+
+def tracked_docker_stop(fn, self: DockerRuntimeExecutor):
+    """Call the original ``DockerRuntimeExecutor.stop`` and remove the container from the count.
+
+    The gauge is decremented only after ``fn`` returns, so a stop that raises leaves it untouched.
+
+    :param fn: the original (unpatched) ``stop``
+    :param self: the executor instance ``stop`` was called on
+    :return: whatever ``fn`` returns
+    """
+    result = fn(self)
+    LOCALSTACK_LAMBDA_ENVIRONMENT_CONTAINERS_RUNNING.dec()
+    # Hand the wrapped result back so the patch stays transparent to callers.
+    return result
+
+
+@contextlib.contextmanager
+def tracked_get_environment(
+    fn,
+    self: AssignmentService,
+    version_manager_id: str,
+    function_version: FunctionVersion,
+    provisioning_type: InitializationType,
+) -> ContextManager[ExecutionEnvironment]:
+    """Record a warm or cold start, then delegate to the original ``get_environment``.
+
+    The start type is decided before ``fn`` runs: "warm" if the version manager already holds at
+    least one environment of ``provisioning_type``, "cold" otherwise. The environment yielded by
+    ``fn`` is passed through unchanged.
+    """
+    # NOTE(review): this only checks that a matching environment exists, not that it is free to
+    # take the invocation -- confirm that is the intended definition of "warm".
+    has_matching_env = count_version_environments(self, version_manager_id, provisioning_type) > 0
+    start_counter = LOCALSTACK_LAMBDA_ENVIRONMENT_START_TOTAL.labels(
+        start_type="warm" if has_matching_env else "cold",
+        provisioning_type=provisioning_type,
+    )
+    start_counter.inc()
+    with fn(self, version_manager_id, function_version, provisioning_type) as execution_env:
+        yield execution_env
diff --git a/prometheus/localstack_prometheus/instruments/patch.py b/prometheus/localstack_prometheus/instruments/patch.py
@@ -12,8 +12,18 @@
 from localstack.services.lambda_.event_source_mapping.senders.lambda_sender import (
     LambdaSender,
 )
+from localstack.services.lambda_.invocation.assignment import AssignmentService
+from localstack.services.lambda_.invocation.docker_runtime_executor import (
+    DockerRuntimeExecutor,
+)
 from localstack.utils.patch import Patch, Patches
 
+from localstack_prometheus.instruments.lambda_ import (
+    init_assignment_service_with_metrics,
+    tracked_docker_start,
+    tracked_docker_stop,
+    tracked_get_environment,
+)
 from localstack_prometheus.instruments.poller import tracked_poll_events
 from localstack_prometheus.instruments.sender import tracked_send_events
 from localstack_prometheus.instruments.sqs_poller import tracked_sqs_handle_messages
@@ -22,6 +32,27 @@
 LOG = logging.getLogger(__name__)
 
 
+def apply_lambda_tracking_patches():
+    """Install every Lambda environment metrics patch and return the applied ``Patches``."""
+    lambda_patches = [
+        # Keep the running-container gauge in step with container start and stop
+        Patch.function(target=DockerRuntimeExecutor.start, fn=tracked_docker_start),
+        Patch.function(target=DockerRuntimeExecutor.stop, fn=tracked_docker_stop),
+        # Count cold and warm starts whenever an environment is requested
+        Patch.function(target=AssignmentService.get_environment, fn=tracked_get_environment),
+        # Bind the active-environment gauges when the assignment service is constructed
+        Patch.function(target=AssignmentService.__init__, fn=init_assignment_service_with_metrics),
+    ]
+    patches = Patches(lambda_patches)
+    patches.apply()
+    LOG.debug("Applied all Lambda environment tracking patches")
+    return patches
+
+
 def apply_poller_tracking_patches():
     """Apply all poller metrics tracking patches in one call"""
     patches = Patches(
@@ -34,9 +65,7 @@ def apply_poller_tracking_patches():
             Patch.function(target=LambdaSender.send_events, fn=tracked_send_events),
             # TODO: Standardise a single abstract method that all Poller subclasses can use to fetch records
             # SQS-specific patches
-            Patch.function(
-                target=SqsPoller.handle_messages, fn=tracked_sqs_handle_messages
-            ),
+            Patch.function(target=SqsPoller.handle_messages, fn=tracked_sqs_handle_messages),
             # Stream-specific patches
             Patch.function(target=KinesisPoller.get_records, fn=tracked_get_records),
             Patch.function(target=DynamoDBPoller.get_records, fn=tracked_get_records),
diff --git a/prometheus/localstack_prometheus/metrics/__init__.py b/prometheus/localstack_prometheus/metrics/__init__.py
@@ -1 +0,0 @@
-
diff --git a/prometheus/localstack_prometheus/metrics/core.py b/prometheus/localstack_prometheus/metrics/core.py
@@ -8,7 +8,7 @@
     buckets=[0.005, 0.05, 0.5, 5, 30, 60, 300, 900, 3600],
 )
 
-LOCALSTACK_IN_FLIGHT_REQUESTS_GAUGE = Gauge(
+LOCALSTACK_IN_FLIGHT_REQUESTS = Gauge(
     "localstack_in_flight_requests",
     "Total number of currently in-flight requests",
     ["service", "operation"],
diff --git a/prometheus/localstack_prometheus/metrics/lambda_.py b/prometheus/localstack_prometheus/metrics/lambda_.py
@@ -0,0 +1,19 @@
+from prometheus_client import Counter, Gauge
+
+# Lambda environment metrics
+
+# Environment starts, labelled by start type and by provisioning type.
+LOCALSTACK_LAMBDA_ENVIRONMENT_START_TOTAL = Counter(
+    name="localstack_lambda_environment_start_total",
+    documentation="Total count of all Lambda environment starts.",
+    labelnames=["start_type", "provisioning_type"],
+)
+
+# Unlabelled gauge of Lambda Docker containers that are currently running.
+LOCALSTACK_LAMBDA_ENVIRONMENT_CONTAINERS_RUNNING = Gauge(
+    name="localstack_lambda_environment_containers_running",
+    documentation="Number of LocalStack Lambda Docker containers currently running.",
+)
+
+# Currently active environments, one series per provisioning type.
+# NOTE(review): the exported name says "environments" (plural) while the two metrics above use
+# "environment" -- confirm whether that difference is intended.
+LOCALSTACK_LAMBDA_ENVIRONMENT_ACTIVE = Gauge(
+    name="localstack_lambda_environments_active",
+    documentation="Number of currently active LocalStack Lambda environments.",
+    labelnames=["provisioning_type"],
+)

Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,7 @@`
`8`	`8`	`buckets=[0.005, 0.05, 0.5, 5, 30, 60, 300, 900, 3600],`
`9`	`9`	`)`
`10`	`10`
`11`		`-LOCALSTACK_IN_FLIGHT_REQUESTS_GAUGE = Gauge(`
	`11`	`+LOCALSTACK_IN_FLIGHT_REQUESTS = Gauge(`
`12`	`12`	`"localstack_in_flight_requests",`
`13`	`13`	`"Total number of currently in-flight requests",`
`14`	`14`	`["service", "operation"],`