From 5f30e76bcebacd2a129f8c17933b6365f4d49776 Mon Sep 17 00:00:00 2001 From: Tianning Li Date: Thu, 28 Aug 2025 14:59:23 -0400 Subject: [PATCH 1/2] feat: No longer launch Go-based agent for compatibility/OTLP/AAP config (#788) https://datadoghq.atlassian.net/browse/SVLS-7398 - As part of coming release, bottlecap agent no longer launches Go-based agent when compatibility/AAP/OTLP features are active - Emit the same metric when detecting any of above configuration - Update corresponding unit tests - Refactor the references of enhanced metrics. Manifests: - [Test lambda function](https://us-east-1.console.aws.amazon.com/lambda/home?region=us-east-1#/functions/ltn1-fullinstrument-bn-cold-python310-lambda?code=&subtab=envVars&tab=testing) with [logs](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:log-groups/log-group/$252Faws$252Flambda$252Fltn1-fullinstrument-bn-cold-python310-lambda/log-events/2025$252F08$252F21$252F$255B$2524LATEST$255Df3788d359677452dad162488ff15456f$3FfilterPattern$3Dotel) showing compatibility/AAP/OTPL are enabled image - [Logging](https://app.datadoghq.com/logs/livetail?query=functionname%3Altn1-fullinstrument-bn-cold-python310-lambda%20Metric&agg_m=count&agg_m_source=base&agg_t=count&cols=host%2Cservice&fromUser=true&messageDisplay=inline&refresh_mode=paused&storage=driveline&stream_sort=desc&viz=stream&from_ts=1755787655569&to_ts=1755787689060&live=false) image - [Metric](https://app.datadoghq.com/screen/integration/aws_lambda_enhanced_metrics?fromUser=false&fullscreen_end_ts=1755788220000&fullscreen_paused=true&fullscreen_refresh_mode=paused&fullscreen_section=overview&fullscreen_start_ts=1755787200000&fullscreen_widget=2&graph-explorer__tile_def=N4IgbglgXiBcIBcD2AHANhAzgkAaEAxgK7ZIC2A%2BhgHYDWmcA2gLr4BOApgI5EfYOxGoTphRJqmDhQBmSNmQCGOeJgIK0CtnhA8ObCHyagAJkoUVMSImwIc4IMhwT6CDfNQWP7utgE8AjNo%2BvvaYRGSwpggKxkgA5gB0kmxgemh8mAkcAB4IHBIQ4gnSChBoSKlswAAkCgDumBQKBARW1Ai41ZxxhdSd0kTUBAi9AL4ABABGvuPAA0Mj4h6OowkKja2DCAAUAJTaCnFx3UpyoeEgo6wgsvJEGgJCN3Jk9wrevH6BV-iWbMqgTbtOAAJgADPg5MY9BRpkZEL4UHZ4LdXhptBBqNDsnAISAoXp7NDVJdmKMfiBsL50nBgOSgA&refresh_mode=sliding&from_ts=1755783890661&to_ts=1755787490661&live=true) image --- bottlecap/src/bin/bottlecap/main.rs | 78 ++++++++---- bottlecap/src/config/mod.rs | 120 ++++++++---------- .../src/lifecycle/invocation/processor.rs | 13 +- bottlecap/src/metrics/enhanced/constants.rs | 2 + bottlecap/src/metrics/enhanced/lambda.rs | 86 +++++++++++-- bottlecap/src/otlp/mod.rs | 6 +- bottlecap/src/proxy/interceptor.rs | 5 +- 7 files changed, 200 insertions(+), 110 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 0e87473f0..94582b236 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -37,6 +37,7 @@ use bottlecap::{ }, logger, logs::{agent::LogsAgent, flusher::LogsFlusher}, + metrics::enhanced::lambda::Lambda as enhanced_metrics, otlp::{agent::Agent as OtlpAgent, should_enable_otlp_agent}, proxy::{interceptor, should_start_proxy}, secrets::decrypt, @@ -86,9 +87,7 @@ use std::{ collections::{HashMap, hash_map}, env, io::{Error, Result}, - os::unix::process::CommandExt, path::Path, - process::Command, sync::Arc, time::{Duration, Instant}, }; @@ -404,14 +403,7 @@ fn load_configs(start_time: Instant) -> (AwsConfig, AwsCredentials, Arc) let aws_credentials = AwsCredentials::from_env(); let lambda_directory: String = env::var("LAMBDA_TASK_ROOT").unwrap_or_else(|_| "/var/task".to_string()); - let config = match config::get_config(Path::new(&lambda_directory)) { - Ok(config) => Arc::new(config), - Err(_e) => { - let err = Command::new("/opt/datadog-agent-go").exec(); - panic!("Error starting the extension: {err:?}"); - } - }; - + let config = Arc::new(config::get_config(Path::new(&lambda_directory))); (aws_config, aws_credentials, config) } @@ -510,19 +502,29 @@ async fn extension_loop_active( ); start_dogstatsd_aggregator(metrics_aggr_service); + let metrics_intake_url = create_metrics_intake_url_prefix(config); let metrics_flushers = Arc::new(TokioMutex::new(start_metrics_flushers( Arc::clone(&api_key_factory), &metrics_aggr_handle, + &metrics_intake_url, config, ))); + // Create lambda enhanced metrics instance once + let lambda_enhanced_metrics = + enhanced_metrics::new(metrics_aggr_handle.clone(), Arc::clone(config)); + + // Send config issue metrics + let config_issues = config::fallback(config); + send_config_issue_metric(&config_issues, &lambda_enhanced_metrics); + let propagator = Arc::new(DatadogCompositePropagator::new(Arc::clone(config))); // Lifecycle Invocation Processor let invocation_processor = Arc::new(TokioMutex::new(InvocationProcessor::new( Arc::clone(&tags_provider), Arc::clone(config), Arc::clone(&aws_config), - metrics_aggr_handle.clone(), + lambda_enhanced_metrics, Arc::clone(&propagator), ))); // AppSec processor (if enabled) @@ -1018,33 +1020,33 @@ fn start_logs_agent( (logs_agent_channel, logs_flusher) } -fn start_metrics_flushers( - api_key_factory: Arc, - metrics_aggr_handle: &MetricsAggregatorHandle, - config: &Arc, -) -> Vec { - let mut flushers = Vec::new(); - - let metrics_intake_url = if !config.dd_url.is_empty() { +fn create_metrics_intake_url_prefix(config: &Config) -> MetricsIntakeUrlPrefix { + if !config.dd_url.is_empty() { let dd_dd_url = DdDdUrl::new(config.dd_url.clone()).expect("can't parse DD_DD_URL"); - let prefix_override = MetricsIntakeUrlPrefixOverride::maybe_new(None, Some(dd_dd_url)); - MetricsIntakeUrlPrefix::new(None, prefix_override) + MetricsIntakeUrlPrefix::new(None, prefix_override).expect("can't parse DD_DD_URL prefix") } else if !config.url.is_empty() { let dd_url = DdUrl::new(config.url.clone()).expect("can't parse DD_URL"); - let prefix_override = MetricsIntakeUrlPrefixOverride::maybe_new(Some(dd_url), None); - MetricsIntakeUrlPrefix::new(None, prefix_override) + MetricsIntakeUrlPrefix::new(None, prefix_override).expect("can't parse DD_URL prefix") } else { - // use site let metrics_site = MetricsSite::new(config.site.clone()).expect("can't parse site"); - MetricsIntakeUrlPrefix::new(Some(metrics_site), None) - }; + MetricsIntakeUrlPrefix::new(Some(metrics_site), None).expect("can't parse site prefix") + } +} + +fn start_metrics_flushers( + api_key_factory: Arc, + metrics_aggr_handle: &MetricsAggregatorHandle, + metrics_intake_url: &MetricsIntakeUrlPrefix, + config: &Arc, +) -> Vec { + let mut flushers = Vec::new(); let flusher_config = MetricsFlusherConfig { api_key_factory, aggregator_handle: metrics_aggr_handle.clone(), - metrics_intake_url_prefix: metrics_intake_url.expect("can't parse site or override"), + metrics_intake_url_prefix: metrics_intake_url.clone(), https_proxy: config.proxy_https.clone(), timeout: Duration::from_secs(config.flush_timeout), retry_strategy: DsdRetryStrategy::Immediate(3), @@ -1169,6 +1171,28 @@ fn start_trace_agent( ) } +/// Sends metrics indicating issue with configuration. +/// +/// # Arguments +/// * `issue_reasons` - Vector of messages describing the issue with the configurations +/// * `lambda_enhanced_metrics` - The lambda enhanced metrics instance +fn send_config_issue_metric(issue_reasons: &[String], lambda_enhanced_metrics: &enhanced_metrics) { + if issue_reasons.is_empty() { + return; + } + let now = std::time::UNIX_EPOCH + .elapsed() + .expect("can't poll clock") + .as_secs() + .try_into() + .unwrap_or_default(); + + // Setup a separate metric for each config issue reason + for issue_reason in issue_reasons { + lambda_enhanced_metrics.set_config_load_issue_metric(now, issue_reason); + } +} + fn start_dogstatsd_aggregator(aggr_service: MetricsAggregatorService) { tokio::spawn(async move { aggr_service.run().await; diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index 0dc7b734f..976308c9e 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -442,10 +442,13 @@ impl Default for Config { } fn log_fallback_reason(reason: &str) { - println!("{{\"DD_EXTENSION_FALLBACK_REASON\":\"{reason}\"}}"); + error!("Fallback support for {reason} is no longer available."); } -fn fallback(config: &Config) -> Result<(), ConfigError> { +#[must_use = "fallback reasons should be processed to emit appropriate metrics"] +pub fn fallback(config: &Config) -> Vec { + let mut fallback_reasons = Vec::new(); + // Customer explicitly opted out of the Next Gen extension let opted_out = match config.extension_version.as_deref() { Some("compatibility") => true, @@ -454,21 +457,18 @@ fn fallback(config: &Config) -> Result<(), ConfigError> { }; if opted_out { - log_fallback_reason("extension_version"); - return Err(ConfigError::UnsupportedField( - "extension_version".to_string(), - )); + let reason = "extension_version"; + log_fallback_reason(reason); + fallback_reasons.push(reason.to_string()); } // ASM / .NET // todo(duncanista): Remove once the .NET runtime is fixed if config.serverless_appsec_enabled && has_dotnet_binary() { - log_fallback_reason("serverless_appsec_enabled_dotnet"); - return Err(ConfigError::UnsupportedField( - "serverless_appsec_enabled_dotnet".to_string(), - )); + let reason = "serverless_appsec_enabled_dotnet"; + log_fallback_reason(reason); + fallback_reasons.push(reason.to_string()); } - // OTLP let has_otlp_config = config .otlp_config_receiver_protocols_grpc_endpoint @@ -500,25 +500,22 @@ fn fallback(config: &Config) -> Result<(), ConfigError> { || config.otlp_config_logs_enabled; if has_otlp_config { - log_fallback_reason("otel"); - return Err(ConfigError::UnsupportedField("otel".to_string())); + let reason = "otel"; + log_fallback_reason(reason); + fallback_reasons.push(reason.to_string()); } - Ok(()) + fallback_reasons } #[allow(clippy::module_name_repetitions)] -pub fn get_config(config_directory: &Path) -> Result { +#[must_use = "configuration must be used to initialize the application"] +pub fn get_config(config_directory: &Path) -> Config { let path: std::path::PathBuf = config_directory.join("datadog.yaml"); let mut config_builder = ConfigBuilder::default() .add_source(Box::new(YamlConfigSource { path })) .add_source(Box::new(EnvConfigSource)); - - let config = config_builder.build(); - - fallback(&config)?; - - Ok(config) + config_builder.build() } #[inline] @@ -731,11 +728,7 @@ pub mod tests { figment::Jail::expect_with(|jail| { jail.clear_env(); jail.set_env("DD_EXTENSION_VERSION", "compatibility"); - let config = get_config(Path::new("")).expect_err("should reject unknown fields"); - assert_eq!( - config, - ConfigError::UnsupportedField("extension_version".to_string()) - ); + let _config = get_config(Path::new("")); Ok(()) }); } @@ -749,8 +742,7 @@ pub mod tests { "localhost:4138", ); - let config = get_config(Path::new("")).expect_err("should reject unknown fields"); - assert_eq!(config, ConfigError::UnsupportedField("otel".to_string())); + let _config = get_config(Path::new("")); Ok(()) }); } @@ -770,8 +762,7 @@ pub mod tests { ", )?; - let config = get_config(Path::new("")).expect_err("should reject unknown fields"); - assert_eq!(config, ConfigError::UnsupportedField("otel".to_string())); + let _config = get_config(Path::new("")); Ok(()) }); } @@ -781,7 +772,7 @@ pub mod tests { figment::Jail::expect_with(|jail| { jail.clear_env(); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!( config.logs_config_logs_dd_url, "https://http-intake.logs.datadoghq.com".to_string() @@ -799,7 +790,7 @@ pub mod tests { "agent-http-intake-pci.logs.datadoghq.com:443", ); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!( config.logs_config_logs_dd_url, "agent-http-intake-pci.logs.datadoghq.com:443".to_string() @@ -814,7 +805,7 @@ pub mod tests { jail.clear_env(); jail.set_env("DD_APM_DD_URL", "https://trace-pci.agent.datadoghq.com"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!( config.apm_dd_url, "https://trace-pci.agent.datadoghq.com/api/v0.2/traces".to_string() @@ -829,7 +820,7 @@ pub mod tests { jail.clear_env(); jail.set_env("DD_DD_URL", "custom_proxy:3128"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.dd_url, "custom_proxy:3128".to_string()); Ok(()) }); @@ -841,7 +832,7 @@ pub mod tests { jail.clear_env(); jail.set_env("DD_URL", "custom_proxy:3128"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.url, "custom_proxy:3128".to_string()); Ok(()) }); @@ -852,7 +843,7 @@ pub mod tests { figment::Jail::expect_with(|jail| { jail.clear_env(); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.dd_url, String::new()); Ok(()) }); @@ -862,13 +853,9 @@ pub mod tests { fn test_allowed_but_disabled() { figment::Jail::expect_with(|jail| { jail.clear_env(); - jail.set_env( - "DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_ENDPOINT", - "localhost:4138", - ); + jail.set_env("DD_SERVERLESS_APPSEC_ENABLED", "true"); - let config = get_config(Path::new("")).expect_err("should reject unknown fields"); - assert_eq!(config, ConfigError::UnsupportedField("otel".to_string())); + let _config = get_config(Path::new("")); Ok(()) }); } @@ -884,7 +871,7 @@ pub mod tests { ", )?; jail.set_env("DD_SITE", "datad0g.com"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.site, "datad0g.com"); Ok(()) }); @@ -900,7 +887,7 @@ pub mod tests { r" ", )?; - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.site, "datadoghq.com"); Ok(()) }); @@ -911,7 +898,7 @@ pub mod tests { figment::Jail::expect_with(|jail| { jail.clear_env(); jail.set_env("DD_SITE", "datadoghq.eu"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.site, "datadoghq.eu"); Ok(()) }); @@ -922,7 +909,7 @@ pub mod tests { figment::Jail::expect_with(|jail| { jail.clear_env(); jail.set_env("DD_LOG_LEVEL", "TRACE"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.log_level, LogLevel::Trace); Ok(()) }); @@ -932,7 +919,7 @@ pub mod tests { fn test_parse_default() { figment::Jail::expect_with(|jail| { jail.clear_env(); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!( config, Config { @@ -956,7 +943,7 @@ pub mod tests { figment::Jail::expect_with(|jail| { jail.clear_env(); jail.set_env("DD_PROXY_HTTPS", "my-proxy:3128"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.proxy_https, Some("my-proxy:3128".to_string())); Ok(()) }); @@ -972,7 +959,7 @@ pub mod tests { "NO_PROXY", "127.0.0.1,localhost,172.16.0.0/12,us-east-1.amazonaws.com,datadoghq.eu", ); - let config = get_config(Path::new("")).expect("should parse noproxy"); + let config = get_config(Path::new("")); assert_eq!(config.proxy_https, None); Ok(()) }); @@ -990,7 +977,7 @@ pub mod tests { ", )?; - let config = get_config(Path::new("")).expect("should parse weird proxy config"); + let config = get_config(Path::new("")); assert_eq!(config.proxy_https, Some("my-proxy:3128".to_string())); Ok(()) }); @@ -1010,7 +997,7 @@ pub mod tests { ", )?; - let config = get_config(Path::new("")).expect("should parse weird proxy config"); + let config = get_config(Path::new("")); assert_eq!(config.proxy_https, None); // Assertion to ensure config.site runs before proxy // because we chenck that noproxy contains the site @@ -1024,7 +1011,7 @@ pub mod tests { figment::Jail::expect_with(|jail| { jail.clear_env(); jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "end"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.serverless_flush_strategy, FlushStrategy::End); Ok(()) }); @@ -1035,7 +1022,7 @@ pub mod tests { figment::Jail::expect_with(|jail| { jail.clear_env(); jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "periodically,100000"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!( config.serverless_flush_strategy, FlushStrategy::Periodically(PeriodicStrategy { interval: 100_000 }) @@ -1049,7 +1036,7 @@ pub mod tests { figment::Jail::expect_with(|jail| { jail.clear_env(); jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "invalid_strategy"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.serverless_flush_strategy, FlushStrategy::Default); Ok(()) }); @@ -1063,7 +1050,7 @@ pub mod tests { "DD_SERVERLESS_FLUSH_STRATEGY", "periodically,invalid_interval", ); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.serverless_flush_strategy, FlushStrategy::Default); Ok(()) }); @@ -1076,7 +1063,7 @@ pub mod tests { jail.set_env("DD_VERSION", "123"); jail.set_env("DD_ENV", "123456890"); jail.set_env("DD_SERVICE", "123456"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.version.expect("failed to parse DD_VERSION"), "123"); assert_eq!(config.env.expect("failed to parse DD_ENV"), "123456890"); assert_eq!( @@ -1106,7 +1093,7 @@ pub mod tests { pattern: exclude-me-yaml ", )?; - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!( config.logs_config_processing_rules, Some(vec![ProcessingRule { @@ -1135,7 +1122,7 @@ pub mod tests { pattern: exclude ", )?; - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!( config.logs_config_processing_rules, Some(vec![ProcessingRule { @@ -1164,7 +1151,7 @@ pub mod tests { repl: 'REDACTED' ", )?; - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); let rule = parse_rules_from_string( r#"[ {"name": "*", "pattern": "foo", "repl": "REDACTED"} @@ -1195,7 +1182,7 @@ pub mod tests { repl: 'REDACTED-YAML' ", )?; - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); let rule = parse_rules_from_string( r#"[ {"name": "*", "pattern": "foo", "repl": "REDACTED-ENV"} @@ -1222,7 +1209,7 @@ pub mod tests { remove_paths_with_digits: true ", )?; - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert!(config.apm_config_obfuscation_http_remove_query_string,); assert!(config.apm_config_obfuscation_http_remove_paths_with_digits,); Ok(()) @@ -1237,7 +1224,7 @@ pub mod tests { "datadog,tracecontext,b3,b3multi", ); jail.set_env("DD_EXTENSION_VERSION", "next"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); let expected_styles = vec![ TracePropagationStyle::Datadog, @@ -1256,7 +1243,7 @@ pub mod tests { figment::Jail::expect_with(|jail| { jail.clear_env(); jail.set_env("DD_TRACE_PROPAGATION_STYLE_EXTRACT", "datadog"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!( config.trace_propagation_style, @@ -1281,8 +1268,7 @@ pub mod tests { "DD_APM_REPLACE_TAGS", r#"[{"name":"resource.name","pattern":"(.*)/(foo[:%].+)","repl":"$1/{foo}"}]"#, ); - let config = get_config(Path::new("")); - assert!(config.is_ok()); + let _config = get_config(Path::new("")); Ok(()) }); } @@ -1295,7 +1281,7 @@ pub mod tests { jail.set_env("DD_ENHANCED_METRICS", "1"); jail.set_env("DD_LOGS_CONFIG_USE_COMPRESSION", "TRUE"); jail.set_env("DD_CAPTURE_LAMBDA_PAYLOAD", "0"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert!(config.serverless_logs_enabled); assert!(config.enhanced_metrics); assert!(config.logs_config_use_compression); @@ -1319,7 +1305,7 @@ pub mod tests { jail.set_env("DD_SITE", "us5.datadoghq.com"); jail.set_env("DD_API_KEY", "env-api-key"); jail.set_env("DD_FLUSH_TIMEOUT", "10"); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert_eq!(config.site, "us5.datadoghq.com"); assert_eq!(config.api_key, "env-api-key"); diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index a1d0d94b6..0fbb77340 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -89,7 +89,7 @@ impl Processor { tags_provider: Arc, config: Arc, aws_config: Arc, - metrics_aggregator: dogstatsd::aggregator_service::AggregatorHandle, + enhanced_metrics: EnhancedMetrics, propagator: Arc, ) -> Self { let resource = tags_provider @@ -106,7 +106,7 @@ impl Processor { context_buffer: ContextBuffer::default(), inferrer: SpanInferrer::new(Arc::clone(&config)), propagator, - enhanced_metrics: EnhancedMetrics::new(metrics_aggregator, Arc::clone(&config)), + enhanced_metrics, aws_config, tracer_detected: false, runtime: None, @@ -988,8 +988,15 @@ mod tests { tokio::spawn(service.run()); + let enhanced_metrics = EnhancedMetrics::new(handle, Arc::clone(&config)); let propagator = Arc::new(DatadogCompositePropagator::new(Arc::clone(&config))); - Processor::new(tags_provider, config, aws_config, handle, propagator) + Processor::new( + tags_provider, + config, + aws_config, + enhanced_metrics, + propagator, + ) } #[test] diff --git a/bottlecap/src/metrics/enhanced/constants.rs b/bottlecap/src/metrics/enhanced/constants.rs index 44666910f..110c0b618 100644 --- a/bottlecap/src/metrics/enhanced/constants.rs +++ b/bottlecap/src/metrics/enhanced/constants.rs @@ -45,6 +45,8 @@ pub const THREADS_USE_METRIC: &str = "aws.lambda.enhanced.threads_use"; pub const SHUTDOWNS_METRIC: &str = "aws.lambda.enhanced.shutdowns"; //pub const ASM_INVOCATIONS_METRIC: &str = "aws.lambda.enhanced.asm.invocations"; pub const UNUSED_INIT: &str = "aws.lambda.enhanced.unused_init"; +pub const DATADOG_SERVERLESS_EXTENSION_FAILOVER_CONFIG_ISSUE_METRIC: &str = + "datadog.serverless.extension.failover"; pub const ENHANCED_METRICS_ENV_VAR: &str = "DD_ENHANCED_METRICS"; // Monitoring interval for tmp, fd, and threads metrics diff --git a/bottlecap/src/metrics/enhanced/lambda.rs b/bottlecap/src/metrics/enhanced/lambda.rs index 7e3276251..04287c317 100644 --- a/bottlecap/src/metrics/enhanced/lambda.rs +++ b/bottlecap/src/metrics/enhanced/lambda.rs @@ -60,18 +60,29 @@ impl Lambda { .insert(String::from("runtime"), runtime.to_string()); } - fn get_dynamic_value_tags(&self) -> Option { - let vec_tags: Vec = self - .dynamic_value_tags - .iter() - .map(|(k, v)| format!("{k}:{v}")) - .collect(); + fn tags_to_sorted_tags(tags: &HashMap) -> Option { + let vec_tags: Vec = tags.iter().map(|(k, v)| format!("{k}:{v}")).collect(); let string_tags = vec_tags.join(","); SortedTags::parse(&string_tags).ok() } + fn get_dynamic_value_tags(&self) -> Option { + Self::tags_to_sorted_tags(&self.dynamic_value_tags) + } + + fn get_combined_tags(&self, additional_tags: &HashMap) -> Option { + if additional_tags.is_empty() { + return self.get_dynamic_value_tags(); + } + + let mut combined_tags = self.dynamic_value_tags.clone(); + combined_tags.extend(additional_tags.clone()); + + Self::tags_to_sorted_tags(&combined_tags) + } + pub fn increment_invocation_metric(&self, timestamp: i64) { self.increment_metric(constants::INVOCATIONS_METRIC, timestamp); } @@ -94,6 +105,19 @@ impl Lambda { self.increment_metric(constants::OUT_OF_MEMORY_METRIC, timestamp); } + /// Set up a metric tracking configuration load issue with details + pub fn set_config_load_issue_metric(&self, timestamp: i64, reason_msg: &str) { + let dynamic_tags = self.get_combined_tags(&HashMap::from([( + "reason".to_string(), + reason_msg.to_string(), + )])); + self.increment_metric_with_tags( + constants::DATADOG_SERVERLESS_EXTENSION_FAILOVER_CONFIG_ISSUE_METRIC, + timestamp, + dynamic_tags, + ); + } + pub fn set_init_duration_metric( &mut self, init_type: InitType, @@ -122,13 +146,23 @@ impl Lambda { } fn increment_metric(&self, metric_name: &str, timestamp: i64) { + self.increment_metric_with_tags(metric_name, timestamp, self.get_dynamic_value_tags()); + } + + /// Helper function to emit metric with supplied tags + fn increment_metric_with_tags( + &self, + metric_name: &str, + timestamp: i64, + tags: Option, + ) { if !self.config.enhanced_metrics { return; } let metric = Metric::new( metric_name.into(), MetricValue::distribution(1f64), - self.get_dynamic_value_tags(), + tags, Some(timestamp), ); if let Err(e) = self.aggr_handle.insert_batch(vec![metric]) { @@ -767,9 +801,19 @@ mod tests { } async fn assert_sketch(handle: &AggregatorHandle, metric_id: &str, value: f64, timestamp: i64) { + assert_sketch_with_tag(handle, metric_id, value, timestamp, None).await; + } + + async fn assert_sketch_with_tag( + handle: &AggregatorHandle, + metric_id: &str, + value: f64, + timestamp: i64, + tags: Option, + ) { let ts = (timestamp / 10) * 10; if let Some(e) = handle - .get_entry_by_id(metric_id.into(), None, ts) + .get_entry_by_id(metric_id.into(), tags, ts) .await .unwrap() { @@ -1371,4 +1415,30 @@ mod tests { .is_none() ); } + + #[tokio::test] + async fn test_set_config_load_issue_metric() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + let now: i64 = std::time::UNIX_EPOCH + .elapsed() + .expect("unable to poll clock, unrecoverable") + .as_secs() + .try_into() + .unwrap_or_default(); + let test_reason = "test_config_issue"; + + lambda.set_config_load_issue_metric(now, test_reason); + + // Create the expected tags for the metric lookup + let expected_tags = SortedTags::parse(&format!("reason:{test_reason}")).ok(); + assert_sketch_with_tag( + &metrics_aggr, + constants::DATADOG_SERVERLESS_EXTENSION_FAILOVER_CONFIG_ISSUE_METRIC, + 1f64, + now, + expected_tags, + ) + .await; + } } diff --git a/bottlecap/src/otlp/mod.rs b/bottlecap/src/otlp/mod.rs index 292a80180..4e0f4aed7 100644 --- a/bottlecap/src/otlp/mod.rs +++ b/bottlecap/src/otlp/mod.rs @@ -37,7 +37,7 @@ mod tests { ", )?; - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); // Since the default for traces is `true`, we don't need to set it. assert!(should_enable_otlp_agent(&Arc::new(config))); @@ -55,7 +55,7 @@ mod tests { "0.0.0.0:4318", ); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); // Since the default for traces is `true`, we don't need to set it. assert!(should_enable_otlp_agent(&Arc::new(config))); @@ -74,7 +74,7 @@ mod tests { "0.0.0.0:4318", ); - let config = get_config(Path::new("")).expect("should parse config"); + let config = get_config(Path::new("")); assert!(!should_enable_otlp_agent(&Arc::new(config))); diff --git a/bottlecap/src/proxy/interceptor.rs b/bottlecap/src/proxy/interceptor.rs index 6c681978e..bab2a4459 100644 --- a/bottlecap/src/proxy/interceptor.rs +++ b/bottlecap/src/proxy/interceptor.rs @@ -492,7 +492,8 @@ mod tests { tokio::spawn(service.run()); - let metrics_aggregator = handle; + let enhanced_metrics = + crate::metrics::enhanced::lambda::Lambda::new(handle, Arc::clone(&config)); let aws_config = Arc::new(AwsConfig { region: "us-east-1".to_string(), function_name: "arn:some-function".to_string(), @@ -506,7 +507,7 @@ mod tests { Arc::clone(&tags_provider), Arc::clone(&config), Arc::clone(&aws_config), - metrics_aggregator, + enhanced_metrics, Arc::clone(&propagator), ))); let appsec_processor = match AppSecProcessor::new(&config) { From 9bad11015b08b6ae08c34500d9e16e87f37b7922 Mon Sep 17 00:00:00 2001 From: Tianning Li Date: Thu, 28 Aug 2025 14:59:23 -0400 Subject: [PATCH 2/2] feat: No longer launch Go-based agent for compatibility/OTLP/AAP config (#788) https://datadoghq.atlassian.net/browse/SVLS-7398 - As part of coming release, bottlecap agent no longer launches Go-based agent when compatibility/AAP/OTLP features are active - Emit the same metric when detecting any of above configuration - Update corresponding unit tests - Refactor the references of enhanced metrics. Manifests: - [Test lambda function](https://us-east-1.console.aws.amazon.com/lambda/home?region=us-east-1#/functions/ltn1-fullinstrument-bn-cold-python310-lambda?code=&subtab=envVars&tab=testing) with [logs](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:log-groups/log-group/$252Faws$252Flambda$252Fltn1-fullinstrument-bn-cold-python310-lambda/log-events/2025$252F08$252F21$252F$255B$2524LATEST$255Df3788d359677452dad162488ff15456f$3FfilterPattern$3Dotel) showing compatibility/AAP/OTPL are enabled image - [Logging](https://app.datadoghq.com/logs/livetail?query=functionname%3Altn1-fullinstrument-bn-cold-python310-lambda%20Metric&agg_m=count&agg_m_source=base&agg_t=count&cols=host%2Cservice&fromUser=true&messageDisplay=inline&refresh_mode=paused&storage=driveline&stream_sort=desc&viz=stream&from_ts=1755787655569&to_ts=1755787689060&live=false) image - [Metric](https://app.datadoghq.com/screen/integration/aws_lambda_enhanced_metrics?fromUser=false&fullscreen_end_ts=1755788220000&fullscreen_paused=true&fullscreen_refresh_mode=paused&fullscreen_section=overview&fullscreen_start_ts=1755787200000&fullscreen_widget=2&graph-explorer__tile_def=N4IgbglgXiBcIBcD2AHANhAzgkAaEAxgK7ZIC2A%2BhgHYDWmcA2gLr4BOApgI5EfYOxGoTphRJqmDhQBmSNmQCGOeJgIK0CtnhA8ObCHyagAJkoUVMSImwIc4IMhwT6CDfNQWP7utgE8AjNo%2BvvaYRGSwpggKxkgA5gB0kmxgemh8mAkcAB4IHBIQ4gnSChBoSKlswAAkCgDumBQKBARW1Ai41ZxxhdSd0kTUBAi9AL4ABABGvuPAA0Mj4h6OowkKja2DCAAUAJTaCnFx3UpyoeEgo6wgsvJEGgJCN3Jk9wrevH6BV-iWbMqgTbtOAAJgADPg5MY9BRpkZEL4UHZ4LdXhptBBqNDsnAISAoXp7NDVJdmKMfiBsL50nBgOSgA&refresh_mode=sliding&from_ts=1755783890661&to_ts=1755787490661&live=true) image --- bottlecap/src/bin/bottlecap/main.rs | 2 +- bottlecap/src/config/mod.rs | 122 ++++++++++++++++++++++------ 2 files changed, 98 insertions(+), 26 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 94582b236..a3d70e00d 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -515,7 +515,7 @@ async fn extension_loop_active( enhanced_metrics::new(metrics_aggr_handle.clone(), Arc::clone(config)); // Send config issue metrics - let config_issues = config::fallback(config); + let config_issues = config::inspect_config(config); send_config_issue_metric(&config_issues, &lambda_enhanced_metrics); let propagator = Arc::new(DatadogCompositePropagator::new(Arc::clone(config))); diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index 976308c9e..6b5fa17d4 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -441,13 +441,13 @@ impl Default for Config { } } -fn log_fallback_reason(reason: &str) { - error!("Fallback support for {reason} is no longer available."); +fn log_unsupported_reason(reason: &str) { + error!("Found unsupported config: {reason} is no longer available."); } -#[must_use = "fallback reasons should be processed to emit appropriate metrics"] -pub fn fallback(config: &Config) -> Vec { - let mut fallback_reasons = Vec::new(); +#[must_use = "Unsupported reasons should be processed to emit appropriate metrics"] +pub fn inspect_config(config: &Config) -> Vec { + let mut unsupported_reasons = Vec::new(); // Customer explicitly opted out of the Next Gen extension let opted_out = match config.extension_version.as_deref() { @@ -458,16 +458,16 @@ pub fn fallback(config: &Config) -> Vec { if opted_out { let reason = "extension_version"; - log_fallback_reason(reason); - fallback_reasons.push(reason.to_string()); + log_unsupported_reason(reason); + unsupported_reasons.push(reason.to_string()); } // ASM / .NET // todo(duncanista): Remove once the .NET runtime is fixed if config.serverless_appsec_enabled && has_dotnet_binary() { let reason = "serverless_appsec_enabled_dotnet"; - log_fallback_reason(reason); - fallback_reasons.push(reason.to_string()); + log_unsupported_reason(reason); + unsupported_reasons.push(reason.to_string()); } // OTLP let has_otlp_config = config @@ -501,11 +501,11 @@ pub fn fallback(config: &Config) -> Vec { if has_otlp_config { let reason = "otel"; - log_fallback_reason(reason); - fallback_reasons.push(reason.to_string()); + log_unsupported_reason(reason); + unsupported_reasons.push(reason.to_string()); } - fallback_reasons + unsupported_reasons } #[allow(clippy::module_name_repetitions)] @@ -724,15 +724,98 @@ pub mod tests { }; #[test] - fn test_reject_on_opted_out() { + fn test_baseline_case() { figment::Jail::expect_with(|jail| { jail.clear_env(); - jail.set_env("DD_EXTENSION_VERSION", "compatibility"); let _config = get_config(Path::new("")); Ok(()) }); } + #[test] + fn test_inspect_config() { + struct TestCase { + name: &'static str, + env_vars: Vec<(&'static str, &'static str)>, + yaml_content: Option<&'static str>, + expected_reasons: Vec<&'static str>, + } + + let test_cases = vec![ + TestCase { + name: "default config - no unsupported reasons", + env_vars: vec![], + yaml_content: None, + expected_reasons: vec![], + }, + TestCase { + name: "extension_version compatibility - should discover", + env_vars: vec![("DD_EXTENSION_VERSION", "compatibility")], + yaml_content: None, + expected_reasons: vec!["extension_version"], + }, + TestCase { + name: "otlp config enabled - should discover", + env_vars: vec![( + "DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_ENDPOINT", + "localhost:4317", + )], + yaml_content: None, + expected_reasons: vec!["otel"], + }, + TestCase { + name: "multiple unsupported reasons", + env_vars: vec![ + ("DD_EXTENSION_VERSION", "compatibility"), + ("DD_OTLP_CONFIG_METRICS_ENABLED", "true"), + ], + yaml_content: None, + expected_reasons: vec!["extension_version", "otel"], + }, + TestCase { + name: "otlp config via yaml - should discover", + env_vars: vec![], + yaml_content: Some( + r" + otlp_config: + receiver: + protocols: + grpc: + endpoint: localhost:4317 + ", + ), + expected_reasons: vec!["otel"], + }, + ]; + + for test_case in test_cases { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + + // Set environment variables + for (key, value) in &test_case.env_vars { + jail.set_env(key, value); + } + + // Create YAML file if provided + if let Some(yaml_content) = test_case.yaml_content { + jail.create_file("datadog.yaml", yaml_content)?; + } + + let config = get_config(Path::new("")); + let unsupported_reasons = inspect_config(&config); + + assert_eq!( + unsupported_reasons, test_case.expected_reasons, + "Test case '{}' failed: expected {:?}, got {:?}", + test_case.name, test_case.expected_reasons, unsupported_reasons + ); + + Ok(()) + }); + } + } + #[test] fn test_fallback_on_otel() { figment::Jail::expect_with(|jail| { @@ -849,17 +932,6 @@ pub mod tests { }); } - #[test] - fn test_allowed_but_disabled() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_SERVERLESS_APPSEC_ENABLED", "true"); - - let _config = get_config(Path::new("")); - Ok(()) - }); - } - #[test] fn test_precedence() { figment::Jail::expect_with(|jail| {