|
| 1 | +// A script for use in the Beats script processor, to remap raw OTel telemetry |
| 2 | +// from its prometheus endpoint to backwards-compatible Beats metrics fields |
| 3 | +// that can be viewed in Agent dashboards. |
| 4 | + |
/**
 * Beats script-processor entry point.
 *
 * Translates one event scraped from the OTel collector's Prometheus endpoint
 * into the legacy `beat.stats.libbeat.*` fields. Events that do not belong to
 * the monitoring elasticsearch exporter, or that carry none of the metrics
 * handled below, are cancelled.
 *
 * @param {Object} event - Script-processor event; only its Get(key),
 *   Put(key, value) and Cancel() methods are used.
 */
function process(event) {
  // This hard-coded exporter name will not work for the general
  // (non-monitoring) use case.
  var elastic_exporter = event.Get("prometheus.labels.exporter") == "elasticsearch/_agent-component/monitoring";
  var elastic_scope = event.Get("prometheus.labels.otel_scope_name") == "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter";

  // We accept general collector fields that are labelled with the
  // elasticsearch exporter (queue metrics, sent / error stats), or fields
  // whose instrumentation scope is the elasticsearch exporter itself (custom
  // elastic metrics).
  if (!elastic_exporter && !elastic_scope) {
    event.Cancel();
    return;
  }

  // Hack: if the scope is elastic-custom fields, deterministically mangle the
  // agent.id. Since the label set is different, these are passed through in
  // different events, and if we don't do this one of the events will be
  // rejected as a duplicate since they have the same component id, agent id,
  // and metricset. Only the elastic-scoped event is mangled: rotating both
  // kinds of event identically would leave them colliding.
  if (elastic_scope) {
    var id = event.Get("agent.id");
    if (id != null && id.length > 0) {
      event.Put("agent.id", rotateLastHexDigit(id));
    }
  }

  // The event will be discarded unless we find some valid metric to convert.
  var keep_event = false;

  var queue_size = event.Get("prometheus.metrics.otelcol_exporter_queue_size");
  var queue_capacity = event.Get("prometheus.metrics.otelcol_exporter_queue_capacity");
  if (queue_size != null) {
    keep_event = true;
    event.Put("beat.stats.libbeat.pipeline.queue.filled.events", queue_size);
  }
  if (queue_capacity != null) {
    keep_event = true;
    event.Put("beat.stats.libbeat.pipeline.queue.max_events", queue_capacity);
  }
  if (queue_size != null && queue_capacity != null) {
    var queue_pct = queue_size / queue_capacity;
    // isFinite rejects NaN (0 / 0) as well as +/-Infinity (n / 0), so a zero
    // capacity never produces a bogus fill ratio.
    if (isFinite(queue_pct)) {
      event.Put("beat.stats.libbeat.pipeline.queue.filled.pct", queue_pct);
    }
  }

  // Add send statistics from all source types.
  var total_sent = sumPresentMetrics(event, [
    "prometheus.metrics.otelcol_exporter_sent_log_records_total",
    "prometheus.metrics.otelcol_exporter_sent_spans_total",
    "prometheus.metrics.otelcol_exporter_sent_metric_points_total"
  ]);
  if (total_sent != null) {
    event.Put("beat.stats.libbeat.output.events.acked", total_sent);
    keep_event = true;
  }

  // Add failed statistics from all source types.
  var total_failed = sumPresentMetrics(event, [
    "prometheus.metrics.otelcol_exporter_send_failed_log_records_total",
    "prometheus.metrics.otelcol_exporter_send_failed_spans_total",
    "prometheus.metrics.otelcol_exporter_send_failed_metric_points_total"
  ]);
  if (total_failed != null) {
    event.Put("beat.stats.libbeat.output.events.dropped", total_failed);
    keep_event = true;
  }

  if (copyMetricIfPresent(event,
      "prometheus.metrics.otelcol_elasticsearch_flushed_bytes_total",
      "beat.stats.libbeat.output.write.bytes")) {
    keep_event = true;
  }

  // "failed" in the beats metric means an event failed to ingest but was
  // not dropped, and will be retried.
  if (copyMetricIfPresent(event,
      "prometheus.metrics.otelcol_elasticsearch_docs_retried_ratio_total",
      "beat.stats.libbeat.output.events.failed")) {
    keep_event = true;
  }

  // This is not an exact semantic match for how Beats measures batch count,
  // but it's close.
  if (copyMetricIfPresent(event,
      "prometheus.metrics.otelcol_elasticsearch_bulk_requests_count_ratio_total",
      "beat.stats.libbeat.output.events.batches")) {
    keep_event = true;
  }

  // Approximate semantic match: the otel metric counts all document
  // ingestion attempts, including success, failure, and retries,
  // which is a better match for the Beats definition of total events
  // than otelcol_elasticsearch_docs_received_ratio_total which
  // includes only unique events seen (regardless of retries etc).
  if (copyMetricIfPresent(event,
      "prometheus.metrics.otelcol_elasticsearch_docs_processed_ratio_total",
      "beat.stats.libbeat.output.events.total")) {
    keep_event = true;
  }

  if (!keep_event) {
    event.Cancel();
  }
}

// Returns `id` with its final character advanced to the next hexadecimal
// digit, wrapping f -> 0 and preserving letter case. A final character that
// is not a hex digit is replaced by "0".
function rotateLastHexDigit(id) {
  var hexDigits = "0123456789abcdef";
  var prefix = id.substring(0, id.length - 1);
  var last = id.substring(id.length - 1);
  var lowered = last.toLowerCase();
  // indexOf yields -1 for a non-hex character, which (-1 + 1) % 16 maps to
  // index 0, i.e. "0".
  var rotated = hexDigits.charAt((hexDigits.indexOf(lowered) + 1) % hexDigits.length);
  if (last !== lowered) {
    rotated = rotated.toUpperCase();
  }
  return prefix + rotated;
}

// Sums the values of every field in `keys` that is present on the event.
// Returns null when none of them is present, so callers can tell "no data"
// apart from a genuine total of zero.
function sumPresentMetrics(event, keys) {
  var total = 0;
  var found = false;
  for (var i = 0; i < keys.length; i++) {
    var value = event.Get(keys[i]);
    if (value != null) {
      total += value;
      found = true;
    }
  }
  return found ? total : null;
}

// Copies field `source` to field `target` when `source` is present on the
// event. Returns true if a value was copied.
function copyMetricIfPresent(event, source, target) {
  var value = event.Get(source);
  if (value == null) {
    return false;
  }
  event.Put(target, value);
  return true;
}
0 commit comments