Skip to content

Commit 18ea9c0

Browse files
FrapschenBenjaminBraunDev
authored and committed
[traces] init the trace sdk (kubernetes-sigs#1638)
* typo * apply review's suggestion * update * move otel env to helm template * update * skip lint * fix lint * add file header * update README.md * typo * apply review's suggestion
1 parent 63b3ef0 commit 18ea9c0

File tree

7 files changed

+246
-40
lines changed

7 files changed

+246
-40
lines changed

cmd/epp/runner/runner.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ var (
110110
modelServerMetricsScheme = flag.String("model-server-metrics-scheme", "http", "Scheme to scrape metrics from pods")
111111
modelServerMetricsHttpsInsecureSkipVerify = flag.Bool("model-server-metrics-https-insecure-skip-verify", true, "When using 'https' scheme for 'model-server-metrics-scheme', configure 'InsecureSkipVerify' (default to true)")
112112
haEnableLeaderElection = flag.Bool("ha-enable-leader-election", false, "Enables leader election for high availability. When enabled, readiness probes will only pass on the leader.")
113+
tracing = flag.Bool("tracing", true, "Enables emitting traces")
113114

114115
// Latency Predictor Flag
115116
enableLatencyPredictor = flag.Bool("enable-latency-predictor", false, "Enable the regression-based latency predictor and scheduler scorer.")
@@ -148,6 +149,13 @@ func (r *Runner) Run(ctx context.Context) error {
148149
flag.Parse()
149150
initLogging(&opts)
150151

152+
if *tracing {
153+
err := common.InitTracing(ctx, setupLog)
154+
if err != nil {
155+
return err
156+
}
157+
}
158+
151159
setupLog.Info("GIE build", "commit-sha", version.CommitSHA, "build-ref", version.BuildRef)
152160

153161
// Validate flags

config/charts/inferencepool/README.md

Lines changed: 43 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -166,30 +166,34 @@ $ helm uninstall pool-1
166166

167167
The following table lists the configurable parameters of the chart.
168168

169-
| **Parameter Name** | **Description** |
170-
|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
171-
| `inferencePool.apiVersion` | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions. |
172-
| `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. |
173-
| `inferencePool.modelServerType` | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. |
174-
| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. |
175-
| `inferenceExtension.replicas` | Number of replicas for the endpoint picker extension service. If More than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`. |
176-
| `inferenceExtension.image.name` | Name of the container image used for the endpoint picker. |
177-
| `inferenceExtension.image.hub` | Registry URL where the endpoint picker image is hosted. |
178-
| `inferenceExtension.image.tag` | Image tag of the endpoint picker. |
179-
| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
180-
| `inferenceExtension.env` | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`. |
181-
| `inferenceExtension.extraContainerPorts` | List of additional container ports to expose. Defaults to `[]`. |
182-
| `inferenceExtension.extraServicePorts` | List of additional service ports to expose. Defaults to `[]`. |
183-
| `inferenceExtension.flags` | List of flags which are passed through to endpoint picker. Example flags, enable-pprof, grpc-port etc. Refer [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for complete list. |
184-
| `inferenceExtension.affinity` | Affinity for the endpoint picker. Defaults to `{}`. |
185-
| `inferenceExtension.tolerations` | Tolerations for the endpoint picker. Defaults to `[]`. | |
186-
| `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. |
187-
| `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. |
188-
| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. |
189-
| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. |
190-
| `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. |
191-
| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. |
192-
| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. |
169+
| **Parameter Name** | **Description** |
170+
|----------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
171+
| `inferencePool.apiVersion` | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions. |
172+
| `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. |
173+
| `inferencePool.modelServerType` | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. |
174+
| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. |
175+
| `inferenceExtension.replicas` | Number of replicas for the endpoint picker extension service. If more than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`. |
176+
| `inferenceExtension.image.name` | Name of the container image used for the endpoint picker. |
177+
| `inferenceExtension.image.hub` | Registry URL where the endpoint picker image is hosted. |
178+
| `inferenceExtension.image.tag` | Image tag of the endpoint picker. |
179+
| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
180+
| `inferenceExtension.env` | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`. |
181+
| `inferenceExtension.extraContainerPorts` | List of additional container ports to expose. Defaults to `[]`. |
182+
| `inferenceExtension.extraServicePorts` | List of additional service ports to expose. Defaults to `[]`. |
183+
| `inferenceExtension.flags` | List of flags which are passed through to endpoint picker. Example flags, enable-pprof, grpc-port etc. Refer [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for complete list. |
184+
| `inferenceExtension.affinity` | Affinity for the endpoint picker. Defaults to `{}`. |
185+
| `inferenceExtension.tolerations` | Tolerations for the endpoint picker. Defaults to `[]`. |
186+
| `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. |
187+
| `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. |
188+
| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. |
189+
| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. |
190+
| `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. |
191+
| `inferenceExtension.tracing.enabled` | Enables or disables OpenTelemetry tracing globally for the EndpointPicker. |
192+
| `inferenceExtension.tracing.otelExporterEndpoint` | OpenTelemetry collector endpoint. |
193+
| `inferenceExtension.tracing.sampling.sampler` | The trace sampler to use. Currently, only `parentbased_traceidratio` is supported. This sampler respects the parent span’s sampling decision when present, and applies the configured ratio for root spans. |
194+
| `inferenceExtension.tracing.sampling.samplerArg` | Sampler-specific argument. For `parentbased_traceidratio`, this defines the base sampling rate for new traces (root spans), as a float string in the range [0.0, 1.0]. For example, "0.1" enables 10% sampling. |
195+
| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. |
196+
| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. |
193197

194198
### Provider Specific Configuration
195199

@@ -214,6 +218,21 @@ These are the options available to you with `provider.name` set to `istio`:
214218
| `istio.destinationRule.host` | Custom host value for the destination rule. If not set this will use the default value which is derived from the epp service name and release namespace to generate a valid service address. |
215219
| `istio.destinationRule.trafficPolicy.connectionPool` | Configure the connectionPool level settings of the traffic policy |
216220

221+
#### OpenTelemetry
222+
223+
The EndpointPicker supports OpenTelemetry-based tracing. To enable trace collection, use the following configuration:
224+
```yaml
225+
inferenceExtension:
226+
tracing:
227+
enabled: true
228+
otelExporterEndpoint: "http://localhost:4317"
229+
sampling:
230+
sampler: "parentbased_traceidratio"
231+
samplerArg: "0.1"
232+
```
233+
Make sure that the `otelExporterEndpoint` points to your OpenTelemetry collector endpoint.
234+
Currently, only the `parentbased_traceidratio` sampler is supported. You can adjust the base sampling ratio using the `samplerArg` (e.g., 0.1 means 10% of traces will be sampled).
235+
217236
## Notes
218237

219238
This chart will only deploy an InferencePool and its corresponding EndpointPicker extension. Before installing the chart, please make sure that the inference extension CRDs are installed in the cluster. For more details, please refer to the [getting started guide](https://gateway-api-inference-extension.sigs.k8s.io/guides/).

config/charts/inferencepool/templates/epp-deployment.yaml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,12 @@ spec:
6262
- "--{{ .name }}"
6363
- "{{ .value }}"
6464
{{- end }}
65+
- "--tracing"
66+
{{- if .Values.inferenceExtension.tracing.enabled }}
67+
- "true"
68+
{{- else }}
69+
- "false"
70+
{{- end }}
6571
ports:
6672
- name: grpc
6773
containerPort: 9002
@@ -101,6 +107,30 @@ spec:
101107
valueFrom:
102108
fieldRef:
103109
fieldPath: metadata.namespace
110+
{{- if .Values.inferenceExtension.tracing.enabled }}
111+
- name: OTEL_SERVICE_NAME
112+
value: "gateway-api-inference-extension"
113+
- name: OTEL_EXPORTER_OTLP_ENDPOINT
114+
value: {{ .Values.inferenceExtension.tracing.otelExporterEndpoint | quote }}
115+
- name: OTEL_TRACES_EXPORTER
116+
value: "otlp"
117+
- name: OTEL_RESOURCE_ATTRIBUTES_NODE_NAME
118+
valueFrom:
119+
fieldRef:
120+
apiVersion: v1
121+
fieldPath: spec.nodeName
122+
- name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME
123+
valueFrom:
124+
fieldRef:
125+
apiVersion: v1
126+
fieldPath: metadata.name
127+
- name: OTEL_RESOURCE_ATTRIBUTES
128+
value: 'k8s.namespace.name=$(NAMESPACE),k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)'
129+
- name: OTEL_TRACES_SAMPLER
130+
value: {{ .Values.inferenceExtension.tracing.sampling.sampler | quote }}
131+
- name: OTEL_TRACES_SAMPLER_ARG
132+
value: {{ .Values.inferenceExtension.tracing.sampling.samplerArg | quote }}
133+
{{- end }}
104134
{{- if .Values.inferenceExtension.env }}
105135
{{- toYaml .Values.inferenceExtension.env | nindent 8 }}
106136
{{- end }}

config/charts/inferencepool/values.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@ inferenceExtension:
5353

5454
gke:
5555
enabled: false
56+
tracing:
57+
enabled: false
58+
otelExporterEndpoint: "http://localhost:4317"
59+
sampling:
60+
sampler: "parentbased_traceidratio"
61+
samplerArg: "0.1"
5662

5763
inferencePool:
5864
targetPorts:
@@ -85,4 +91,4 @@ istio:
8591
trafficPolicy: {}
8692
# connectionPool:
8793
# http:
88-
# maxRequestsPerConnection: 256000
94+
# maxRequestsPerConnection: 256000

0 commit comments

Comments
 (0)