Skip to content

Commit 47112bd

Browse files
VihasMakwanacmacknzpkoutsovasilis
authored
[otel] Implement EDOT diagnostics extension (#10052)
* edot diagnostics * revert otel.yml * add for supervised * cleanup * more fixes * spelling and comments * docs and comments * test case * diagram * fix test * fix CI * cleanup and npipe * comments * rename * go.mod and notice * go.mod and notice * restore otel-merged.yaml * refactor ZipArchiveEDOT * checkci * license * initial commit * timeout * fix tests * fix tests * comments * error handling * lint * test case * readme * gofmt * remove println * test * add cpu profile * remove telemetry for now. * remove redundant code * doc * fix npipe * Update internal/pkg/otel/extension/elasticdiagnostics/extension.go Co-authored-by: Craig MacKenzie <[email protected]> * readme * readme * Update internal/pkg/otel/extension/elasticdiagnostics/extension.go Co-authored-by: Panos Koutsovasilis <[email protected]> * mutex * comments * Update generated_component_test.go * Update generated_component_test.go * comments * test thorough * use eventuallyWith * update go.mod and notice * add cpu * comment * test * fix test * lint * lint * test * comments * cleanup * comments * fix test --------- Co-authored-by: Craig MacKenzie <[email protected]> Co-authored-by: Panos Koutsovasilis <[email protected]>
1 parent 9989255 commit 47112bd

File tree

18 files changed

+477
-75
lines changed

18 files changed

+477
-75
lines changed

internal/pkg/agent/application/paths/common.go

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ const (
3333
// ControlSocketName is the control socket name.
3434
ControlSocketName = "elastic-agent.sock"
3535

36+
// Our DiagnosticsExtension will use DiagnosticsExtensionSocketName to listen and serve diagnostic requests.
37+
DiagnosticsExtensionSocketName = "edot-diagnostics-extension.sock"
38+
3639
// WindowsControlSocketInstalledPath is the control socket path used when installed on Windows.
3740
WindowsControlSocketInstalledPath = `npipe:///elastic-agent-system`
3841

@@ -52,16 +55,17 @@ const (
5255
var ExternalInputsPattern = filepath.Join("inputs.d", "*.yml")
5356

5457
var (
55-
topPath string
56-
configPath string
57-
configFilePath string
58-
logsPath string
59-
downloadsPath string
60-
componentsPath string
61-
installPath string
62-
controlSocketPath string
63-
unversionedHome bool
64-
tmpCreator sync.Once
58+
topPath string
59+
configPath string
60+
configFilePath string
61+
logsPath string
62+
downloadsPath string
63+
componentsPath string
64+
installPath string
65+
controlSocketPath string
66+
diagnosticsExtensionSocket string
67+
unversionedHome bool
68+
tmpCreator sync.Once
6569
)
6670

6771
func init() {
@@ -70,6 +74,7 @@ func init() {
7074
configPath = topPath
7175
logsPath = topPath
7276
controlSocketPath = initialControlSocketPath(topPath)
77+
diagnosticsExtensionSocket = SocketFromPath(runtime.GOOS, topPath, DiagnosticsExtensionSocketName)
7378
unversionedHome = false // only versioned by container subcommand
7479

7580
// these should never change
@@ -339,8 +344,14 @@ func RunningInstalled() bool {
339344
// ControlSocketFromPath returns the control socket path for an Elastic Agent running
340345
// on the defined platform, and its executing directory.
341346
func ControlSocketFromPath(platform string, path string) string {
347+
return SocketFromPath(platform, path, ControlSocketName)
348+
}
349+
350+
// SocketFromPath returns the socket path for an Elastic Agent running
351+
// on the defined platform for a given socket, and its executing directory.
352+
func SocketFromPath(platform string, path string, socketName string) string {
342353
// socket should be inside this directory
343-
socketPath := filepath.Join(path, ControlSocketName)
354+
socketPath := filepath.Join(path, socketName)
344355
if platform == "windows" {
345356
// on windows the control socket always uses the fallback
346357
return utils.SocketURLWithFallback(socketPath, path)
@@ -355,6 +366,14 @@ func ControlSocketFromPath(platform string, path string) string {
355366
return utils.SocketURLWithFallback(socketPath, path)
356367
}
357368

369+
func DiagnosticsExtensionSocket() string {
370+
return diagnosticsExtensionSocket
371+
}
372+
373+
func SetDiagnosticsExtensionSocket(socket string) {
374+
diagnosticsExtensionSocket = SocketFromPath(runtime.GOOS, topPath, socket)
375+
}
376+
358377
func pathSplit(path string) []string {
359378
dir, file := filepath.Split(path)
360379
if dir == "" && file == "" {
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License 2.0;
3+
// you may not use this file except in compliance with the Elastic License 2.0.
4+
5+
package cmd
6+
7+
import (
8+
"fmt"
9+
"os"
10+
"time"
11+
12+
"github.com/spf13/cobra"
13+
14+
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
15+
"github.com/elastic/elastic-agent/internal/pkg/cli"
16+
"github.com/elastic/elastic-agent/internal/pkg/diagnostics"
17+
"github.com/elastic/elastic-agent/internal/pkg/otel"
18+
"github.com/elastic/elastic-agent/pkg/control/v2/client"
19+
)
20+
21+
func newOtelDiagnosticsCommand(streams *cli.IOStreams) *cobra.Command {
22+
cmd := &cobra.Command{
23+
Use: "diagnostics",
24+
Short: "Gather diagnostics information from the EDOT and write it to a zip archive",
25+
Long: "This command gathers diagnostics information from the EDOT and writes it to a zip archive",
26+
RunE: func(cmd *cobra.Command, _ []string) error {
27+
if err := otelDiagnosticCmd(streams, cmd); err != nil {
28+
fmt.Fprintf(streams.Err, "Error: %v\n%s\n", err, troubleshootMessage())
29+
os.Exit(1)
30+
}
31+
return nil
32+
},
33+
SilenceUsage: true,
34+
SilenceErrors: true,
35+
}
36+
cmd.Flags().StringP("file", "f", "", "name of the output diagnostics zip archive")
37+
cmd.Flags().BoolP("cpu-profile", "p", false, "wait to collect a CPU profile")
38+
return cmd
39+
}
40+
41+
func otelDiagnosticCmd(streams *cli.IOStreams, cmd *cobra.Command) error {
42+
cpuProfile, _ := cmd.Flags().GetBool("cpu-profile")
43+
resp, err := otel.PerformDiagnosticsExt(cmd.Context(), cpuProfile)
44+
if err != nil {
45+
return fmt.Errorf("failed to get edot diagnostics: %w", err)
46+
}
47+
48+
agentDiag := make([]client.DiagnosticFileResult, 0)
49+
for _, r := range resp.GlobalDiagnostics {
50+
agentDiag = append(agentDiag, client.DiagnosticFileResult{
51+
Name: r.Name,
52+
Filename: r.Filename,
53+
ContentType: r.ContentType,
54+
Content: r.Content,
55+
Description: r.Description,
56+
})
57+
}
58+
59+
componentDiag := make([]client.DiagnosticComponentResult, 0)
60+
for _, r := range resp.ComponentDiagnostics {
61+
res := client.DiagnosticComponentResult{
62+
Results: make([]client.DiagnosticFileResult, 0),
63+
}
64+
res.Results = append(res.Results, client.DiagnosticFileResult{
65+
Name: r.Name,
66+
Filename: r.Filename,
67+
ContentType: r.ContentType,
68+
Content: r.Content,
69+
Description: r.Description,
70+
})
71+
res.ComponentID = r.Name
72+
componentDiag = append(componentDiag, res)
73+
}
74+
componentDiag = aggregateComponentDiagnostics(componentDiag)
75+
76+
filepath, _ := cmd.Flags().GetString("file")
77+
if filepath == "" {
78+
ts := time.Now().UTC()
79+
filepath = "edot-diagnostics-" + ts.Format("2006-01-02T15-04-05Z07-00") + ".zip" // RFC3339 format that replaces : with -, so it will work on Windows
80+
}
81+
f, err := createFile(filepath)
82+
if err != nil {
83+
return fmt.Errorf("could not create diagnostics file %q: %w", filepath, err)
84+
}
85+
defer f.Close()
86+
87+
// In EDOT, the logs path does not exist, so we ignore that error.
88+
if err := diagnostics.ZipArchive(streams.Err, f, paths.Top(), agentDiag, nil, componentDiag, false); err != nil && !os.IsNotExist(err) {
89+
return fmt.Errorf("unable to create archive %q: %w", filepath, err)
90+
}
91+
fmt.Fprintf(streams.Out, "Created diagnostics archive %q\n", filepath)
92+
fmt.Fprintln(streams.Out, "** WARNING **\nCreated archive may contain plain text credentials.\nEnsure that files in archive are redacted before sharing.\n*******")
93+
return nil
94+
}
95+
96+
// aggregateComponentDiagnostics takes a slice of DiagnosticComponentResult and merges
97+
// results for components with the same ComponentID.
98+
func aggregateComponentDiagnostics(diags []client.DiagnosticComponentResult) []client.DiagnosticComponentResult {
99+
m := make(map[string]client.DiagnosticComponentResult)
100+
for _, d := range diags {
101+
if existing, ok := m[d.ComponentID]; ok {
102+
existing.Results = append(existing.Results, d.Results...)
103+
m[d.ComponentID] = existing
104+
} else {
105+
m[d.ComponentID] = d
106+
}
107+
}
108+
result := make([]client.DiagnosticComponentResult, 0, len(m))
109+
for _, v := range m {
110+
result = append(result, v)
111+
}
112+
return result
113+
}

internal/pkg/agent/cmd/otel.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@ import (
1919
"github.com/elastic/elastic-agent-libs/logp"
2020
"github.com/elastic/elastic-agent-libs/service"
2121

22+
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
2223
"github.com/elastic/elastic-agent/internal/pkg/cli"
2324
"github.com/elastic/elastic-agent/internal/pkg/otel"
2425
"github.com/elastic/elastic-agent/internal/pkg/otel/agentprovider"
26+
"github.com/elastic/elastic-agent/internal/pkg/otel/extension/elasticdiagnostics"
2527
"github.com/elastic/elastic-agent/internal/pkg/otel/manager"
2628
"github.com/elastic/elastic-agent/internal/pkg/otel/monitoring"
2729
"github.com/elastic/elastic-agent/internal/pkg/release"
@@ -71,6 +73,7 @@ func newOtelCommandWithArgs(args []string, streams *cli.IOStreams) *cobra.Comman
7173
setupOtelFlags(cmd.Flags())
7274
cmd.AddCommand(newValidateCommandWithArgs(args, streams))
7375
cmd.AddCommand(newComponentsCommandWithArgs(args, streams))
76+
cmd.AddCommand(newOtelDiagnosticsCommand(streams))
7477

7578
return cmd
7679
}
@@ -128,6 +131,9 @@ type edotSettings struct {
128131

129132
func prepareCollectorSettings(configFiles []string, supervised bool, supervisedLoggingLevel string) (edotSettings, error) {
130133
var settings edotSettings
134+
conf := map[string]any{
135+
"endpoint": paths.DiagnosticsExtensionSocket(),
136+
}
131137
if supervised {
132138
// add stdin config provider
133139
configProvider, err := agentprovider.NewBufferProvider(os.Stdin)
@@ -136,6 +142,7 @@ func prepareCollectorSettings(configFiles []string, supervised bool, supervisedL
136142
}
137143
settings.otelSettings = otel.NewSettings(release.Version(), []string{configProvider.URI()},
138144
otel.WithConfigProviderFactory(configProvider.NewFactory()),
145+
otel.WithConfigConvertorFactory(manager.NewForceExtensionConverterFactory(elasticdiagnostics.DiagnosticsExtensionID.String(), conf)),
139146
)
140147

141148
// setup logger
@@ -173,7 +180,7 @@ func prepareCollectorSettings(configFiles []string, supervised bool, supervisedL
173180

174181
settings.otelSettings.DisableGracefulShutdown = false
175182
} else {
176-
settings.otelSettings = otel.NewSettings(release.Version(), configFiles)
183+
settings.otelSettings = otel.NewSettings(release.Version(), configFiles, otel.WithConfigConvertorFactory(manager.NewForceExtensionConverterFactory(elasticdiagnostics.DiagnosticsExtensionID.String(), conf)))
177184
}
178185
return settings, nil
179186
}

internal/pkg/agent/cmd/run.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"github.com/elastic/elastic-agent-libs/service"
3131
"github.com/elastic/elastic-agent-system-metrics/report"
3232
"github.com/elastic/elastic-agent/internal/pkg/agent/vault"
33+
"github.com/elastic/elastic-agent/internal/pkg/diagnostics"
3334

3435
"github.com/elastic/elastic-agent/internal/pkg/agent/application"
3536
"github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator"
@@ -50,7 +51,6 @@ import (
5051
"github.com/elastic/elastic-agent/internal/pkg/cli"
5152
"github.com/elastic/elastic-agent/internal/pkg/config"
5253
monitoringCfg "github.com/elastic/elastic-agent/internal/pkg/core/monitoring/config"
53-
"github.com/elastic/elastic-agent/internal/pkg/diagnostics"
5454
"github.com/elastic/elastic-agent/internal/pkg/release"
5555
"github.com/elastic/elastic-agent/pkg/component"
5656
"github.com/elastic/elastic-agent/pkg/control/v2/server"

internal/pkg/diagnostics/diagnostics.go

Lines changed: 51 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,9 @@ func ZipArchive(
226226
if err != nil {
227227
return fmt.Errorf("error creating .zip header for components/ directory: %w", err)
228228
}
229+
229230
// iterate over components
230-
for dirName, units := range compDirs {
231+
for dirName, comp := range componentResults {
231232
_, err := zw.CreateHeader(&zip.FileHeader{
232233
Name: fmt.Sprintf("components/%s/", dirName),
233234
Method: zip.Deflate,
@@ -236,66 +237,65 @@ func ZipArchive(
236237
if err != nil {
237238
return fmt.Errorf("error creating .zip header for component directory: %w", err)
238239
}
239-
// create component diags
240-
if comp, ok := componentResults[dirName]; ok {
241-
// check for component-level errors
242-
if comp.Err != nil {
243-
err = writeErrorResult(zw, fmt.Sprintf("components/%s/error.txt", dirName), comp.Err.Error())
240+
if comp.Err != nil {
241+
err = writeErrorResult(zw, fmt.Sprintf("components/%s/error.txt", dirName), comp.Err.Error())
242+
if err != nil {
243+
return fmt.Errorf("error while writing error result for component %s: %w", comp.ComponentID, err)
244+
}
245+
} else {
246+
for _, res := range comp.Results {
247+
248+
filePath := fmt.Sprintf("components/%s/%s", dirName, res.Filename)
249+
resFileWriter, err := zw.CreateHeader(&zip.FileHeader{
250+
Name: filePath,
251+
Method: zip.Deflate,
252+
Modified: ts,
253+
})
244254
if err != nil {
245-
return fmt.Errorf("error while writing error result for component %s: %w", comp.ComponentID, err)
255+
return fmt.Errorf("error creating .zip header for %s: %w", res.Filename, err)
246256
}
247-
} else {
248-
for _, res := range comp.Results {
249-
250-
filePath := fmt.Sprintf("components/%s/%s", dirName, res.Filename)
251-
resFileWriter, err := zw.CreateHeader(&zip.FileHeader{
252-
Name: filePath,
253-
Method: zip.Deflate,
254-
Modified: ts,
255-
})
256-
if err != nil {
257-
return fmt.Errorf("error creating .zip header for %s: %w", res.Filename, err)
258-
}
259-
err = writeRedacted(errOut, resFileWriter, filePath, res)
260-
if err != nil {
261-
return fmt.Errorf("error writing %s in zip file: %w", res.Filename, err)
262-
}
257+
err = writeRedacted(errOut, resFileWriter, filePath, res)
258+
if err != nil {
259+
return fmt.Errorf("error writing %s in zip file: %w", res.Filename, err)
263260
}
264261
}
265-
266262
}
267263
// create unit diags
268-
for _, ud := range units {
269-
unitDir := strings.ReplaceAll(strings.TrimPrefix(ud.UnitID, ud.ComponentID+"-"), "/", "-")
270-
_, err := zw.CreateHeader(&zip.FileHeader{
271-
Name: fmt.Sprintf("components/%s/%s/", dirName, unitDir),
272-
Method: zip.Deflate,
273-
Modified: ts,
274-
})
275-
if err != nil {
276-
return fmt.Errorf("error creating .zip header for unit directory: %w", err)
277-
}
278-
// check for unit-level errors
279-
if ud.Err != nil {
280-
err = writeErrorResult(zw, fmt.Sprintf("components/%s/%s/error.txt", dirName, unitDir), ud.Err.Error())
281-
if err != nil {
282-
return fmt.Errorf("error while writing error result for unit %s: %w", ud.UnitID, err)
283-
}
284-
continue
285-
}
286-
for _, fr := range ud.Results {
287-
filePath := fmt.Sprintf("components/%s/%s/%s", dirName, unitDir, fr.Filename)
288-
w, err := zw.CreateHeader(&zip.FileHeader{
289-
Name: filePath,
264+
if units, ok := compDirs[dirName]; ok {
265+
// check for component-level errors
266+
// create unit diags
267+
for _, ud := range units {
268+
unitDir := strings.ReplaceAll(strings.TrimPrefix(ud.UnitID, ud.ComponentID+"-"), "/", "-")
269+
_, err := zw.CreateHeader(&zip.FileHeader{
270+
Name: fmt.Sprintf("components/%s/%s/", dirName, unitDir),
290271
Method: zip.Deflate,
291-
Modified: fr.Generated,
272+
Modified: ts,
292273
})
293274
if err != nil {
294-
return err
275+
return fmt.Errorf("error creating .zip header for unit directory: %w", err)
295276
}
296-
err = writeRedacted(errOut, w, filePath, fr)
297-
if err != nil {
298-
return err
277+
// check for unit-level errors
278+
if ud.Err != nil {
279+
err = writeErrorResult(zw, fmt.Sprintf("components/%s/%s/error.txt", dirName, unitDir), ud.Err.Error())
280+
if err != nil {
281+
return fmt.Errorf("error while writing error result for unit %s: %w", ud.UnitID, err)
282+
}
283+
continue
284+
}
285+
for _, fr := range ud.Results {
286+
filePath := fmt.Sprintf("components/%s/%s/%s", dirName, unitDir, fr.Filename)
287+
w, err := zw.CreateHeader(&zip.FileHeader{
288+
Name: filePath,
289+
Method: zip.Deflate,
290+
Modified: fr.Generated,
291+
})
292+
if err != nil {
293+
return err
294+
}
295+
err = writeRedacted(errOut, w, filePath, fr)
296+
if err != nil {
297+
return err
298+
}
299299
}
300300
}
301301
}

internal/pkg/otel/components.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ import (
8181
filestorage "github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage/filestorage"
8282
"go.opentelemetry.io/collector/extension/memorylimiterextension" // for putting backpressure when approach a memory limit
8383

84+
elasticdiagnostics "github.com/elastic/elastic-agent/internal/pkg/otel/extension/elasticdiagnostics"
85+
8486
"github.com/elastic/opentelemetry-collector-components/extension/apikeyauthextension"
8587
"github.com/elastic/opentelemetry-collector-components/extension/apmconfigextension"
8688

@@ -203,6 +205,7 @@ func components(extensionFactories ...extension.Factory) func() (otelcol.Factori
203205
apmconfigextension.NewFactory(),
204206
headersetterextension.NewFactory(),
205207
beatsauthextension.NewFactory(),
208+
elasticdiagnostics.NewFactory(),
206209
}
207210
extensions = append(extensions, extensionFactories...)
208211
factories.Extensions, err = otelcol.MakeFactoryMap[extension.Factory](extensions...)

0 commit comments

Comments
 (0)