Skip to content

Commit 94241cb

Browse files
committed
Merge branch 'master' into grafana-dashboard-ts
2 parents cb4f625 + ab86d9b commit 94241cb

File tree

8 files changed

+89
-24
lines changed

8 files changed

+89
-24
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ HolmesGPT is an AI agent for investigating problems in your cloud, finding the r
77
88
Find more about HolmesGPT's maintainers and adopters [here](./ADOPTERS.md).
99

10+
📚 **[Read the full documentation at holmesgpt.dev](https://holmesgpt.dev/)** for installation guides, tutorials, API reference, and more.
11+
1012
<p align="center">
1113
<a href="#how-it-works"><strong>How it Works</strong></a> |
1214
<a href="#installation"><strong>Installation</strong></a> |

holmes/core/supabase_dal.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import os
77
import threading
88
from datetime import datetime, timedelta
9+
from enum import Enum
910
from typing import Dict, List, Optional, Tuple
1011
from uuid import uuid4
1112

@@ -53,6 +54,11 @@
5354
ENRICHMENT_BLACKLIST_SET = set(ENRICHMENT_BLACKLIST)
5455

5556

57+
class FindingType(str, Enum):
    """Kinds of findings that can be selected via the ``finding_type`` filter.

    The member values are the literal strings sent to the backend query
    (``.eq("finding_type", finding_type.value)``). Because the enum also
    derives from ``str``, members compare equal to those raw strings.
    """

    # Findings recorded as issues.
    ISSUE = "issue"
    # Findings recorded as configuration changes.
    CONFIGURATION_CHANGE = "configuration_change"
60+
61+
5662
class RobustaToken(BaseModel):
5763
store_url: str
5864
api_key: str
@@ -237,14 +243,15 @@ def get_resource_recommendation(
237243
logging.exception("Supabase error while retrieving efficiency data")
238244
return None
239245

240-
def get_configuration_changes_metadata(
246+
def get_issues_metadata(
241247
self,
242248
start_datetime: str,
243249
end_datetime: str,
244250
limit: int = 100,
245251
workload: Optional[str] = None,
246252
ns: Optional[str] = None,
247253
cluster: Optional[str] = None,
254+
finding_type: FindingType = FindingType.CONFIGURATION_CHANGE,
248255
) -> Optional[List[Dict]]:
249256
if not self.enabled:
250257
return []
@@ -265,12 +272,12 @@ def get_configuration_changes_metadata(
265272
)
266273
.eq("account_id", self.account_id)
267274
.eq("cluster", cluster)
268-
.eq("finding_type", "configuration_change")
269275
.gte("creation_date", start_datetime)
270276
.lte("creation_date", end_datetime)
271277
.limit(limit)
272278
)
273279

280+
query = query.eq("finding_type", finding_type.value)
274281
if workload:
275282
query.eq("subject_name", workload)
276283
if ns:

holmes/main.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,15 @@
11
# ruff: noqa: E402
22
import os
3-
import sys
43

54
from holmes.utils.cert_utils import add_custom_certificate
6-
from holmes.utils.colors import USER_COLOR
75

86
ADDITIONAL_CERTIFICATE: str = os.environ.get("CERTIFICATE", "")
97
if add_custom_certificate(ADDITIONAL_CERTIFICATE):
108
print("added custom certificate")
119

1210
# DO NOT ADD ANY IMPORTS OR CODE ABOVE THIS LINE
1311
# IMPORTING ABOVE MIGHT INITIALIZE AN HTTPS CLIENT THAT DOESN'T TRUST THE CUSTOM CERTIFICATE
14-
15-
12+
import sys
1613
import json
1714
import logging
1815
import socket
@@ -44,6 +41,7 @@
4441
from holmes.utils.console.logging import init_logging
4542
from holmes.utils.console.result import handle_result
4643
from holmes.utils.file_utils import write_json_file
44+
from holmes.utils.colors import USER_COLOR
4745

4846
app = typer.Typer(add_completion=False, pretty_exceptions_show_locals=False)
4947
investigate_app = typer.Typer(

holmes/plugins/toolsets/robusta/robusta.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from typing import Optional, Dict, Any, List
66
from holmes.common.env_vars import load_bool
7-
from holmes.core.supabase_dal import SupabaseDal
7+
from holmes.core.supabase_dal import SupabaseDal, FindingType
88
from holmes.core.tools import (
99
StaticPrerequisite,
1010
Tool,
@@ -168,7 +168,7 @@ def __init__(
168168
required=True,
169169
),
170170
END_TIME: ToolParameter(
171-
description="The starting time boundary for the search period. String in RFC3339 format.",
171+
description="The ending time boundary for the search period. String in RFC3339 format.",
172172
type="string",
173173
required=True,
174174
),
@@ -188,7 +188,7 @@ def __init__(
188188
required=False,
189189
),
190190
"workload": ToolParameter(
191-
description="The kubernetes workload name for filtering configuration changes. Deployment name or Pod name for example.",
191+
description="Kubernetes resource name to filter configuration changes (e.g., Pod, Deployment, Job, etc.). Must be the full name. For Pods, include the exact generated suffix.",
192192
type="string",
193193
required=False,
194194
),
@@ -203,10 +203,13 @@ def __init__(
203203
self._dal = dal
204204

205205
def _fetch_change_history(
206-
self, params: Dict, cluster: Optional[str] = None
206+
self,
207+
params: Dict,
208+
cluster: Optional[str] = None,
209+
finding_type: FindingType = FindingType.CONFIGURATION_CHANGE,
207210
) -> Optional[List[Dict]]:
208211
if self._dal and self._dal.enabled:
209-
return self._dal.get_configuration_changes_metadata(
212+
return self._dal.get_issues_metadata(
210213
start_datetime=params["start_datetime"],
211214
end_datetime=params["end_datetime"],
212215
limit=min(
@@ -216,6 +219,7 @@ def _fetch_change_history(
216219
ns=params.get("namespace"),
217220
workload=params.get("workload"),
218221
cluster=cluster,
222+
finding_type=finding_type,
219223
)
220224
return None
221225

@@ -231,7 +235,7 @@ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolRes
231235
else:
232236
return StructuredToolResult(
233237
status=StructuredToolResultStatus.NO_DATA,
234-
data=f"Could not find changes for {params}",
238+
data=f"{self.name} found no data. {params}",
235239
params=params,
236240
)
237241
except Exception as e:
@@ -254,7 +258,7 @@ def __init__(self, dal: Optional[SupabaseDal]):
254258
name="fetch_configuration_changes_metadata",
255259
description=(
256260
"Fetch configuration changes metadata in a given time range. "
257-
"By default, fetch all cluster changes. Can be filtered on a given namespace or a specific workload. "
261+
"By default, fetch all cluster changes. Can be filtered on a given namespace or a specific kubernetes resource. "
258262
"Use fetch_finding_by_id to get detailed change of one specific configuration change."
259263
),
260264
)
@@ -285,6 +289,26 @@ def get_parameterized_one_liner(self, params: Dict) -> str:
285289
return f"Robusta: Search External Change History {params}"
286290

287291

292+
class FetchResourceIssuesMetadata(FetchConfigurationChangesMetadataBase):
    """Tool that fetches issue metadata for a time range.

    It reuses the parameters and invocation flow of
    ``FetchConfigurationChangesMetadataBase`` and only changes which
    ``finding_type`` is requested: ``FindingType.ISSUE`` instead of the
    base default ``FindingType.CONFIGURATION_CHANGE``.
    """

    def __init__(self, dal: Optional[SupabaseDal]):
        """Register the tool under the name ``fetch_resource_issues_metadata``.

        Args:
            dal: Data-access layer handed to the base class; may be ``None``.
        """
        super().__init__(
            dal=dal,
            name="fetch_resource_issues_metadata",
            description=(
                "Fetch issues and alert metadata in a given time range. "
                # The trailing space matters: adjacent literals are joined
                # verbatim, and without it the text read "etc.Use ...".
                "Must be filtered on a given namespace and specific kubernetes resource such as pod, deployment, job, etc. "
                "Use fetch_finding_by_id to get further information on a specific issue or alert."
            ),
            add_cluster_filter=True,
        )

    def _fetch_change_history(
        self,
        params: Dict,
        cluster: Optional[str] = None,
        finding_type: FindingType = FindingType.ISSUE,
    ) -> Optional[List[Dict]]:
        """Fetch findings through the base class, defaulting to issues.

        Overriding the base hook (rather than adding a differently named
        method) is what makes the inherited invocation path return issues.
        NOTE(review): this assumes the inherited ``_invoke`` obtains its data
        via ``self._fetch_change_history(params)`` -- confirm against the base.

        Args:
            params: Tool parameters, forwarded unchanged to the base class.
            cluster: Optional cluster filter, forwarded unchanged.
            finding_type: Kind of finding to fetch; defaults to ``ISSUE``.

        Returns:
            Whatever the base implementation returns (a list of dicts or
            ``None``).
        """
        return super()._fetch_change_history(
            params, cluster=cluster, finding_type=finding_type
        )

    def _fetch_resource_issues(self, params: Dict) -> Optional[List[Dict]]:  # type: ignore
        """Backward-compatible alias: fetch issue findings for ``params``."""
        return super()._fetch_change_history(params, finding_type=FindingType.ISSUE)

    def get_parameterized_one_liner(self, params: Dict) -> str:
        """Return the one-line description of this call for ``params``."""
        return f"Robusta: fetch resource issues metadata {params}"
310+
311+
288312
class RobustaToolset(Toolset):
289313
def __init__(self, dal: Optional[SupabaseDal]):
290314
dal_prereq = StaticPrerequisite(
@@ -300,6 +324,7 @@ def __init__(self, dal: Optional[SupabaseDal]):
300324
FetchRobustaFinding(dal),
301325
FetchConfigurationChangesMetadata(dal),
302326
FetchResourceRecommendation(dal),
327+
FetchResourceIssuesMetadata(dal),
303328
]
304329

305330
if PULL_EXTERNAL_FINDINGS:

holmes/plugins/toolsets/robusta/robusta_instructions.jinja2

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* If a change seems important to the investigation, use fetch_finding_by_id with the configuration change ID to get full details of the change.
44
* You must ALWAYS call fetch_configuration_changes_metadata when investigating an alert
55
* Never respond without calling fetch_configuration_changes_metadata
6+
* When investigating a resource (pod, deployment, or job), if no relevant information is available from the live cluster at the time of investigation, call the fetch_resource_issues_metadata function to retrieve its historical alert data.
7+
* You can use fetch_resource_issues_metadata to get issues context for a specific kubernetes resource. Start with a 4-hour window and expand to a 24-hour window if nothing comes up.
68
* When investigating an alert, look at historical configuration changes that happen 4 hours before the alert started
79
* If you found a change that caused the alert, you MUST write: 'The issue was introduced by ...' with a short description of the change, and the date of it.
810
For example:

server.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
# ruff: noqa: E402
2-
import json
32
import os
4-
from typing import List, Optional
53

6-
import litellm
7-
import sentry_sdk
8-
from holmes import get_version, is_official_release
94
from holmes.utils.cert_utils import add_custom_certificate
105

116
ADDITIONAL_CERTIFICATE: str = os.environ.get("CERTIFICATE", "")
@@ -20,7 +15,12 @@
2015
import uvicorn
2116
import colorlog
2217
import time
18+
import json
19+
from typing import List, Optional
2320

21+
import litellm
22+
import sentry_sdk
23+
from holmes import get_version, is_official_release
2424
from litellm.exceptions import AuthenticationError
2525
from fastapi import FastAPI, HTTPException, Request
2626
from fastapi.responses import StreamingResponse
Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,9 @@
11
toolsets:
2-
prometheus/metrics:
3-
enabled: False
42
kubernetes/kube-lineage-extras:
53
enabled: true
64
kubernetes/logs:
75
enabled: False
8-
kubernetes/core:
9-
enabled: true
106
datadog/logs:
117
enabled: True
128
datadog/metrics:
139
enabled: True
14-
datadog/traces:
15-
enabled: True

tests/test_app_imports.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import os
2+
import pytest
3+
4+
EXPECTED_LINES = [
5+
"# ruff: noqa: E402\n",
6+
"import os\n",
7+
"\n",
8+
"from holmes.utils.cert_utils import add_custom_certificate\n",
9+
"\n",
10+
'ADDITIONAL_CERTIFICATE: str = os.environ.get("CERTIFICATE", "")\n',
11+
"if add_custom_certificate(ADDITIONAL_CERTIFICATE):\n",
12+
' print("added custom certificate")\n',
13+
"\n",
14+
"# DO NOT ADD ANY IMPORTS OR CODE ABOVE THIS LINE\n",
15+
"# IMPORTING ABOVE MIGHT INITIALIZE AN HTTPS CLIENT THAT DOESN'T TRUST THE CUSTOM CERTIFICATE\n",
16+
]
17+
18+
19+
@pytest.mark.parametrize(
    "file_path,file_name",
    [
        ("holmes/main.py", "main.py"),
        ("server.py", "server.py"),
        ("experimental/ag-ui/server-agui.py", "server-agui.py"),
    ],
)
def test_app_files_have_correct_initial_lines(file_path, file_name):
    """Test that app files start with the required certificate handling code.

    Each parametrized entry point is read from the repository root (one level
    above this test file) and its leading lines are compared, one by one,
    with ``EXPECTED_LINES``.
    """
    full_path = os.path.join(os.path.dirname(__file__), "..", file_path)

    # Explicit encoding keeps the comparison independent of the platform's
    # default locale.
    with open(full_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Fail with a readable message instead of an IndexError when the file is
    # shorter than the expected header.
    assert len(lines) >= len(EXPECTED_LINES), (
        f"{file_name} has only {len(lines)} lines, but it must start with the "
        f"{len(EXPECTED_LINES)} lines of certificate handling code."
    )

    for i, expected_line in enumerate(EXPECTED_LINES):
        assert (
            lines[i] == expected_line
        ), f"Line {i + 1} should be: {expected_line.strip()!r}, but got: {lines[i].strip()!r}. This tests make sure the import order in {file_name} file is correct, if you see this, go to {file_name} file and move your imports code to lower lines."

0 commit comments

Comments
 (0)