diff --git a/CHANGELOG.md b/CHANGELOG.md index a4d6a6e..01ac405 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,19 @@ The intended audience of this file is for `incydr` SDK and CLI consumers -- as such, changes that don't affect how a consumer would use the library or CLI tool (e.g. adding unit tests, updating documentation, etc) are not captured here. +## Unreleased + +### Added +- Support for subgroups in file event queries and saved searches. See [this documentation](https://support.code42.com/hc/en-us/articles/14827671672087-Forensic-Search-reference#h_01JKEF6ESSMTEGFG28WZM6TNDR) for more details about this type of query. +- New methods for EventQuery() to enable more flexible filtering: + - `is_any` + - `is_none` + - `date_range` + - `subquery` + +### Fixed +- An issue where in some cases saved searches could not be retrieved. + ## 2.5.0 - 2025-06-06 ### Added diff --git a/docs/sdk/clients/file_event_queries.md b/docs/sdk/clients/file_event_queries.md index 9e86b27..c75f3d5 100644 --- a/docs/sdk/clients/file_event_queries.md +++ b/docs/sdk/clients/file_event_queries.md @@ -6,7 +6,7 @@ Use the `EventQuery` class to create a query for searching and filtering file ev ::: _incydr_sdk.queries.file_events.EventQuery :docstring: - :members: equals not_equals exists does_not_exist greater_than less_than matches_any + :members: equals not_equals exists does_not_exist greater_than less_than matches_any is_any is_none date_range subquery ## Query Building diff --git a/src/_incydr_sdk/core/settings.py b/src/_incydr_sdk/core/settings.py index 80af344..f6c529d 100644 --- a/src/_incydr_sdk/core/settings.py +++ b/src/_incydr_sdk/core/settings.py @@ -50,8 +50,7 @@ class IncydrSettings(BaseSettings): Usage: >> import incydr - >>> client = incydr.Client() - >>> client.settings.page_size = 10 + >>> client = incydr.Client(page_size = 10) Settings can also be loaded from shell environment variables or .env files. 
Just prefix a setting's attribute name with `INCYDR_` when configuring via enviroment vars. diff --git a/src/_incydr_sdk/enums/file_events.py b/src/_incydr_sdk/enums/file_events.py index ab2ee39..1e17427 100644 --- a/src/_incydr_sdk/enums/file_events.py +++ b/src/_incydr_sdk/enums/file_events.py @@ -5,6 +5,8 @@ class Operator(_Enum): # all valid filter operators for querying file events IS = "IS" IS_NOT = "IS_NOT" + IS_ANY = "IS_ANY" + IS_NONE = "IS_NONE" EXISTS = "EXISTS" DOES_NOT_EXIST = "DOES_NOT_EXIST" GREATER_THAN = "GREATER_THAN" diff --git a/src/_incydr_sdk/file_events/client.py b/src/_incydr_sdk/file_events/client.py index 5f1887f..9353a92 100644 --- a/src/_incydr_sdk/file_events/client.py +++ b/src/_incydr_sdk/file_events/client.py @@ -14,13 +14,15 @@ class InvalidQueryException(IncydrException): """Raised when the file events search endpoint returns a 400.""" - def __init__(self, query=None): + def __init__(self, query=None, exception=None): self.query = query self.message = ( "400 Response Error: Invalid query. Please double check your query filters are valid. " "\nTip: Make sure you're specifying your filter fields in dot notation.
" "\nFor example, filter by 'file.archiveId' to filter by the archiveId field within the file object.)" + f"\nRaw problem data from the response: {exception.response.json()['problems'] if exception is not None else None}" ) + self.original_exception = exception super().__init__(self.message) @@ -65,7 +67,7 @@ def search(self, query: EventQuery) -> FileEventsPage: response = self._parent.session.post("/v2/file-events", json=query.dict()) except HTTPError as err: if err.response.status_code == 400: - raise InvalidQueryException(query) + raise InvalidQueryException(query=query, exception=err) raise err page = FileEventsPage.parse_response(response) query.page_token = page.next_pg_token diff --git a/src/_incydr_sdk/file_events/models/response.py b/src/_incydr_sdk/file_events/models/response.py index 2aa9fc0..137505e 100644 --- a/src/_incydr_sdk/file_events/models/response.py +++ b/src/_incydr_sdk/file_events/models/response.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from datetime import datetime from typing import List from typing import Optional @@ -32,6 +34,17 @@ class SearchFilterGroup(ResponseModel): ) +class SearchFilterGroupV2(ResponseModel): + subgroup_clause: Optional[str] = Field( + alias="subgroupClause", + description="Grouping clause for subgroups.", + example="AND", + ) + subgroups: Optional[List[Union[SearchFilterGroup, SearchFilterGroupV2]]] = Field( + description="One or more FilterGroups to be combined in a query, or a FilterSubgroupV2" + ) + + class QueryProblem(ResponseModel): """ A model containing data on a query problem. @@ -99,7 +112,7 @@ class SavedSearch(ResponseModel): * **created_by_username**: `str` - The username of the user who created the saved search. * **creation_timestamp**: `datetime` - The time at which the saved search was created. * **group_clause**: `GroupClause` - `AND` or `OR`. Grouping clause for any specified groups. Defaults to `AND`. - * **groups**: `List[SearchFilterGroup]` - One or more FilterGroups to be combined in a query.
+ * **groups**: `List[Union[SearchFilterGroup, SearchFilterGroupV2]]` - One or more FilterGroups or FilterGroupV2s to be combined in a query. * **id**: `str` - The ID for the saved search. * **modified_by_uid**: `str` - The ID of the user who last modified the saved search. * **modified_by_username**: `str` - The username of the user who last modified the saved search. @@ -139,7 +152,7 @@ class SavedSearch(ResponseModel): description="Grouping clause for any specified groups.", example="OR", ) - groups: Optional[List[SearchFilterGroup]] = Field( + groups: Optional[List[Union[SearchFilterGroup, SearchFilterGroupV2]]] = Field( description="One or more FilterGroups to be combined in a query." ) id: Optional[str] = Field( diff --git a/src/_incydr_sdk/queries/file_events.py b/src/_incydr_sdk/queries/file_events.py index 8676d08..1ae5504 100644 --- a/src/_incydr_sdk/queries/file_events.py +++ b/src/_incydr_sdk/queries/file_events.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from datetime import datetime from datetime import timedelta from typing import List @@ -24,6 +26,8 @@ from _incydr_sdk.enums.file_events import ShareType from _incydr_sdk.enums.file_events import TrustReason from _incydr_sdk.file_events.models.response import SavedSearch +from _incydr_sdk.file_events.models.response import SearchFilterGroup +from _incydr_sdk.file_events.models.response import SearchFilterGroupV2 from _incydr_sdk.queries.utils import parse_ts_to_ms_str _term_enum_map = { @@ -42,7 +46,7 @@ class Filter(BaseModel): term: str operator: Union[Operator, str] - value: Optional[Union[int, str]] + value: Optional[Union[int, str, List[str]]] class Config: use_enum_values = True @@ -86,6 +90,11 @@ class FilterGroup(BaseModel): filters: Optional[List[Filter]] +class FilterGroupV2(BaseModel): + subgroupClause: str = "AND" + subgroups: List[Union[FilterGroupV2, FilterGroup]] + + class Query(Model): groupClause: str = "AND" groups: Optional[List[FilterGroup]] @@ -271,6 +280,73 @@ def 
less_than(self, term: str, value: int): ) return self + @validate_arguments + def is_any(self, term: str, values: List[str]): + """ + Adds an `is_any` filter to the query. The opposite of the `is_none` filter. + + When passed as part of a query, returns events when the field corresponding to the filter term matches any of the provided values. + + Example: + `EventQuery(**kwargs).is_any("destination.category", ["AI Tools", "Cloud Storage"])` creates a query which will return file events where the destination category is either AI Tools or Cloud Storage. + + **Parameters**: + + * **term**: `str` - The term which corresponds to a file event field. + * **values**: `List[str]` - The values to match. + """ + self.groups.append( + FilterGroup( + filters=[Filter(term=term, operator=Operator.IS_ANY, value=values)] + ) + ) + return self + + @validate_arguments + def is_none(self, term: str, values: List[str]): + """ + Adds an `is_none` filter to the query. The opposite of the `is_any` filter. + + When passed as part of a query, returns events when the field corresponding to the filter term matches none of the provided values. + + Example: + `EventQuery(**kwargs).is_none("destination.category", ["AI Tools", "Cloud Storage"])` creates a query which will return file events where the destination category is anything other than AI Tools or Cloud Storage. + + **Parameters**: + + * **term**: `str` - The term which corresponds to a file event field. + * **values**: `List[str]` - The values for the term to not match. + """ + self.groups.append( + FilterGroup( + filters=[Filter(term=term, operator=Operator.IS_NONE, value=values)] + ) + ) + return self + + def date_range(self, term: str, start_date=None, end_date=None): + """ + Adds a date-based filter for the specified term. + + When passed as part of a query, returns events within the specified date range, or all events before/after the specified date if only one of start_date or end_date is given.
+ + Example: + `EventQuery(**kwargs).date_range(term="event.inserted", start_date="P1D")` creates a query that returns all events inserted into Forensic Search within the past day. + + **Parameters**: + + * **term**: `str` - The term which corresponds to a file event field. + * **start_date**: `int`, `float`, `str`, `datetime`, `timedelta` - Start of the date range to query for events. Defaults to None. + * **end_date**: `int`, `float`, `str`, `datetime` - End of the date range to query for events. Defaults to None. + """ + if start_date or end_date: + self.groups.append( + _create_date_range_filter_group( + start_date=start_date, end_date=end_date, term=term + ) + ) + return self + def matches_any(self): """ Sets operator to combine multiple filters to `OR`. @@ -281,6 +357,27 @@ def matches_any(self): self.group_clause = "OR" return self + def subquery(self, subgroup_query: EventQuery): + """ + Adds a subgroup to the query, with any filter groups or subgroups from the subgroup_query added to the present query. + + Example: + `EventQuery().greater_than("risk.score", 1).subquery(EventQuery().matches_any().equals("destination.category", "AI Tools").equals("file.name", "example"))` + + This example creates a query which matches events that have a risk score greater than 1 and either a destination category equal to "AI Tools" or a filename equal to "example". + + **Parameters**: + + * **subgroup_query**: `EventQuery` - An EventQuery object. The filter groups and subgroups will be added to the present query. The subgroup query's group clause will be used for the created subgroup.
+ """ + self.groups.append( + FilterGroupV2( + subgroupClause=subgroup_query.group_clause, + subgroups=subgroup_query.groups, + ) + ) + return self + @classmethod def from_saved_search(cls, saved_search: SavedSearch): """ @@ -291,13 +388,7 @@ def from_saved_search(cls, saved_search: SavedSearch): query.group_clause = saved_search.group_clause if saved_search.groups: for i in saved_search.groups: - filters = [ - Filter.construct(value=f.value, operator=f.operator, term=f.term) - for f in i.filters - ] - query.groups.append( - FilterGroup.construct(filterClause=i.filter_clause, filters=filters) - ) + query.groups.append(_handle_filter_group_type(i)) if saved_search.srt_dir: query.sort_dir = saved_search.srt_dir if saved_search.srt_key: @@ -305,7 +396,7 @@ def from_saved_search(cls, saved_search: SavedSearch): return query -def _create_date_range_filter_group(start_date, end_date): +def _create_date_range_filter_group(start_date, end_date, term=None): def _validate_duration_str(iso_duration_str): try: parse_duration(iso_duration_str) @@ -320,7 +411,7 @@ def _validate_duration_str(iso_duration_str): start_date = duration_isoformat(start_date) filters.append( Filter( - term=EventSearchTerm.TIMESTAMP, + term=term or EventSearchTerm.TIMESTAMP, operator=Operator.WITHIN_THE_LAST, value=start_date, ) @@ -329,7 +420,7 @@ def _validate_duration_str(iso_duration_str): if start_date: filters.append( Filter( - term=EventSearchTerm.TIMESTAMP, + term=term or EventSearchTerm.TIMESTAMP, operator=Operator.ON_OR_AFTER, value=parse_ts_to_ms_str(start_date), ) @@ -338,9 +429,41 @@ def _validate_duration_str(iso_duration_str): if end_date: filters.append( Filter( - term=EventSearchTerm.TIMESTAMP, + term=term or EventSearchTerm.TIMESTAMP, operator=Operator.ON_OR_BEFORE, value=parse_ts_to_ms_str(end_date), ) ) return FilterGroup(filters=filters) + + +def _create_filter_group(filter_group: SearchFilterGroup) -> FilterGroup: + filters = [ + Filter.construct(value=f.value, operator=f.operator, 
term=f.term) + for f in filter_group.filters + ] + return FilterGroup.construct( + filterClause=filter_group.filter_clause, filters=filters + ) + + +def _create_filter_group_v2(filter_group_v2: SearchFilterGroupV2) -> FilterGroupV2: + subgroups = [] + for subgroup in filter_group_v2.subgroups: + subgroups.append(_handle_filter_group_type(subgroup)) + return FilterGroupV2.construct( + subgroupClause=filter_group_v2.subgroup_clause, subgroups=subgroups + ) + + +def _handle_filter_group_type( + filter_group: Union[SearchFilterGroup, SearchFilterGroupV2] +) -> Union[FilterGroup, FilterGroupV2]: + if isinstance(filter_group, SearchFilterGroup): + return _create_filter_group(filter_group) + if isinstance(filter_group, SearchFilterGroupV2): + return _create_filter_group_v2(filter_group) + else: + raise TypeError( + "Query filter group must be one of: SearchFilterGroup, SearchFilterGroupV2" + ) diff --git a/tests/queries/test_event_query.py b/tests/queries/test_event_query.py index 5f756a0..2ae30f7 100644 --- a/tests/queries/test_event_query.py +++ b/tests/queries/test_event_query.py @@ -4,12 +4,101 @@ import pytest from pydantic import ValidationError +from _incydr_sdk.file_events.models.response import SavedSearch from _incydr_sdk.queries.file_events import Filter from _incydr_sdk.queries.file_events import FilterGroup +from _incydr_sdk.queries.file_events import FilterGroupV2 from incydr import EventQuery TEST_START_DATE = "P1D" TEST_TIMESTAMP = "2020-09-10 11:12:13" +TEST_SAVED_SEARCH = SavedSearch().parse_obj( + { + "apiVersion": 2, + "columns": None, + "createdByUID": "testcreatoruid", + "createdByUsername": "example@code42.com", + "creationTimestamp": "2025-02-04T15:36:59.926404Z", + "groupClause": "AND", + "groups": [ + { + "filterClause": "AND", + "filters": [ + { + "operator": "WITHIN_THE_LAST", + "term": "@timestamp", + "value": "P90D", + "display": None, + } + ], + "display": '{"data":{"isMultivalue":false},"version":"v2"}', + }, + { + "filterClause": "OR", + 
"filters": [ + { + "operator": "IS", + "term": "file.category", + "value": "Image", + "display": None, + } + ], + "display": '{"data":{"isMultivalue":true},"version":"v2"}', + }, + { + "subgroupClause": "OR", + "subgroups": [ + { + "subgroupClause": "AND", + "subgroups": [ + { + "filterClause": "AND", + "filters": [ + { + "operator": "IS", + "term": "file.name", + "value": "*gomez*", + "display": None, + } + ], + "display": '{"data":{"isMultivalue":false},"version":"v2"}', + } + ], + "display": None, + }, + { + "subgroupClause": "AND", + "subgroups": [ + { + "filterClause": "AND", + "filters": [ + { + "operator": "IS", + "term": "file.name", + "value": "*Ticia*", + "display": None, + } + ], + "display": '{"data":{"isMultivalue":false},"version":"v2"}', + } + ], + "display": None, + }, + ], + "display": None, + }, + ], + "id": "test-search-id", + "modifiedByUID": "test-modified-uid", + "modifiedByUsername": "example@code42.com", + "modifiedTimestamp": "2025-02-04T15:36:59.926404Z", + "name": "Chad Ticia/Gomez block saved search", + "notes": "testing functionality of search blocks", + "srtDir": "desc", + "srtKey": None, + "tenantId": "test-tenant-id", + } +) @pytest.mark.parametrize( @@ -223,3 +312,79 @@ def test_event_query_less_than_when_non_numerical_value_raises_error(): def test_event_query_matches_any_sets_query_group_clause_to_or(): q = EventQuery(start_date=TEST_START_DATE).matches_any() assert q.group_clause == "OR" + + +def test_event_query_from_saved_search_creates_expected_filter_groups(): + q = EventQuery().from_saved_search(TEST_SAVED_SEARCH) + assert isinstance(q.groups[0], FilterGroup) + assert isinstance(q.groups[1], FilterGroup) + assert isinstance(q.groups[2], FilterGroupV2) + + +def test_event_query_is_any_creates_correct_filter(): + q = EventQuery().is_any("term", ["value1", "value2"]) + expected = FilterGroup( + filters=[Filter(term="term", operator="IS_ANY", value=["value1", "value2"])] + ) + assert q.groups.pop() == expected + + +def 
test_event_query_is_none_creates_correct_filter(): + q = EventQuery().is_none("term", ["value1", "value2"]) + expected = FilterGroup( + filters=[Filter(term="term", operator="IS_NONE", value=["value1", "value2"])] + ) + assert q.groups.pop() == expected + + +@pytest.mark.parametrize( + "start_timestamp", + [ + TEST_TIMESTAMP, + 1599736333.0, + 1599736333, + datetime.strptime(TEST_TIMESTAMP, "%Y-%m-%d %H:%M:%S"), + ], +) +def test_date_range_filter_creates_correct_filter(start_timestamp): + q = EventQuery().date_range(term="date_term", start_date=start_timestamp) + expected = FilterGroup( + filters=[ + Filter( + term="date_term", + operator="ON_OR_AFTER", + value="2020-09-10T11:12:13.000Z", + ) + ] + ) + assert q.groups.pop() == expected + + +def test_subquery_creates_expected_filter_subgroup(): + subgroup_q = ( + EventQuery() + .matches_any() + .equals("destination.category", ["AI Tools", "Cloud Storage"]) + ) + expected = FilterGroup( + filters=[ + Filter(term="destination.category", operator="IS", value="AI Tools"), + Filter(term="destination.category", operator="IS", value="Cloud Storage"), + ], + filterClause="OR", + ) + q = EventQuery().subquery(subgroup_q) + assert q.group_clause == "AND" + assert q.groups[0].subgroupClause == "OR" + assert q.groups[0].subgroups[0] == expected + + +def test_subquery_handles_nested_subquery(): + q = EventQuery().subquery( + EventQuery().subquery(EventQuery().equals("term", "value")) + ) + assert isinstance(q.groups[0], FilterGroupV2) + assert isinstance(q.groups[0].subgroups[0], FilterGroupV2) + assert isinstance(q.groups[0].subgroups[0].subgroups[0], FilterGroup) + assert q.groups[0].subgroups[0].subgroups[0].filters[0].term == "term" + assert q.groups[0].subgroups[0].subgroups[0].filters[0].value == "value" diff --git a/tests/test_file_events.py b/tests/test_file_events.py index 5391e80..9dd1cc4 100644 --- a/tests/test_file_events.py +++ b/tests/test_file_events.py @@ -406,7 +406,93 @@ srt_key=None, 
tenantId="c4e43418-07d9-4a9f-a138-29f39a124d33", ) - +TEST_SAVED_SEARCH_3 = SavedSearch().parse_obj( + { + "apiVersion": 2, + "columns": None, + "createdByUID": "testcreatoruid", + "createdByUsername": "example@code42.com", + "creationTimestamp": "2025-02-04T15:36:59.926404Z", + "groupClause": "AND", + "groups": [ + { + "filterClause": "AND", + "filters": [ + { + "operator": "WITHIN_THE_LAST", + "term": "@timestamp", + "value": "P90D", + "display": None, + } + ], + "display": '{"data":{"isMultivalue":false},"version":"v2"}', + }, + { + "filterClause": "OR", + "filters": [ + { + "operator": "IS", + "term": "file.category", + "value": "Image", + "display": None, + } + ], + "display": '{"data":{"isMultivalue":true},"version":"v2"}', + }, + { + "subgroupClause": "OR", + "subgroups": [ + { + "subgroupClause": "AND", + "subgroups": [ + { + "filterClause": "AND", + "filters": [ + { + "operator": "IS", + "term": "file.name", + "value": "*gomez*", + "display": None, + } + ], + "display": '{"data":{"isMultivalue":false},"version":"v2"}', + } + ], + "display": None, + }, + { + "subgroupClause": "AND", + "subgroups": [ + { + "filterClause": "AND", + "filters": [ + { + "operator": "IS", + "term": "file.name", + "value": "*Ticia*", + "display": None, + } + ], + "display": '{"data":{"isMultivalue":false},"version":"v2"}', + } + ], + "display": None, + }, + ], + "display": None, + }, + ], + "id": "test-search-id", + "modifiedByUID": "test-modified-uid", + "modifiedByUsername": "example@code42.com", + "modifiedTimestamp": "2025-02-04T15:36:59.926404Z", + "name": "Chad Ticia/Gomez block saved search", + "notes": "testing functionality of search blocks", + "srtDir": "desc", + "srtKey": None, + "tenantId": "test-tenant-id", + } +) TEST_DICT_QUERY = { "groupClause": "AND", @@ -487,6 +573,14 @@ def mock_get_saved_search(httpserver_auth): ).respond_with_json(search_data) +@pytest.fixture +def mock_get_saved_search_with_subgroups(httpserver_auth): + search_data = {"searches": 
[json.loads(TEST_SAVED_SEARCH_3.json())]} + httpserver_auth.expect_request( + f"/v2/file-events/saved-searches/{TEST_SAVED_SEARCH_ID}", method="GET" + ).respond_with_json(search_data) + + @pytest.fixture def mock_list_saved_searches(httpserver_auth): search_data = { @@ -559,6 +653,15 @@ def test_get_saved_search_returns_expected_data(mock_get_saved_search): assert search.json() == TEST_SAVED_SEARCH_1.json() +def test_get_saved_search_returns_expected_data_when_search_has_subgroups( + mock_get_saved_search_with_subgroups, +): + client = Client() + search = client.file_events.v2.get_saved_search(TEST_SAVED_SEARCH_ID) + assert isinstance(search, SavedSearch) + assert search.json() == TEST_SAVED_SEARCH_3.json() + + # ************************************************ CLI ************************************************ format_arg = pytest.mark.parametrize(