Skip to content

Commit 5f65452

Browse files
authored
feat: support search with highlighter in pymilvus (#3110)
relate: milvus-io/milvus#42589 Signed-off-by: aoiasd <[email protected]>
1 parent 96b35c6 commit 5f65452

18 files changed

+719
-603
lines changed

pymilvus/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,14 @@
4545
from .orm.index import Index
4646
from .orm.partition import Partition
4747
from .orm.role import Role
48-
from .orm.schema import CollectionSchema, FieldSchema, Function, FunctionScore, StructFieldSchema
48+
from .orm.schema import (
49+
CollectionSchema,
50+
FieldSchema,
51+
Function,
52+
FunctionScore,
53+
LexicalHighlighter,
54+
StructFieldSchema,
55+
)
4956
from .orm.utility import (
5057
create_resource_group,
5158
create_user,
@@ -96,6 +103,7 @@
96103
"Hits",
97104
"Index",
98105
"IndexType",
106+
"LexicalHighlighter",
99107
"MilvusClient",
100108
"MilvusException",
101109
"MilvusUnavailableException",

pymilvus/client/async_grpc_handler.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
)
2424
from pymilvus.grpc_gen import common_pb2, milvus_pb2_grpc
2525
from pymilvus.grpc_gen import milvus_pb2 as milvus_types
26-
from pymilvus.orm.schema import Function
26+
from pymilvus.orm.schema import Function, Highlighter
2727
from pymilvus.settings import Config
2828

2929
from . import entity_helper, ts_utils, utils
@@ -830,6 +830,7 @@ async def search(
830830
round_decimal: int = -1,
831831
timeout: Optional[float] = None,
832832
ranker: Optional[Function] = None,
833+
highlighter: Optional[Highlighter] = None,
833834
**kwargs,
834835
):
835836
await self.ensure_channel_ready()
@@ -860,6 +861,7 @@ async def search(
860861
output_fields,
861862
round_decimal,
862863
ranker=ranker,
864+
highlighter=highlighter,
863865
**kwargs,
864866
)
865867
return await self._execute_search(request, timeout, round_decimal=round_decimal, **kwargs)

pymilvus/client/grpc_handler.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
)
2424
from pymilvus.grpc_gen import common_pb2, milvus_pb2_grpc
2525
from pymilvus.grpc_gen import milvus_pb2 as milvus_types
26-
from pymilvus.orm.schema import Function, FunctionScore
26+
from pymilvus.orm.schema import Function, FunctionScore, Highlighter
2727
from pymilvus.settings import Config
2828

2929
from . import entity_helper, interceptor, ts_utils, utils
@@ -1010,6 +1010,7 @@ def search(
10101010
round_decimal: int = -1,
10111011
timeout: Optional[float] = None,
10121012
ranker: Union[Function, FunctionScore] = None,
1013+
highlighter: Optional[Highlighter] = None,
10131014
**kwargs,
10141015
):
10151016
check_pass_param(
@@ -1034,6 +1035,7 @@ def search(
10341035
output_fields,
10351036
round_decimal,
10361037
ranker=ranker,
1038+
highlighter=highlighter,
10371039
**kwargs,
10381040
)
10391041
return self._execute_search(request, timeout, round_decimal=round_decimal, **kwargs)

pymilvus/client/prepare.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
FieldSchema,
1717
Function,
1818
FunctionScore,
19+
Highlighter,
1920
isVectorDataType,
2021
)
2122
from pymilvus.orm.types import infer_dtype_by_scalar_data
@@ -1370,6 +1371,7 @@ def search_requests_with_expr(
13701371
output_fields: Optional[List[str]] = None,
13711372
round_decimal: int = -1,
13721373
ranker: Optional[Union[Function, FunctionScore]] = None,
1374+
highlighter: Optional[Highlighter] = None,
13731375
**kwargs,
13741376
) -> milvus_types.SearchRequest:
13751377
use_default_consistency = ts_utils.construct_guarantee_ts(collection_name, kwargs)
@@ -1524,6 +1526,9 @@ def search_requests_with_expr(
15241526
elif ranker is not None:
15251527
raise ParamError(message="The search ranker must be a Function or FunctionScore.")
15261528

1529+
if highlighter is not None:
1530+
request.highlighter.CopyFrom(Prepare.highlighter_schema(highlighter))
1531+
15271532
return request
15281533

15291534
@classmethod
@@ -1611,6 +1616,16 @@ def common_kv_value(v: Any) -> str:
16111616
return json.dumps(v)
16121617
return str(v)
16131618

1619+
@staticmethod
1620+
def highlighter_schema(highlighter: Highlighter) -> common_types.Highlighter:
1621+
return common_types.Highlighter(
1622+
type=highlighter.type,
1623+
params=[
1624+
common_types.KeyValuePair(key=str(k), value=Prepare.common_kv_value(v))
1625+
for k, v in highlighter.params.items()
1626+
],
1627+
)
1628+
16141629
@staticmethod
16151630
def function_score_schema(function_score: FunctionScore) -> schema_types.FunctionScore:
16161631
functions = [

pymilvus/client/search_result.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def __init__(
3030
all_scores: List[float],
3131
fields_data: List[schema_pb2.FieldData],
3232
output_fields: List[str],
33+
highlight_results: List[common_pb2.HighlightResult],
3334
pk_name: str,
3435
):
3536
self.ids = all_pks[start:end]
@@ -93,6 +94,14 @@ def __init__(
9394
else:
9495
msg = f"Unsupported field type: {field_data.type}"
9596
raise MilvusException(msg)
97+
98+
if len(highlight_results) > 0:
99+
for i, hit in enumerate(top_k_res):
100+
hit["highlight"] = {
101+
result.field_name: list(result.datas[i + start].fragments)
102+
for result in highlight_results
103+
}
104+
96105
super().__init__(top_k_res)
97106

98107
def __str__(self) -> str:
@@ -331,9 +340,11 @@ def _parse_search_result_data(
331340
all_scores,
332341
res.fields_data,
333342
res.output_fields,
343+
res.highlight_results,
334344
_pk_name,
335345
)
336346
)
347+
337348
nq_thres += topk
338349
return data
339350

@@ -723,6 +734,10 @@ def score(self) -> float:
723734
"""Alias of distance, will be deprecated soon"""
724735
return self.distance
725736

737+
@property
738+
def highlight(self) -> Dict[str, Any]:
739+
return self.data.get("highlight")
740+
726741
@property
727742
def fields(self) -> Dict[str, Any]:
728743
"""Patch for orm, will be deprecated soon"""

pymilvus/client/types.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,11 @@ class FunctionType(IntEnum):
144144
RERANK = 3
145145

146146

147+
class HighlightType(IntEnum):
148+
LEXICAL = 0
149+
SEMANTIC = 1
150+
151+
147152
class RangeType(IntEnum):
148153
LT = 0 # less than
149154
LTE = 1 # less than or equal

pymilvus/grpc_gen/common_pb2.py

Lines changed: 12 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pymilvus/grpc_gen/common_pb2.pyi

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -355,9 +355,6 @@ class ObjectPrivilege(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
355355
PrivilegeAddFileResource: _ClassVar[ObjectPrivilege]
356356
PrivilegeRemoveFileResource: _ClassVar[ObjectPrivilege]
357357
PrivilegeListFileResources: _ClassVar[ObjectPrivilege]
358-
PrivilegeAddCollectionFunction: _ClassVar[ObjectPrivilege]
359-
PrivilegeAlterCollectionFunction: _ClassVar[ObjectPrivilege]
360-
PrivilegeDropCollectionFunction: _ClassVar[ObjectPrivilege]
361358
PrivilegeUpdateReplicateConfiguration: _ClassVar[ObjectPrivilege]
362359

363360
class StateCode(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
@@ -706,9 +703,6 @@ PrivilegeAddCollectionField: ObjectPrivilege
706703
PrivilegeAddFileResource: ObjectPrivilege
707704
PrivilegeRemoveFileResource: ObjectPrivilege
708705
PrivilegeListFileResources: ObjectPrivilege
709-
PrivilegeAddCollectionFunction: ObjectPrivilege
710-
PrivilegeAlterCollectionFunction: ObjectPrivilege
711-
PrivilegeDropCollectionFunction: ObjectPrivilege
712706
PrivilegeUpdateReplicateConfiguration: ObjectPrivilege
713707
Initializing: StateCode
714708
Healthy: StateCode

pymilvus/grpc_gen/milvus_pb2.py

Lines changed: 477 additions & 477 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)