Skip to content

Commit 05eb87b

Browse files
committed
feat: support search with highlighter in pymilvus (milvus-io#3110)
relate: milvus-io/milvus#42589 Signed-off-by: aoiasd <[email protected]>
1 parent 558ffed commit 05eb87b

18 files changed

+613
-451
lines changed

pymilvus/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,14 @@
4545
from .orm.index import Index
4646
from .orm.partition import Partition
4747
from .orm.role import Role
48-
from .orm.schema import CollectionSchema, FieldSchema, Function, FunctionScore, StructFieldSchema
48+
from .orm.schema import (
49+
CollectionSchema,
50+
FieldSchema,
51+
Function,
52+
FunctionScore,
53+
LexicalHighlighter,
54+
StructFieldSchema,
55+
)
4956
from .orm.utility import (
5057
create_resource_group,
5158
create_user,
@@ -96,6 +103,7 @@
96103
"Hits",
97104
"Index",
98105
"IndexType",
106+
"LexicalHighlighter",
99107
"MilvusClient",
100108
"MilvusException",
101109
"MilvusUnavailableException",

pymilvus/client/async_grpc_handler.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
)
2424
from pymilvus.grpc_gen import common_pb2, milvus_pb2_grpc
2525
from pymilvus.grpc_gen import milvus_pb2 as milvus_types
26-
from pymilvus.orm.schema import Function
26+
from pymilvus.orm.schema import Function, Highlighter
2727
from pymilvus.settings import Config
2828

2929
from . import entity_helper, ts_utils, utils
@@ -828,6 +828,7 @@ async def search(
828828
round_decimal: int = -1,
829829
timeout: Optional[float] = None,
830830
ranker: Optional[Function] = None,
831+
highlighter: Optional[Highlighter] = None,
831832
**kwargs,
832833
):
833834
await self.ensure_channel_ready()
@@ -852,6 +853,7 @@ async def search(
852853
output_fields,
853854
round_decimal,
854855
ranker=ranker,
856+
highlighter=highlighter,
855857
**kwargs,
856858
)
857859
return await self._execute_search(request, timeout, round_decimal=round_decimal, **kwargs)

pymilvus/client/grpc_handler.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
)
2424
from pymilvus.grpc_gen import common_pb2, milvus_pb2_grpc
2525
from pymilvus.grpc_gen import milvus_pb2 as milvus_types
26-
from pymilvus.orm.schema import Function, FunctionScore
26+
from pymilvus.orm.schema import Function, FunctionScore, Highlighter
2727
from pymilvus.settings import Config
2828

2929
from . import entity_helper, interceptor, ts_utils, utils
@@ -1010,6 +1010,7 @@ def search(
10101010
round_decimal: int = -1,
10111011
timeout: Optional[float] = None,
10121012
ranker: Union[Function, FunctionScore] = None,
1013+
highlighter: Optional[Highlighter] = None,
10131014
**kwargs,
10141015
):
10151016
check_pass_param(
@@ -1034,6 +1035,7 @@ def search(
10341035
output_fields,
10351036
round_decimal,
10361037
ranker=ranker,
1038+
highlighter=highlighter,
10371039
**kwargs,
10381040
)
10391041
return self._execute_search(request, timeout, round_decimal=round_decimal, **kwargs)

pymilvus/client/prepare.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
FieldSchema,
1717
Function,
1818
FunctionScore,
19+
Highlighter,
1920
isVectorDataType,
2021
)
2122
from pymilvus.orm.types import infer_dtype_by_scalar_data
@@ -1370,6 +1371,7 @@ def search_requests_with_expr(
13701371
output_fields: Optional[List[str]] = None,
13711372
round_decimal: int = -1,
13721373
ranker: Optional[Union[Function, FunctionScore]] = None,
1374+
highlighter: Optional[Highlighter] = None,
13731375
**kwargs,
13741376
) -> milvus_types.SearchRequest:
13751377
use_default_consistency = ts_utils.construct_guarantee_ts(collection_name, kwargs)
@@ -1524,6 +1526,9 @@ def search_requests_with_expr(
15241526
elif ranker is not None:
15251527
raise ParamError(message="The search ranker must be a Function or FunctionScore.")
15261528

1529+
if highlighter is not None:
1530+
request.highlighter.CopyFrom(Prepare.highlighter_schema(highlighter))
1531+
15271532
return request
15281533

15291534
@classmethod
@@ -1611,6 +1616,16 @@ def common_kv_value(v: Any) -> str:
16111616
return json.dumps(v)
16121617
return str(v)
16131618

1619+
@staticmethod
1620+
def highlighter_schema(highlighter: Highlighter) -> common_types.Highlighter:
1621+
return common_types.Highlighter(
1622+
type=highlighter.type,
1623+
params=[
1624+
common_types.KeyValuePair(key=str(k), value=Prepare.common_kv_value(v))
1625+
for k, v in highlighter.params.items()
1626+
],
1627+
)
1628+
16141629
@staticmethod
16151630
def function_score_schema(function_score: FunctionScore) -> schema_types.FunctionScore:
16161631
functions = [

pymilvus/client/search_result.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def __init__(
3030
all_scores: List[float],
3131
fields_data: List[schema_pb2.FieldData],
3232
output_fields: List[str],
33+
highlight_results: List[common_pb2.HighlightResult],
3334
pk_name: str,
3435
):
3536
self.ids = all_pks[start:end]
@@ -93,6 +94,14 @@ def __init__(
9394
else:
9495
msg = f"Unsupported field type: {field_data.type}"
9596
raise MilvusException(msg)
97+
98+
if len(highlight_results) > 0:
99+
for i, hit in enumerate(top_k_res):
100+
hit["highlight"] = {
101+
result.field_name: list(result.datas[i + start].fragments)
102+
for result in highlight_results
103+
}
104+
96105
super().__init__(top_k_res)
97106

98107
def __str__(self) -> str:
@@ -331,9 +340,11 @@ def _parse_search_result_data(
331340
all_scores,
332341
res.fields_data,
333342
res.output_fields,
343+
res.highlight_results,
334344
_pk_name,
335345
)
336346
)
347+
337348
nq_thres += topk
338349
return data
339350

@@ -723,6 +734,10 @@ def score(self) -> float:
723734
"""Alias of distance, will be deprecated soon"""
724735
return self.distance
725736

737+
@property
738+
def highlight(self) -> Dict[str, Any]:
739+
return self.data.get("highlight")
740+
726741
@property
727742
def fields(self) -> Dict[str, Any]:
728743
"""Patch for orm, will be deprecated soon"""

pymilvus/client/types.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,11 @@ class FunctionType(IntEnum):
144144
RERANK = 3
145145

146146

147+
class HighlightType(IntEnum):
148+
LEXICAL = 0
149+
SEMANTIC = 1
150+
151+
147152
class RangeType(IntEnum):
148153
LT = 0 # less than
149154
LTE = 1 # less than or equal

pymilvus/grpc_gen/common_pb2.py

Lines changed: 24 additions & 24 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pymilvus/grpc_gen/common_pb2.pyi

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ class MsgType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
171171
FlushSegment: _ClassVar[MsgType]
172172
CreateSegment: _ClassVar[MsgType]
173173
Import: _ClassVar[MsgType]
174+
FlushAll: _ClassVar[MsgType]
174175
Search: _ClassVar[MsgType]
175176
SearchResult: _ClassVar[MsgType]
176177
GetIndexState: _ClassVar[MsgType]
@@ -241,6 +242,7 @@ class MsgType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
241242
AlterDatabase: _ClassVar[MsgType]
242243
DescribeDatabase: _ClassVar[MsgType]
243244
AddCollectionField: _ClassVar[MsgType]
245+
AlterWAL: _ClassVar[MsgType]
244246

245247
class DslType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
246248
__slots__ = ()
@@ -357,6 +359,7 @@ class ObjectPrivilege(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
357359
PrivilegeAddCollectionFunction: _ClassVar[ObjectPrivilege]
358360
PrivilegeAlterCollectionFunction: _ClassVar[ObjectPrivilege]
359361
PrivilegeDropCollectionFunction: _ClassVar[ObjectPrivilege]
362+
PrivilegeUpdateReplicateConfiguration: _ClassVar[ObjectPrivilege]
360363

361364
class StateCode(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
362365
__slots__ = ()
@@ -538,6 +541,7 @@ ManualFlush: MsgType
538541
FlushSegment: MsgType
539542
CreateSegment: MsgType
540543
Import: MsgType
544+
FlushAll: MsgType
541545
Search: MsgType
542546
SearchResult: MsgType
543547
GetIndexState: MsgType
@@ -608,6 +612,7 @@ ListDatabases: MsgType
608612
AlterDatabase: MsgType
609613
DescribeDatabase: MsgType
610614
AddCollectionField: MsgType
615+
AlterWAL: MsgType
611616
Dsl: DslType
612617
BoolExprV1: DslType
613618
UndefiedState: CompactionState
@@ -706,6 +711,7 @@ PrivilegeListFileResources: ObjectPrivilege
706711
PrivilegeAddCollectionFunction: ObjectPrivilege
707712
PrivilegeAlterCollectionFunction: ObjectPrivilege
708713
PrivilegeDropCollectionFunction: ObjectPrivilege
714+
PrivilegeUpdateReplicateConfiguration: ObjectPrivilege
709715
Initializing: StateCode
710716
Healthy: StateCode
711717
Abnormal: StateCode

pymilvus/grpc_gen/milvus_pb2.py

Lines changed: 342 additions & 320 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)