@@ -14,10 +14,10 @@ class DummyEmbedEncoder(BaseEncoder):
14
14
def __init__ (self , model_name = "dummy-embedding" ):
15
15
self .model_name = model_name
16
16
17
- def encode (self , text : str ) -> list : # noqa: ARG002
17
+ def embed (self , text : str ) -> list : # noqa: ARG002
18
18
return np .random .random ((10 , 1 ))
19
19
20
- def encode_batch (self , texts : list [str ]) -> list [list [float ]]:
20
+ def embed_documents (self , texts : list [str ]) -> list [list [float ]]:
21
21
return np .random .random ((10 , len (texts )))
22
22
23
23
@@ -32,13 +32,37 @@ def _use_test_qdrant_settings(settings, mocker):
32
32
settings .QDRANT_HOST = "https://test"
33
33
settings .QDRANT_BASE_COLLECTION_NAME = "test"
34
34
settings .CONTENT_FILE_EMBEDDING_CHUNK_OVERLAP = 0
35
+ settings .CONTENT_FILE_EMBEDDING_SEMANTIC_CHUNKING_ENABLED = False
35
36
mock_qdrant = mocker .patch ("qdrant_client.QdrantClient" )
37
+ mocker .patch ("vector_search.utils.SemanticChunker" )
38
+
36
39
mock_qdrant .scroll .return_value = [
37
40
[],
38
41
None ,
39
42
]
40
- get_text_splitter_patch = mocker .patch ("vector_search.utils._get_text_splitter" )
41
- get_text_splitter_patch .return_value = RecursiveCharacterTextSplitter ()
43
+ get_text_splitter_patch = mocker .patch ("vector_search.utils._chunk_documents" )
44
+ get_text_splitter_patch .return_value = (
45
+ RecursiveCharacterTextSplitter ().create_documents (
46
+ texts = ["test dociment" ],
47
+ metadatas = [
48
+ {
49
+ "run_title" : "" ,
50
+ "platform" : "" ,
51
+ "offered_by" : "" ,
52
+ "run_readable_id" : "" ,
53
+ "resource_readable_id" : "" ,
54
+ "content_type" : "" ,
55
+ "file_extension" : "" ,
56
+ "content_feature_type" : "" ,
57
+ "course_number" : "" ,
58
+ "file_type" : "" ,
59
+ "description" : "" ,
60
+ "key" : "" ,
61
+ "url" : "" ,
62
+ }
63
+ ],
64
+ )
65
+ )
42
66
mock_qdrant .count .return_value = CountResult (count = 10 )
43
67
mocker .patch (
44
68
"vector_search.utils.qdrant_client" ,
0 commit comments