
Commit 4409f53

Revert "Merge pull request #18 from redis-performance/restore-performance-optimizations"
This reverts commit 25ca5ac, reversing changes made to a8d26cd.
1 parent 826dab3 commit 4409f53

File tree

2 files changed: +20 -88 lines

engine/base_client/search.py

Lines changed: 20 additions & 52 deletions
@@ -2,7 +2,6 @@
 import time
 from multiprocessing import get_context
 from typing import Iterable, List, Optional, Tuple
-from itertools import islice
 
 import numpy as np
 import tqdm
@@ -84,34 +83,26 @@ def search_all(
 
         # Handle num_queries parameter
         if num_queries > 0:
-            # If we need more queries than available, use a cycling generator
+            # If we need more queries than available, cycle through the list
            if num_queries > len(queries_list) and len(queries_list) > 0:
                 print(f"Requested {num_queries} queries but only {len(queries_list)} are available.")
-                print(f"Using a cycling generator to efficiently process queries.")
-
-                # Create a cycling generator function
-                def cycling_query_generator(queries, total_count):
-                    """Generate queries by cycling through the available ones."""
-                    count = 0
-                    while count < total_count:
-                        for query in queries:
-                            if count < total_count:
-                                yield query
-                                count += 1
-                            else:
-                                break
-
-                # Use the generator instead of creating a full list
-                used_queries = cycling_query_generator(queries_list, num_queries)
-                # We need to know the total count for the progress bar
-                total_query_count = num_queries
+                print(f"Extending queries by cycling through the available ones.")
+                # Calculate how many complete cycles and remaining items we need
+                complete_cycles = num_queries // len(queries_list)
+                remaining = num_queries % len(queries_list)
+
+                # Create the extended list
+                extended_queries = []
+                for _ in range(complete_cycles):
+                    extended_queries.extend(queries_list)
+                extended_queries.extend(queries_list[:remaining])
+
+                used_queries = extended_queries
             else:
                 used_queries = queries_list[:num_queries]
-                total_query_count = len(used_queries)
             print(f"Using {num_queries} queries")
         else:
             used_queries = queries_list
-            total_query_count = len(used_queries)
 
         if parallel == 1:
             start = time.perf_counter()
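
Both sides of this hunk produce the same query sequence: the reverted code streamed it from a generator, while the restored code materializes a plain list, which keeps len() and re-iteration working at the cost of O(num_queries) memory. A minimal standalone sketch of the two equivalent approaches (function names hypothetical; the lazy variant uses itertools rather than the hand-rolled generator above):

from itertools import cycle, islice

def repeat_queries_lazy(queries, num_queries):
    """Stream the cycled sequence without materializing it (as in the reverted generator)."""
    return islice(cycle(queries), num_queries)

def repeat_queries_eager(queries, num_queries):
    """Build the full list up front (as in the restored code)."""
    complete_cycles = num_queries // len(queries)
    remaining = num_queries % len(queries)
    extended = []
    for _ in range(complete_cycles):
        extended.extend(queries)
    extended.extend(queries[:remaining])
    return extended

# Both yield the same sequence of queries.
assert list(repeat_queries_lazy([1, 2, 3], 7)) == repeat_queries_eager([1, 2, 3], 7)
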
@@ -121,32 +112,22 @@ def cycling_query_generator(queries, total_count):
         else:
             ctx = get_context(self.get_mp_start_method())
 
-            def process_initializer():
-                """Initialize each process before starting the search."""
-                self.__class__.init_client(
+            with ctx.Pool(
+                processes=parallel,
+                initializer=self.__class__.init_client,
+                initargs=(
                     self.host,
                     distance,
                     self.connection_params,
                     self.search_params,
-                )
-                self.setup_search()
-
-            # Dynamically chunk the generator
-            query_chunks = list(chunked_iterable(used_queries, max(1, len(used_queries) // parallel)))
-
-            with ctx.Pool(
-                processes=parallel,
-                initializer=process_initializer,
+                ),
             ) as pool:
                 if parallel > 10:
                     time.sleep(15)  # Wait for all processes to start
                 start = time.perf_counter()
-                results = pool.starmap(
-                    process_chunk,
-                    [(chunk, search_one) for chunk in query_chunks],
+                precisions, latencies = list(
+                    zip(*pool.imap_unordered(search_one, iterable=tqdm.tqdm(used_queries)))
                 )
-                precisions, latencies = zip(*[result for chunk in results for result in chunk])
-
                 total_time = time.perf_counter() - start
 
         self.__class__.delete_client()
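
The restored parallel path leans on two standard multiprocessing features: initargs is forwarded to the initializer once in every worker process, and imap_unordered streams results back as each query finishes, so wrapping the input in tqdm gives a live progress bar. A minimal self-contained sketch of that pattern (host value and search body are hypothetical stand-ins):

import time
from multiprocessing import get_context

import tqdm

_client = None  # per-process global, set once by the initializer

def init_client(host):
    """Runs once in each worker; Pool passes `initargs` straight through."""
    global _client
    _client = f"connection to {host}"  # stand-in for a real client handle

def search_one(query):
    """Top-level function so it can be pickled and shipped to workers."""
    start = time.perf_counter()
    _ = (_client, query)  # stand-in for the actual search call
    return 1.0, time.perf_counter() - start  # (precision, latency)

if __name__ == "__main__":
    queries = list(range(1000))
    ctx = get_context("spawn")
    with ctx.Pool(processes=4, initializer=init_client, initargs=("localhost",)) as pool:
        precisions, latencies = zip(*pool.imap_unordered(search_one, tqdm.tqdm(queries)))
    print(f"mean latency: {sum(latencies) / len(latencies):.6f}s")

Note that imap_unordered returns results in completion order, which is fine here because precisions and latencies are aggregated rather than matched back to individual queries.
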
@@ -175,16 +156,3 @@ def post_search(self):
     @classmethod
     def delete_client(cls):
         pass
-
-
-def chunked_iterable(iterable, size):
-    """Yield successive chunks of a given size from an iterable."""
-    it = iter(iterable)
-    while chunk := list(islice(it, size)):
-        yield chunk
-
-
-def process_chunk(chunk, search_one):
-    """Process a chunk of queries using the search_one function."""
-    # No progress bar in worker processes to avoid cluttering the output
-    return [search_one(query) for query in chunk]
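
For reference, the deleted chunking helper is a generic pattern: islice pulls up to size items per pass, and the walrus expression ends the loop on the first empty (falsy) chunk. A standalone sketch with a usage check:

from itertools import islice

def chunked_iterable(iterable, size):
    """Yield successive lists of up to `size` items from any iterable."""
    it = iter(iterable)
    # list(islice(it, size)) becomes empty, hence falsy, once `it` is exhausted.
    while chunk := list(islice(it, size)):
        yield chunk

assert list(chunked_iterable(range(7), 3)) == [[0, 1, 2], [3, 4, 5], [6]]

Pairing it with one pool task per chunk reduces inter-process messaging, but, as the deleted comment notes, it gives up per-query progress reporting in the workers.
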

test_multiprocessing.py

Lines changed: 0 additions & 36 deletions
This file was deleted.

0 commit comments