Skip to content

Commit efc6215

Browse files
authored
Control search index's text match scope through (local) latencies. (#8674)
1 parent 96e525a commit efc6215

File tree

6 files changed

+283
-67
lines changed

6 files changed

+283
-67
lines changed

app/lib/search/mem_index.dart

Lines changed: 93 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ class InMemoryPackageIndex {
226226
packageScores,
227227
parsedQueryText,
228228
includeNameMatches: (query.offset ?? 0) == 0,
229+
textMatchExtent: query.textMatchExtent ?? TextMatchExtent.api,
229230
);
230231

231232
final nameMatches = textResults?.nameMatches;
@@ -287,7 +288,9 @@ class InMemoryPackageIndex {
287288
boundedList(indexedHits, offset: query.offset, limit: query.limit);
288289

289290
late List<PackageHit> packageHits;
290-
if (textResults != null && (textResults.topApiPages?.isNotEmpty ?? false)) {
291+
if ((query.textMatchExtent ?? TextMatchExtent.api).shouldMatchApi() &&
292+
textResults != null &&
293+
(textResults.topApiPages?.isNotEmpty ?? false)) {
291294
packageHits = indexedHits.map((ps) {
292295
final apiPages = textResults.topApiPages?[ps.index]
293296
// TODO(https://github.com/dart-lang/pub-dev/issues/7106): extract title for the page
@@ -305,6 +308,7 @@ class InMemoryPackageIndex {
305308
nameMatches: nameMatches,
306309
topicMatches: topicMatches,
307310
packageHits: packageHits,
311+
errorMessage: textResults?.errorMessage,
308312
);
309313
}
310314

@@ -332,61 +336,81 @@ class InMemoryPackageIndex {
332336
IndexedScore<String> packageScores,
333337
String? text, {
334338
required bool includeNameMatches,
339+
required TextMatchExtent textMatchExtent,
335340
}) {
341+
if (text == null || text.isEmpty) {
342+
return null;
343+
}
344+
336345
final sw = Stopwatch()..start();
337-
if (text != null && text.isNotEmpty) {
338-
final words = splitForQuery(text);
339-
if (words.isEmpty) {
340-
for (var i = 0; i < packageScores.length; i++) {
341-
packageScores.setValue(i, 0);
342-
}
343-
return _TextResults.empty();
344-
}
346+
final words = splitForQuery(text);
347+
if (words.isEmpty) {
348+
packageScores.fillRange(0, packageScores.length, 0);
349+
return _TextResults.empty();
350+
}
345351

346-
bool aborted = false;
352+
final matchName = textMatchExtent.shouldMatchName();
353+
if (!matchName) {
354+
packageScores.fillRange(0, packageScores.length, 0);
355+
return _TextResults.empty(
356+
errorMessage:
357+
'Search index in reduced mode: unable to match query text.');
358+
}
347359

348-
bool checkAborted() {
349-
if (!aborted && sw.elapsed > _textSearchTimeout) {
350-
aborted = true;
351-
_logger.info(
352-
'[pub-aborted-search-query] Aborted text search after ${sw.elapsedMilliseconds} ms.');
353-
}
354-
return aborted;
360+
bool aborted = false;
361+
bool checkAborted() {
362+
if (!aborted && sw.elapsed > _textSearchTimeout) {
363+
aborted = true;
364+
_logger.info(
365+
'[pub-aborted-search-query] Aborted text search after ${sw.elapsedMilliseconds} ms.');
355366
}
367+
return aborted;
368+
}
369+
370+
Set<String>? nameMatches;
371+
if (includeNameMatches && _documentsByName.containsKey(text)) {
372+
nameMatches ??= <String>{};
373+
nameMatches.add(text);
374+
}
375+
376+
// Multiple words are scored separately, and then the individual scores
377+
// are multiplied. We can use a package filter that is applied after each
378+
// word to reduce the scope of the later words based on the previous results.
379+
// However, API docs search should be filtered on the original list.
380+
final indexedPositiveList = packageScores.toIndexedPositiveList();
356381

357-
Set<String>? nameMatches;
358-
if (includeNameMatches && _documentsByName.containsKey(text)) {
382+
final matchDescription = textMatchExtent.shouldMatchDescription();
383+
final matchReadme = textMatchExtent.shouldMatchReadme();
384+
final matchApi = textMatchExtent.shouldMatchApi();
385+
386+
for (final word in words) {
387+
if (includeNameMatches && _documentsByName.containsKey(word)) {
359388
nameMatches ??= <String>{};
360-
nameMatches.add(text);
389+
nameMatches.add(word);
361390
}
362391

363-
// Multiple words are scored separately, and then the individual scores
364-
// are multiplied. We can use a package filter that is applied after each
365-
// word to reduce the scope of the later words based on the previous results.
366-
/// However, API docs search should be filtered on the original list.
367-
final indexedPositiveList = packageScores.toIndexedPositiveList();
368-
369-
for (final word in words) {
370-
if (includeNameMatches && _documentsByName.containsKey(word)) {
371-
nameMatches ??= <String>{};
372-
nameMatches.add(word);
373-
}
392+
_scorePool.withScore(
393+
value: 0.0,
394+
fn: (wordScore) {
395+
_packageNameIndex.searchWord(word,
396+
score: wordScore, filterOnNonZeros: packageScores);
374397

375-
_scorePool.withScore(
376-
value: 0.0,
377-
fn: (wordScore) {
378-
_packageNameIndex.searchWord(word,
379-
score: wordScore, filterOnNonZeros: packageScores);
398+
if (matchDescription) {
380399
_descrIndex.searchAndAccumulate(word, score: wordScore);
400+
}
401+
if (matchReadme) {
381402
_readmeIndex.searchAndAccumulate(word,
382403
weight: 0.75, score: wordScore);
383-
packageScores.multiplyAllFrom(wordScore);
384-
},
385-
);
386-
}
404+
}
405+
packageScores.multiplyAllFrom(wordScore);
406+
},
407+
);
408+
}
387409

388-
final topApiPages =
389-
List<List<MapEntry<String, double>>?>.filled(_documents.length, null);
410+
final topApiPages =
411+
List<List<MapEntry<String, double>>?>.filled(_documents.length, null);
412+
413+
if (matchApi) {
390414
const maxApiPageCount = 2;
391415
if (!checkAborted()) {
392416
_apiSymbolIndex.withSearchWords(words, weight: 0.70, (symbolPages) {
@@ -420,29 +444,28 @@ class InMemoryPackageIndex {
420444
}
421445
});
422446
}
447+
}
423448

424-
// filter results based on exact phrases
425-
final phrases = extractExactPhrases(text);
426-
if (!aborted && phrases.isNotEmpty) {
427-
for (var i = 0; i < packageScores.length; i++) {
428-
if (packageScores.isNotPositive(i)) continue;
429-
final doc = _documents[i];
430-
final matchedAllPhrases = phrases.every((phrase) =>
431-
doc.package.contains(phrase) ||
432-
doc.description!.contains(phrase) ||
433-
doc.readme!.contains(phrase));
434-
if (!matchedAllPhrases) {
435-
packageScores.setValue(i, 0);
436-
}
449+
// filter results based on exact phrases
450+
final phrases = extractExactPhrases(text);
451+
if (!aborted && phrases.isNotEmpty) {
452+
for (var i = 0; i < packageScores.length; i++) {
453+
if (packageScores.isNotPositive(i)) continue;
454+
final doc = _documents[i];
455+
final matchedAllPhrases = phrases.every((phrase) =>
456+
(matchName && doc.package.contains(phrase)) ||
457+
(matchDescription && doc.description!.contains(phrase)) ||
458+
(matchReadme && doc.readme!.contains(phrase)));
459+
if (!matchedAllPhrases) {
460+
packageScores.setValue(i, 0);
437461
}
438462
}
439-
440-
return _TextResults(
441-
topApiPages,
442-
nameMatches: nameMatches?.toList(),
443-
);
444463
}
445-
return null;
464+
465+
return _TextResults(
466+
topApiPages,
467+
nameMatches: nameMatches?.toList(),
468+
);
446469
}
447470

448471
List<IndexedPackageHit> _rankWithValues(
@@ -521,15 +544,20 @@ class InMemoryPackageIndex {
521544
class _TextResults {
522545
final List<List<MapEntry<String, double>>?>? topApiPages;
523546
final List<String>? nameMatches;
547+
final String? errorMessage;
524548

525-
factory _TextResults.empty() => _TextResults(
526-
null,
527-
nameMatches: null,
528-
);
549+
factory _TextResults.empty({String? errorMessage}) {
550+
return _TextResults(
551+
null,
552+
nameMatches: null,
553+
errorMessage: errorMessage,
554+
);
555+
}
529556

530557
_TextResults(
531558
this.topApiPages, {
532559
required this.nameMatches,
560+
this.errorMessage,
533561
});
534562
}
535563

app/lib/search/search_service.dart

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import 'dart:math' show max;
88
import 'package:_pub_shared/search/search_form.dart';
99
import 'package:_pub_shared/search/tags.dart';
1010
import 'package:clock/clock.dart';
11+
import 'package:collection/collection.dart';
1112
import 'package:json_annotation/json_annotation.dart';
1213
import 'package:pub_dev/shared/utils.dart';
1314

@@ -165,6 +166,9 @@ class ServiceSearchQuery {
165166
final int? offset;
166167
final int? limit;
167168

169+
/// The scope/depth of text matching.
170+
final TextMatchExtent? textMatchExtent;
171+
168172
ServiceSearchQuery._({
169173
this.query,
170174
TagsPredicate? tagsPredicate,
@@ -173,6 +177,7 @@ class ServiceSearchQuery {
173177
this.order,
174178
this.offset,
175179
this.limit,
180+
this.textMatchExtent,
176181
}) : parsedQuery = ParsedQueryText.parse(query),
177182
tagsPredicate = tagsPredicate ?? TagsPredicate(),
178183
publisherId = publisherId?.trimToNull();
@@ -185,6 +190,7 @@ class ServiceSearchQuery {
185190
int? minPoints,
186191
int offset = 0,
187192
int? limit = 10,
193+
TextMatchExtent? textMatchExtent,
188194
}) {
189195
final q = query?.trimToNull();
190196
return ServiceSearchQuery._(
@@ -195,6 +201,7 @@ class ServiceSearchQuery {
195201
order: order,
196202
offset: offset,
197203
limit: limit,
204+
textMatchExtent: textMatchExtent,
198205
);
199206
}
200207

@@ -210,6 +217,10 @@ class ServiceSearchQuery {
210217
int.tryParse(uri.queryParameters['minPoints'] ?? '0') ?? 0;
211218
final offset = int.tryParse(uri.queryParameters['offset'] ?? '0') ?? 0;
212219
final limit = int.tryParse(uri.queryParameters['limit'] ?? '0') ?? 0;
220+
final textMatchExtentValue =
221+
uri.queryParameters['textMatchExtent']?.trim() ?? '';
222+
final textMatchExtent = TextMatchExtent.values
223+
.firstWhereOrNull((e) => e.name == textMatchExtentValue);
213224

214225
return ServiceSearchQuery.parse(
215226
query: q,
@@ -219,6 +230,7 @@ class ServiceSearchQuery {
219230
minPoints: minPoints,
220231
offset: max(0, offset),
221232
limit: max(_minSearchLimit, limit),
233+
textMatchExtent: textMatchExtent,
222234
);
223235
}
224236

@@ -229,6 +241,7 @@ class ServiceSearchQuery {
229241
SearchOrder? order,
230242
int? offset,
231243
int? limit,
244+
TextMatchExtent? textMatchExtent,
232245
}) {
233246
return ServiceSearchQuery._(
234247
query: query ?? this.query,
@@ -238,6 +251,7 @@ class ServiceSearchQuery {
238251
minPoints: minPoints,
239252
offset: offset ?? this.offset,
240253
limit: limit ?? this.limit,
254+
textMatchExtent: textMatchExtent ?? this.textMatchExtent,
241255
);
242256
}
243257

@@ -251,6 +265,7 @@ class ServiceSearchQuery {
251265
'minPoints': minPoints.toString(),
252266
'limit': limit?.toString(),
253267
'order': order?.name,
268+
if (textMatchExtent != null) 'textMatchExtent': textMatchExtent!.name,
254269
};
255270
map.removeWhere((k, v) => v == null);
256271
return map;
@@ -277,7 +292,8 @@ class ServiceSearchQuery {
277292
_hasOnlyFreeText &&
278293
_isNaturalOrder &&
279294
_hasNoOwnershipScope &&
280-
!_isFlutterFavorite;
295+
!_isFlutterFavorite &&
296+
(textMatchExtent ?? TextMatchExtent.api).shouldMatchApi();
281297

282298
bool get considerHighlightedHit => _hasOnlyFreeText && _hasNoOwnershipScope;
283299
bool get includeHighlightedHit => considerHighlightedHit && offset == 0;
@@ -295,6 +311,38 @@ class ServiceSearchQuery {
295311
}
296312
}
297313

314+
/// The scope (depth) of the text matching.
315+
enum TextMatchExtent {
316+
/// No text search is done.
317+
/// Requests with text queries will return a failure message.
318+
none,
319+
320+
/// Text search is on package names.
321+
name,
322+
323+
/// Text search is on package names, descriptions and topic tags.
324+
description,
325+
326+
/// Text search is on names, descriptions, topic tags and readme content.
327+
readme,
328+
329+
/// Text search is on names, descriptions, topic tags, readme content and API symbols.
330+
api,
331+
;
332+
333+
/// Text search is on package names.
334+
bool shouldMatchName() => index >= name.index;
335+
336+
/// Text search is on package names, descriptions and topic tags.
337+
bool shouldMatchDescription() => index >= description.index;
338+
339+
/// Text search is on names, descriptions, topic tags and readme content.
340+
bool shouldMatchReadme() => index >= readme.index;
341+
342+
/// Text search is on names, descriptions, topic tags, readme content and API symbols.
343+
bool shouldMatchApi() => index >= api.index;
344+
}
345+
298346
class QueryValidity {
299347
final String? rejectReason;
300348

app/lib/service/entrypoint/search.dart

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class SearchCommand extends Command {
4343
);
4444
registerScopeExitCallback(index.close);
4545

46-
registerSearchIndex(IsolateSearchIndex(index));
46+
registerSearchIndex(LatencyAwareSearchIndex(IsolateSearchIndex(index)));
4747

4848
void scheduleRenew() {
4949
scheduleMicrotask(() async {

0 commit comments

Comments
 (0)