Skip to content

Commit aac20fb

Browse files
authored
Experimental API search exposed through query. (#1293)
* Experimental API search exposed through query.
* Reduce the batch size of search index.
* Do not clear TokenIndex when updating with the same value.
1 parent f3dfac2 commit aac20fb

File tree

5 files changed

+70
-26
lines changed

5 files changed

+70
-26
lines changed

app/lib/search/index_simple.dart

Lines changed: 44 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
// BSD-style license that can be found in the LICENSE file.
44

55
import 'dart:async';
6-
import 'dart:io';
76
import 'dart:math' as math;
87

98
import 'package:gcloud/service_scope.dart' as ss;
@@ -24,20 +23,19 @@ void registerPackageIndex(PackageIndex index) =>
2423
ss.register(#packageIndexService, index);
2524

2625
class SimplePackageIndex implements PackageIndex {
27-
final bool enableApiIndex;
2826
final Map<String, PackageDocument> _packages = <String, PackageDocument>{};
2927
final Map<String, String> _normalizedPackageText = <String, String>{};
3028
final TokenIndex _nameIndex = new TokenIndex(minLength: 2);
3129
final TokenIndex _descrIndex = new TokenIndex(minLength: 3);
3230
final TokenIndex _readmeIndex = new TokenIndex(minLength: 3);
3331
final TokenIndex _apiDocIndex = new TokenIndex(minLength: 3);
3432
final StringInternPool _internPool = new StringInternPool();
33+
final bool _apiSearchEnabled;
3534
DateTime _lastUpdated;
3635
bool _isReady = false;
3736

38-
SimplePackageIndex({bool enableApiIndex})
39-
: this.enableApiIndex =
40-
enableApiIndex ?? Platform.environment['SEARCH_API_INDEX'] == '1';
37+
SimplePackageIndex({bool apiSearchEnabled: false})
38+
: _apiSearchEnabled = apiSearchEnabled;
4139

4240
@override
4341
bool get isReady => _isReady;
@@ -74,17 +72,20 @@ class SimplePackageIndex implements PackageIndex {
7472
@override
7573
Future addPackage(PackageDocument document) async {
7674
final PackageDocument doc = document.intern(_internPool.intern);
77-
await removePackage(doc.package);
78-
if (document.isDiscontinued == true) return; // isDiscontinued may be null
75+
76+
// isDiscontinued may be null
77+
if (document.isDiscontinued == true) {
78+
await removePackage(doc.package);
79+
return;
80+
}
81+
7982
_packages[doc.package] = doc;
8083
_nameIndex.add(doc.package, doc.package);
8184
_descrIndex.add(doc.package, doc.description);
8285
_readmeIndex.add(doc.package, doc.readme);
83-
if (enableApiIndex) {
84-
for (ApiDocPage page in doc.apiDocPages ?? const []) {
85-
_apiDocIndex.add(
86-
_apiDocPageId(doc.package, page), page.symbols?.join(' '));
87-
}
86+
for (ApiDocPage page in doc.apiDocPages ?? const []) {
87+
_apiDocIndex.add(
88+
_apiDocPageId(doc.package, page), page.symbols?.join(' '));
8889
}
8990
final String allText = [doc.package, doc.description, doc.readme]
9091
.where((s) => s != null)
@@ -179,7 +180,8 @@ class SimplePackageIndex implements PackageIndex {
179180
}
180181

181182
// do text matching
182-
final Score textScore = _searchText(packages, query.parsedQuery.text);
183+
final Score textScore = _searchText(packages, query.parsedQuery.text,
184+
_apiSearchEnabled || query.parsedQuery.isApiEnabled);
183185

184186
// filter out packages that don't match the text query
185187
if (textScore != null) {
@@ -299,15 +301,16 @@ class SimplePackageIndex implements PackageIndex {
299301
return new Score(values);
300302
}
301303

302-
Score _searchText(Set<String> packages, String text) {
304+
Score _searchText(Set<String> packages, String text, bool isExperimental) {
303305
if (text != null && text.isNotEmpty) {
304306
final List<String> words = splitForIndexing(text).toList();
305307
final int wordCount = words.length;
306308
final List<Score> wordScores = words.map((String word) {
307309
final nameTokens = _nameIndex.lookupTokens(word);
308310
final descrTokens = _descrIndex.lookupTokens(word);
309311
final readmeTokens = _readmeIndex.lookupTokens(word);
310-
final apiDocTokens = _apiDocIndex.lookupTokens(word);
312+
final apiDocTokens =
313+
isExperimental ? _apiDocIndex.lookupTokens(word) : new TokenMatch();
311314
final maxTokenLength = [
312315
nameTokens.maxLength,
313316
descrTokens.maxLength,
@@ -326,15 +329,20 @@ class SimplePackageIndex implements PackageIndex {
326329
final readme = new Score(_readmeIndex.scoreDocs(readmeTokens,
327330
weight: 0.90, wordCount: wordCount));
328331

329-
final apiPages = new Score(_apiDocIndex.scoreDocs(apiDocTokens,
330-
weight: 0.80, wordCount: wordCount));
331-
final apiPackages = <String, double>{};
332-
for (String key in apiPages.getKeys()) {
333-
final pkg = _apiDocPkg(key);
334-
final value = apiPages[key];
335-
apiPackages[pkg] = math.max(value, apiPackages[pkg] ?? 0.0);
332+
Score apiScore;
333+
if (isExperimental) {
334+
final apiPages = new Score(_apiDocIndex.scoreDocs(apiDocTokens,
335+
weight: 0.80, wordCount: wordCount));
336+
final apiPackages = <String, double>{};
337+
for (String key in apiPages.getKeys()) {
338+
final pkg = _apiDocPkg(key);
339+
final value = apiPages[key];
340+
apiPackages[pkg] = math.max(value, apiPackages[pkg] ?? 0.0);
341+
}
342+
apiScore = new Score(apiPackages);
343+
} else {
344+
apiScore = new Score({});
336345
}
337-
final apiScore = new Score(apiPackages);
338346

339347
return Score.max([name, descr, readme, apiScore]).removeLowValues(
340348
fraction: 0.01, minValue: 0.001);
@@ -528,6 +536,7 @@ class TokenMatch {
528536
}
529537

530538
class TokenIndex {
539+
final Map<String, String> _textHashes = <String, String>{};
531540
final Map<String, Set<String>> _inverseIds = <String, Set<String>>{};
532541
final Map<String, double> _docSizes = <String, double>{};
533542
final int _minLength;
@@ -541,17 +550,28 @@ class TokenIndex {
541550

542551
void add(String id, String text) {
543552
final Set<String> tokens = _tokenize(text, _minLength);
544-
if (tokens == null || tokens.isEmpty) return;
553+
if (tokens == null || tokens.isEmpty) {
554+
if (_textHashes.containsKey(id)) {
555+
remove(id);
556+
}
557+
return;
558+
}
559+
final String textHash = '${text.hashCode}/${tokens.length}';
560+
if (_textHashes.containsKey(id) && _textHashes[id] != textHash) {
561+
remove(id);
562+
}
545563
for (String token in tokens) {
546564
final Set<String> set = _inverseIds.putIfAbsent(token, () => new Set());
547565
set.add(id);
548566
}
549567
// Document size is a highly scaled-down proxy of the length.
550568
final docSize = 1 + math.log(1 + tokens.length) / 100;
551569
_docSizes[id] = docSize;
570+
_textHashes[id] = textHash;
552571
}
553572

554573
void remove(String id) {
574+
_textHashes.remove(id);
555575
_docSizes.remove(id);
556576
final List<String> removeKeys = [];
557577
_inverseIds.forEach((String key, Set<String> set) {

app/lib/search/updater.dart

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ class BatchIndexUpdater implements TaskRunner {
8686
await _ongoingBatchUpdate;
8787
}
8888
_batch.add(task);
89-
if (_batch.length < 20) {
89+
if (_batch.length < 5) {
9090
_batchUpdateTimer ??= new Timer(const Duration(seconds: 10), () {
9191
_updateBatch();
9292
});

app/lib/shared/search_service.dart

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,16 +351,21 @@ class ParsedQuery {
351351
/// Match authors and uploaders.
352352
final List<String> emails;
353353

354+
/// Whether the query enables the experimental API search (`!!api` token).
355+
final bool isApiEnabled;
356+
354357
ParsedQuery._(
355358
this.text,
356359
this.packagePrefix,
357360
this.refDependencies,
358361
this.allDependencies,
359362
this.emails,
363+
this.isApiEnabled,
360364
);
361365

362366
factory ParsedQuery._parse(String q) {
363367
String queryText = q ?? '';
368+
queryText = ' $queryText ';
364369
String packagePrefix;
365370
final Match pkgMatch = _packageRegexp.firstMatch(queryText);
366371
if (pkgMatch != null) {
@@ -381,6 +386,11 @@ class ParsedQuery {
381386
final List<String> allDependencies = extractRegExp(_allDependencyRegExp);
382387
final List<String> emails = extractRegExp(_emailRegexp);
383388

389+
final bool isApiEnabled = queryText.contains(' !!api ');
390+
if (isApiEnabled) {
391+
queryText = queryText.replaceFirst(' !!api ', ' ');
392+
}
393+
384394
queryText = queryText.replaceAll(_whitespacesRegExp, ' ').trim();
385395
if (queryText.isEmpty) {
386396
queryText = null;
@@ -392,6 +402,7 @@ class ParsedQuery {
392402
dependencies,
393403
allDependencies,
394404
emails,
405+
isApiEnabled,
395406
);
396407
}
397408

app/test/search/api_doc_page_test.dart

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ void main() {
1515
SimplePackageIndex index;
1616

1717
setUpAll(() async {
18-
index = new SimplePackageIndex(enableApiIndex: true);
18+
index = new SimplePackageIndex(apiSearchEnabled: true);
1919
await index.addPackage(new PackageDocument(
2020
package: 'foo',
2121
version: '1.0.0',

app/test/shared/search_service_test.dart

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,19 @@ void main() {
2929
expect(new SearchQuery.parse(query: 'text').parsedQuery.text, 'text');
3030
expect(new SearchQuery.parse(query: ' text ').query, 'text');
3131
expect(new SearchQuery.parse(query: ' text ').parsedQuery.text, 'text');
32+
expect(new SearchQuery.parse(query: ' text ').parsedQuery.isApiEnabled,
33+
isFalse);
34+
});
35+
36+
test('experimental API search', () {
37+
expect(new SearchQuery.parse(query: '!!api').parsedQuery.isApiEnabled,
38+
isTrue);
39+
expect(
40+
new SearchQuery.parse(query: 'text !!api').parsedQuery.isApiEnabled,
41+
isTrue);
42+
expect(
43+
new SearchQuery.parse(query: '!!api text').parsedQuery.isApiEnabled,
44+
isTrue);
3245
});
3346

3447
test('no dependency', () {

0 commit comments

Comments
 (0)