Skip to content

Commit 9a8b362

Browse files
committed
emoji: Recognize word-aligned matches in ranking
Fixes #1068.
1 parent 40e413c commit 9a8b362

File tree

2 files changed

+70
-42
lines changed

2 files changed

+70
-42
lines changed

lib/model/emoji.dart

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,15 @@ enum EmojiMatchQuality {
337337
/// The query matches a prefix of the emoji name, but not the whole name.
338338
prefix,
339339

340-
/// The query matches somewhere in the emoji name, but not at the start.
340+
/// The query matches starting at the start of a word in the emoji name,
341+
/// but not the start of the whole name.
342+
///
343+
/// For example, a name "ab_cd_ef" would match queries "c" or "cd_e"
344+
/// at this level, but not a query "b_cd_ef".
345+
wordAligned,
346+
347+
/// The query matches somewhere in the emoji name,
348+
/// but not at the start of any word.
341349
other;
342350

343351
/// The best possible quality of match.
@@ -448,25 +456,17 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
448456
// See also commentary in [_rankResult].
449457

450458
// TODO(#1067) this assumes emojiName is already lower-case (and no diacritics)
451-
if (emojiName == _adjusted) return EmojiMatchQuality.exact;
452-
if (emojiName.startsWith(_adjusted)) return EmojiMatchQuality.prefix;
453-
if (_nameMatches(emojiName)) return EmojiMatchQuality.other;
454-
return null;
455-
}
456-
457-
bool _nameMatches(String emojiName) {
459+
if (emojiName == _adjusted) return EmojiMatchQuality.exact;
460+
if (emojiName.startsWith(_adjusted)) return EmojiMatchQuality.prefix;
461+
if (emojiName.contains(_sepAdjusted)) return EmojiMatchQuality.wordAligned;
458462
if (!_adjusted.contains(_separator)) {
459463
// If the query is a single token (doesn't contain a separator),
460-
// the match can be anywhere in the string.
461-
return emojiName.contains(_adjusted);
464+
// allow a match anywhere in the string, too.
465+
if (emojiName.contains(_adjusted)) return EmojiMatchQuality.other;
466+
} else {
467+
// Otherwise, require at least a word-aligned match.
462468
}
463-
464-
// If there is a separator in the query, then we
465-
// require the match to start at the start of a token.
466-
// (E.g. for 'ab_cd_ef', query could be 'ab_c' or 'cd_ef',
467-
// but not 'b_cd_ef'.)
468-
assert(!emojiName.startsWith(_adjusted)); // checked before calling this method
469-
return emojiName.contains(_sepAdjusted);
469+
return null;
470470
}
471471

472472
/// A measure of the result's quality in the context of the query,
@@ -475,11 +475,9 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
475475
// Compare sort_emojis in Zulip web:
476476
// https://github.com/zulip/zulip/blob/83a121c7e/web/shared/src/typeahead.ts#L322-L382
477477
//
478-
// Behavior differences we should or might copy, TODO(#1068):
479-
// * Web ranks matches starting at a word boundary ahead of
480-
// other non-prefix matches; we don't yet.
481-
// * Relatedly, web favors popular emoji only upon a word-aligned match.
478+
// Behavior differences we might copy, TODO:
482479
// * Web ranks each name of a Unicode emoji separately.
480+
// * Web recognizes a word-aligned match starting after [ /-] as well as [_].
483481
//
484482
// Behavior differences that web should probably fix, TODO(web):
485483
// * Among popular emoji with non-exact matches,
@@ -508,15 +506,15 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
508506
ReactionType.unicodeEmoji => false,
509507
};
510508
return switch (matchQuality) {
511-
EmojiMatchQuality.exact => 0,
512-
EmojiMatchQuality.prefix => isPopular ? 1 : isCustomEmoji ? 3 : 4,
513-
// TODO word-boundary vs. not
514-
EmojiMatchQuality.other => isPopular ? 2 : isCustomEmoji ? 5 : 6,
509+
EmojiMatchQuality.exact => 0,
510+
EmojiMatchQuality.prefix => isPopular ? 1 : isCustomEmoji ? 3 : 5,
511+
EmojiMatchQuality.wordAligned => isPopular ? 2 : isCustomEmoji ? 4 : 6,
512+
EmojiMatchQuality.other => isCustomEmoji ? 7 : 8,
515513
};
516514
}
517515

518516
/// The number of possible values returned by [_rankResult].
519-
static const _numResultRanks = 7;
517+
static const _numResultRanks = 9;
520518

521519
@override
522520
String toString() {

test/model/emoji_test.dart

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ void main() {
347347
check(matchOfName('o', 'open_book')).prefix;
348348
check(matchOfName('open', 'open_book')).prefix;
349349
check(matchOfName('pe', 'open_book')).other;
350-
check(matchOfName('boo', 'open_book')).other;
350+
check(matchOfName('boo', 'open_book')).wordAligned;
351351
check(matchOfName('ok', 'open_book')).other;
352352
});
353353

@@ -359,7 +359,7 @@ void main() {
359359
check(matchOfName('pen_', 'open_book')).none;
360360
check(matchOfName('n_b', 'open_book')).none;
361361

362-
check(matchOfName('blue_dia', 'large_blue_diamond')).other;
362+
check(matchOfName('blue_dia', 'large_blue_diamond')).wordAligned;
363363
});
364364

365365
test('spaces in query behave as underscores', () {
@@ -370,7 +370,7 @@ void main() {
370370
check(matchOfName('pen ', 'open_book')).none;
371371
check(matchOfName('n b', 'open_book')).none;
372372

373-
check(matchOfName('blue dia', 'large_blue_diamond')).other;
373+
check(matchOfName('blue dia', 'large_blue_diamond')).wordAligned;
374374
});
375375

376376
test('query is lower-cased', () {
@@ -389,13 +389,17 @@ void main() {
389389
check(matchOfNames('open b', ['x', 'open_book'])).prefix;
390390
check(matchOfNames('pen_', ['x', 'open_book'])).none;
391391

392+
check(matchOfNames('blue_dia', ['x', 'large_blue_diamond'])).wordAligned;
393+
392394
check(matchOfNames('Smi', ['x', 'smile'])).prefix;
393395
});
394396

395397
test('best match among name and aliases prevails', () {
396-
check(matchOfNames('a', ['ab', 'a', 'ba', 'x'])).exact;
397-
check(matchOfNames('a', ['ba', 'ab', 'x'])).prefix;
398-
check(matchOfNames('a', ['ba', 'ab'])).prefix;
398+
check(matchOfNames('a', ['ab', 'a', 'b_a', 'ba', 'x'])).exact;
399+
check(matchOfNames('a', ['ba', 'ab', 'b_a', 'x'])).prefix;
400+
check(matchOfNames('a', ['ba', 'ab', 'b_a'])).prefix;
401+
check(matchOfNames('a', ['ba', 'b_a', 'x'])).wordAligned;
402+
check(matchOfNames('a', ['b_a', 'ba'])).wordAligned;
399403
check(matchOfNames('a', ['ba', 'x'])).other;
400404
check(matchOfNames('a', ['x', 'y', 'z'])).none;
401405
});
@@ -441,7 +445,7 @@ void main() {
441445
check(matchOf('eqeq', realmCandidate('eqeq'))).exact;
442446
check(matchOf('open_', realmCandidate('open_book'))).prefix;
443447
check(matchOf('n_b', realmCandidate('open_book'))).none;
444-
check(matchOf('blue dia', realmCandidate('large_blue_diamond'))).other;
448+
check(matchOf('blue dia', realmCandidate('large_blue_diamond'))).wordAligned;
445449
check(matchOf('Smi', realmCandidate('smile'))).prefix;
446450
});
447451

@@ -476,10 +480,12 @@ void main() {
476480

477481
final octopus = unicode(['octopus'], emojiCode: '1f419');
478482
final workingOnIt = unicode(['working_on_it'], emojiCode: '1f6e0');
483+
final love = unicode(['love'], emojiCode: '2764'); // aka :heart:
479484

480-
test('ranks exact before prefix before other match', () {
485+
test('ranks match quality exact/prefix/word-aligned/other', () {
481486
checkPrecedes('o', unicode(['o']), unicode(['onion']));
482-
checkPrecedes('o', unicode(['onion']), unicode(['book']));
487+
checkPrecedes('o', unicode(['onion']), unicode(['squared_ok']));
488+
checkPrecedes('o', unicode(['squared_ok']), unicode(['book']));
483489
});
484490

485491
test('ranks popular before realm before other Unicode', () {
@@ -498,28 +504,51 @@ void main() {
498504
checkPrecedes('o', octopus, realmCandidate('open_book'));
499505
});
500506

501-
test('ranks popular-vs-not more significant than prefix/other', () {
502-
// Popular other beats realm prefix.
507+
test('ranks popular-vs-not more significant than prefix/word-aligned', () {
508+
// Popular word-aligned beats realm prefix.
503509
checkPrecedes('o', workingOnIt, realmCandidate('open_book'));
504510
});
505511

506-
test('ranks prefix/other more significant than custom/other', () {
507-
// Generic Unicode prefix beats realm other.
508-
checkPrecedes('o', unicode(['ok']), realmCandidate('yo'));
512+
test('ranks popular as if generic when non-word-aligned', () {
513+
// Generic word-aligned beats popular other.
514+
checkPrecedes('o', unicode(['squared_ok']), love);
515+
// Popular other ranks below even custom other…
516+
checkPrecedes('o', realmCandidate('yo'), love);
517+
// … and same as generic Unicode other.
518+
checkSameRank('o', love, unicode(['book']));
519+
520+
// And that emoji really does count as popular,
521+
// beating custom emoji when both have a prefix match.
522+
checkPrecedes('l', love, realmCandidate('logs'));
523+
});
524+
525+
test('ranks custom/other more significant than prefix/word-aligned', () {
526+
// Custom word-aligned beats generic prefix.
527+
checkPrecedes('o', realmCandidate('laughing_blue_octopus'),
528+
unicode(['ok']));
529+
});
530+
531+
test('ranks word-aligned/other more significant than custom/other', () {
532+
// Generic Unicode word-aligned beats realm other.
533+
checkPrecedes('o', unicode(['squared_ok']), realmCandidate('yo'));
509534
});
510535

511536
test('full list of ranks', () {
512537
check([
513538
rankOf('o', unicode(['o'])), // exact (generic)
514539
rankOf('o', octopus), // prefix popular
515-
rankOf('o', workingOnIt), // other popular
540+
rankOf('o', workingOnIt), // word-aligned popular
516541
rankOf('o', realmCandidate('open_book')), // prefix realm
517542
rankOf('z', zulipCandidate()), // == prefix :zulip:
543+
rankOf('y', realmCandidate('thank_you')), // word-aligned realm
544+
// (word-aligned :zulip: is impossible because the name is one word)
518545
rankOf('o', unicode(['ok'])), // prefix generic
546+
rankOf('o', unicode(['squared_ok'])), // word-aligned generic
519547
rankOf('o', realmCandidate('yo')), // other realm
520548
rankOf('p', zulipCandidate()), // == other :zulip:
521549
rankOf('o', unicode(['book'])), // other generic
522-
]).deepEquals([0, 1, 2, 3, 3, 4, 5, 5, 6]);
550+
rankOf('o', love), // == other popular
551+
]).deepEquals([0, 1, 2, 3, 3, 4, 5, 6, 7, 7, 8, 8]);
523552
});
524553
});
525554
}
@@ -548,6 +577,7 @@ extension EmojiCandidateChecks on Subject<EmojiCandidate> {
548577
extension EmojiMatchQualityChecks on Subject<EmojiMatchQuality?> {
549578
void get exact => equals(EmojiMatchQuality.exact);
550579
void get prefix => equals(EmojiMatchQuality.prefix);
580+
void get wordAligned => equals(EmojiMatchQuality.wordAligned);
551581
void get other => equals(EmojiMatchQuality.other);
552582
void get none => isNull();
553583
}

0 commit comments

Comments
 (0)