Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 19d4414

Browse files
Merge #550
550: Add the two new pagination and faceting settings r=ManyTheFish a=Kerollmops This PR adds two new settings to the database; these settings are described [in this spec](meilisearch/specifications#157). Co-authored-by: Kerollmops <[email protected]>
2 parents 9580b9d + 445d547 commit 19d4414

File tree

6 files changed

+102
-16
lines changed

6 files changed

+102
-16
lines changed

milli/src/index.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ pub mod main_key {
5656
pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len";
5757
pub const EXACT_WORDS: &str = "exact-words";
5858
pub const EXACT_ATTRIBUTES: &str = "exact-attributes";
59+
pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet";
60+
pub const PAGINATION_LIMITED_TO: &str = "pagination-limited-to";
5961
}
6062

6163
pub mod db_name {
@@ -1087,6 +1089,34 @@ impl Index {
10871089
self.main.delete::<_, Str>(txn, main_key::EXACT_ATTRIBUTES)?;
10881090
Ok(())
10891091
}
1092+
1093+
pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<usize>> {
1094+
self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::MAX_VALUES_PER_FACET)
1095+
}
1096+
1097+
pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> {
1098+
self.main.put::<_, Str, OwnedType<usize>>(txn, main_key::MAX_VALUES_PER_FACET, &val)
1099+
}
1100+
1101+
pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result<bool> {
1102+
self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET)
1103+
}
1104+
1105+
pub fn pagination_limited_to(&self, txn: &RoTxn) -> heed::Result<Option<usize>> {
1106+
self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_LIMITED_TO)
1107+
}
1108+
1109+
pub(crate) fn put_pagination_limited_to(
1110+
&self,
1111+
txn: &mut RwTxn,
1112+
val: usize,
1113+
) -> heed::Result<()> {
1114+
self.main.put::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_LIMITED_TO, &val)
1115+
}
1116+
1117+
pub(crate) fn delete_pagination_limited_to(&self, txn: &mut RwTxn) -> heed::Result<bool> {
1118+
self.main.delete::<_, Str>(txn, main_key::PAGINATION_LIMITED_TO)
1119+
}
10901120
}
10911121

10921122
#[cfg(test)]

milli/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ pub use self::heed_codec::{
3838
pub use self::index::Index;
3939
pub use self::search::{
4040
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord,
41-
MatchingWords, Search, SearchResult,
41+
MatchingWords, Search, SearchResult, DEFAULT_VALUES_PER_FACET,
4242
};
4343

4444
pub type Result<T> = std::result::Result<T, error::Error>;

milli/src/search/facet/facet_distribution.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use crate::{FieldId, Index, Result};
1515

1616
/// The default number of values per facet that will
1717
/// be fetched from the key-value store.
18-
const DEFAULT_VALUES_BY_FACET: usize = 100;
18+
pub const DEFAULT_VALUES_PER_FACET: usize = 100;
1919

2020
/// Threshold on the number of candidates that makes
2121
/// the system choose between one algorithm or another.
@@ -24,7 +24,7 @@ const CANDIDATES_THRESHOLD: u64 = 3000;
2424
pub struct FacetDistribution<'a> {
2525
facets: Option<HashSet<String>>,
2626
candidates: Option<RoaringBitmap>,
27-
max_values_by_facet: usize,
27+
max_values_per_facet: usize,
2828
rtxn: &'a heed::RoTxn<'a>,
2929
index: &'a Index,
3030
}
@@ -34,7 +34,7 @@ impl<'a> FacetDistribution<'a> {
3434
FacetDistribution {
3535
facets: None,
3636
candidates: None,
37-
max_values_by_facet: DEFAULT_VALUES_BY_FACET,
37+
max_values_per_facet: DEFAULT_VALUES_PER_FACET,
3838
rtxn,
3939
index,
4040
}
@@ -45,8 +45,8 @@ impl<'a> FacetDistribution<'a> {
4545
self
4646
}
4747

48-
pub fn max_values_by_facet(&mut self, max: usize) -> &mut Self {
49-
self.max_values_by_facet = max;
48+
pub fn max_values_per_facet(&mut self, max: usize) -> &mut Self {
49+
self.max_values_per_facet = max;
5050
self
5151
}
5252

@@ -82,7 +82,8 @@ impl<'a> FacetDistribution<'a> {
8282
let ((_, _, value), ()) = result?;
8383
*distribution.entry(value.to_string()).or_insert(0) += 1;
8484

85-
if distribution.len() - distribution_prelength == self.max_values_by_facet {
85+
if distribution.len() - distribution_prelength == self.max_values_per_facet
86+
{
8687
break;
8788
}
8889
}
@@ -108,7 +109,7 @@ impl<'a> FacetDistribution<'a> {
108109
.or_insert_with(|| (original_value, 0));
109110
*count += 1;
110111

111-
if normalized_distribution.len() == self.max_values_by_facet {
112+
if normalized_distribution.len() == self.max_values_per_facet {
112113
break;
113114
}
114115
}
@@ -141,7 +142,7 @@ impl<'a> FacetDistribution<'a> {
141142
if !docids.is_empty() {
142143
distribution.insert(value.to_string(), docids.len());
143144
}
144-
if distribution.len() == self.max_values_by_facet {
145+
if distribution.len() == self.max_values_per_facet {
145146
break;
146147
}
147148
}
@@ -164,7 +165,7 @@ impl<'a> FacetDistribution<'a> {
164165
if !docids.is_empty() {
165166
distribution.insert(original.to_string(), docids.len());
166167
}
167-
if distribution.len() == self.max_values_by_facet {
168+
if distribution.len() == self.max_values_per_facet {
168169
break;
169170
}
170171
}
@@ -186,7 +187,7 @@ impl<'a> FacetDistribution<'a> {
186187
for result in range {
187188
let ((_, _, value, _), docids) = result?;
188189
distribution.insert(value.to_string(), docids.len());
189-
if distribution.len() == self.max_values_by_facet {
190+
if distribution.len() == self.max_values_per_facet {
190191
break;
191192
}
192193
}
@@ -202,7 +203,7 @@ impl<'a> FacetDistribution<'a> {
202203
for result in iter {
203204
let ((_, normalized_value), (original_value, docids)) = result?;
204205
normalized_distribution.insert(normalized_value, (original_value, docids.len()));
205-
if normalized_distribution.len() == self.max_values_by_facet {
206+
if normalized_distribution.len() == self.max_values_per_facet {
206207
break;
207208
}
208209
}
@@ -290,12 +291,13 @@ impl<'a> FacetDistribution<'a> {
290291

291292
impl fmt::Debug for FacetDistribution<'_> {
292293
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
293-
let FacetDistribution { facets, candidates, max_values_by_facet, rtxn: _, index: _ } = self;
294+
let FacetDistribution { facets, candidates, max_values_per_facet, rtxn: _, index: _ } =
295+
self;
294296

295297
f.debug_struct("FacetDistribution")
296298
.field("facets", facets)
297299
.field("candidates", candidates)
298-
.field("max_values_by_facet", max_values_by_facet)
300+
.field("max_values_per_facet", max_values_per_facet)
299301
.finish()
300302
}
301303
}

milli/src/search/facet/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
pub use self::facet_distribution::FacetDistribution;
1+
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
22
pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange};
33
pub use self::facet_string::FacetStringIter;
44
pub use self::filter::Filter;

milli/src/search/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use log::debug;
1515
use once_cell::sync::Lazy;
1616
use roaring::bitmap::RoaringBitmap;
1717

18-
pub use self::facet::{FacetDistribution, FacetNumberIter, Filter};
18+
pub use self::facet::{FacetDistribution, FacetNumberIter, Filter, DEFAULT_VALUES_PER_FACET};
1919
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
2020
pub use self::matches::{
2121
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,

milli/src/update/settings.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ pub struct Settings<'a, 't, 'u, 'i> {
104104
exact_words: Setting<BTreeSet<String>>,
105105
/// Attributes on which typo tolerance is disabled.
106106
exact_attributes: Setting<HashSet<String>>,
107+
max_values_per_facet: Setting<usize>,
108+
pagination_limited_to: Setting<usize>,
107109
}
108110

109111
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
@@ -129,6 +131,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
129131
min_word_len_two_typos: Setting::NotSet,
130132
min_word_len_one_typo: Setting::NotSet,
131133
exact_attributes: Setting::NotSet,
134+
max_values_per_facet: Setting::NotSet,
135+
pagination_limited_to: Setting::NotSet,
132136
indexer_config,
133137
}
134138
}
@@ -246,6 +250,22 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
246250
self.exact_attributes = Setting::Reset;
247251
}
248252

253+
pub fn set_max_values_per_facet(&mut self, value: usize) {
254+
self.max_values_per_facet = Setting::Set(value);
255+
}
256+
257+
pub fn reset_max_values_per_facet(&mut self) {
258+
self.max_values_per_facet = Setting::Reset;
259+
}
260+
261+
pub fn set_pagination_limited_to(&mut self, value: usize) {
262+
self.pagination_limited_to = Setting::Set(value);
263+
}
264+
265+
pub fn reset_pagination_limited_to(&mut self) {
266+
self.pagination_limited_to = Setting::Reset;
267+
}
268+
249269
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
250270
where
251271
F: Fn(UpdateIndexingStep) + Sync,
@@ -612,6 +632,34 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
612632
Ok(())
613633
}
614634

635+
fn update_max_values_per_facet(&mut self) -> Result<()> {
636+
match self.max_values_per_facet {
637+
Setting::Set(max) => {
638+
self.index.put_max_values_per_facet(&mut self.wtxn, max)?;
639+
}
640+
Setting::Reset => {
641+
self.index.delete_max_values_per_facet(&mut self.wtxn)?;
642+
}
643+
Setting::NotSet => (),
644+
}
645+
646+
Ok(())
647+
}
648+
649+
fn update_pagination_limited_to(&mut self) -> Result<()> {
650+
match self.pagination_limited_to {
651+
Setting::Set(max) => {
652+
self.index.put_pagination_limited_to(&mut self.wtxn, max)?;
653+
}
654+
Setting::Reset => {
655+
self.index.delete_pagination_limited_to(&mut self.wtxn)?;
656+
}
657+
Setting::NotSet => (),
658+
}
659+
660+
Ok(())
661+
}
662+
615663
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
616664
where
617665
F: Fn(UpdateIndexingStep) + Sync,
@@ -630,6 +678,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
630678
self.update_authorize_typos()?;
631679
self.update_min_typo_word_len()?;
632680
self.update_exact_words()?;
681+
self.update_max_values_per_facet()?;
682+
self.update_pagination_limited_to()?;
633683

634684
// If there are new faceted fields we indicate that we must reindex as we must
635685
// index new fields as facets. It means that the distinct attribute,
@@ -1525,6 +1575,8 @@ mod tests {
15251575
min_word_len_one_typo,
15261576
exact_words,
15271577
exact_attributes,
1578+
max_values_per_facet,
1579+
pagination_limited_to,
15281580
} = builder;
15291581

15301582
assert!(matches!(searchable_fields, Setting::NotSet));
@@ -1541,5 +1593,7 @@ mod tests {
15411593
assert!(matches!(min_word_len_one_typo, Setting::NotSet));
15421594
assert!(matches!(exact_words, Setting::NotSet));
15431595
assert!(matches!(exact_attributes, Setting::NotSet));
1596+
assert!(matches!(max_values_per_facet, Setting::NotSet));
1597+
assert!(matches!(pagination_limited_to, Setting::NotSet));
15441598
}
15451599
}

0 commit comments

Comments
 (0)