Skip to content

Commit 18adc3f

Browse files
authored
[8.19] Synthetic source: avoid storing multi fields of type text and match_only_text by default (#129251)
Backporting #129126 to 8.19 branch. Don't store text and match_only_text field by default when source mode is synthetic and a field is a multi field or when there is a suitable multi field. Without this change, ES would store field otherwise twice in a multi-field configuration. For example: ``` ... "os": { "properties": { "name": { "ignore_above": 1024, "type": "keyword", "fields": { "text": { "type": "match_only_text" } } } ... ``` In this case, two stored fields were added, one in case for the `name` field and one for `name.text` multi-field. This change prevents this, and would never store a stored field when text or match_only_text field is a multi-field.
1 parent c198e8f commit 18adc3f

File tree

6 files changed

+214
-23
lines changed

6 files changed

+214
-23
lines changed

docs/changelog/129126.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 129126
2+
summary: "Synthetic source: avoid storing multi fields of type text and `match_only_text`\
3+
\ by default"
4+
area: Mapping
5+
type: bug
6+
issues: []

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.elasticsearch.common.lucene.Lucene;
3434
import org.elasticsearch.common.unit.Fuzziness;
3535
import org.elasticsearch.index.IndexVersion;
36+
import org.elasticsearch.index.IndexVersions;
3637
import org.elasticsearch.index.analysis.IndexAnalyzers;
3738
import org.elasticsearch.index.analysis.NamedAnalyzer;
3839
import org.elasticsearch.index.fielddata.FieldDataContext;
@@ -101,12 +102,9 @@ public static class Builder extends FieldMapper.Builder {
101102
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
102103

103104
private final TextParams.Analyzers analyzers;
105+
private final boolean withinMultiField;
104106

105-
public Builder(String name, IndexAnalyzers indexAnalyzers) {
106-
this(name, IndexVersion.current(), indexAnalyzers);
107-
}
108-
109-
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
107+
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) {
110108
super(name);
111109
this.indexCreatedVersion = indexCreatedVersion;
112110
this.analyzers = new TextParams.Analyzers(
@@ -115,6 +113,7 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind
115113
m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap,
116114
indexCreatedVersion
117115
);
116+
this.withinMultiField = withinMultiField;
118117
}
119118

120119
@Override
@@ -140,18 +139,21 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
140139
@Override
141140
public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
142141
MatchOnlyTextFieldType tft = buildFieldType(context);
143-
return new MatchOnlyTextFieldMapper(
144-
leafName(),
145-
Defaults.FIELD_TYPE,
146-
tft,
147-
builderParams(this, context),
148-
context.isSourceSynthetic(),
149-
this
150-
);
142+
final boolean storeSource;
143+
if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19)) {
144+
storeSource = context.isSourceSynthetic()
145+
&& withinMultiField == false
146+
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
147+
} else {
148+
storeSource = context.isSourceSynthetic();
149+
}
150+
return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this);
151151
}
152152
}
153153

154-
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));
154+
public static final TypeParser PARSER = new TypeParser(
155+
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
156+
);
155157

156158
public static class MatchOnlyTextFieldType extends StringFieldType {
157159

@@ -406,6 +408,7 @@ private String storedFieldNameForSyntheticSource() {
406408
private final int positionIncrementGap;
407409
private final boolean storeSource;
408410
private final FieldType fieldType;
411+
private final boolean withinMultiField;
409412

410413
private MatchOnlyTextFieldMapper(
411414
String simpleName,
@@ -424,6 +427,7 @@ private MatchOnlyTextFieldMapper(
424427
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
425428
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
426429
this.storeSource = storeSource;
430+
this.withinMultiField = builder.withinMultiField;
427431
}
428432

429433
@Override
@@ -433,7 +437,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {
433437

434438
@Override
435439
public FieldMapper.Builder getMergeBuilder() {
436-
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this);
440+
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this);
437441
}
438442

439443
@Override

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.lucene.tests.index.RandomIndexWriter;
2424
import org.elasticsearch.common.Strings;
2525
import org.elasticsearch.core.Tuple;
26+
import org.elasticsearch.index.IndexSettings;
2627
import org.elasticsearch.index.mapper.DocumentMapper;
2728
import org.elasticsearch.index.mapper.KeywordFieldMapper;
2829
import org.elasticsearch.index.mapper.LuceneDocument;
@@ -46,8 +47,10 @@
4647
import java.util.stream.Collectors;
4748

4849
import static org.hamcrest.Matchers.containsString;
50+
import static org.hamcrest.Matchers.empty;
4951
import static org.hamcrest.Matchers.equalTo;
5052
import static org.hamcrest.Matchers.instanceOf;
53+
import static org.hamcrest.core.Is.is;
5154

5255
public class MatchOnlyTextFieldMapperTests extends MapperTestCase {
5356

@@ -255,4 +258,91 @@ public void testDocValuesLoadedFromSynthetic() throws IOException {
255258
protected IngestScriptSupport ingestScriptSupport() {
256259
throw new AssumptionViolatedException("not supported");
257260
}
261+
262+
public void testStoreParameterDefaultsSyntheticSource() throws IOException {
263+
var indexSettingsBuilder = getIndexSettingsBuilder();
264+
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
265+
var indexSettings = indexSettingsBuilder.build();
266+
267+
var mapping = mapping(b -> {
268+
b.startObject("name");
269+
b.field("type", "match_only_text");
270+
b.endObject();
271+
});
272+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
273+
274+
var source = source(b -> b.field("name", "quick brown fox"));
275+
ParsedDocument doc = mapper.parse(source);
276+
277+
{
278+
List<IndexableField> fields = doc.rootDoc().getFields("name");
279+
IndexableFieldType fieldType = fields.get(0).fieldType();
280+
assertThat(fieldType.stored(), is(false));
281+
}
282+
{
283+
List<IndexableField> fields = doc.rootDoc().getFields("name._original");
284+
IndexableFieldType fieldType = fields.get(0).fieldType();
285+
assertThat(fieldType.stored(), is(true));
286+
}
287+
}
288+
289+
public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
290+
var indexSettingsBuilder = getIndexSettingsBuilder();
291+
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
292+
var indexSettings = indexSettingsBuilder.build();
293+
294+
var mapping = mapping(b -> {
295+
b.startObject("name");
296+
b.field("type", "match_only_text");
297+
b.startObject("fields");
298+
b.startObject("keyword");
299+
b.field("type", "keyword");
300+
b.endObject();
301+
b.endObject();
302+
b.endObject();
303+
});
304+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
305+
306+
var source = source(b -> b.field("name", "quick brown fox"));
307+
ParsedDocument doc = mapper.parse(source);
308+
{
309+
List<IndexableField> fields = doc.rootDoc().getFields("name");
310+
IndexableFieldType fieldType = fields.get(0).fieldType();
311+
assertThat(fieldType.stored(), is(false));
312+
}
313+
{
314+
List<IndexableField> fields = doc.rootDoc().getFields("name._original");
315+
assertThat(fields, empty());
316+
}
317+
}
318+
319+
public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
320+
var indexSettingsBuilder = getIndexSettingsBuilder();
321+
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
322+
var indexSettings = indexSettingsBuilder.build();
323+
324+
var mapping = mapping(b -> {
325+
b.startObject("name");
326+
b.field("type", "keyword");
327+
b.startObject("fields");
328+
b.startObject("text");
329+
b.field("type", "match_only_text");
330+
b.endObject();
331+
b.endObject();
332+
b.endObject();
333+
});
334+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
335+
336+
var source = source(b -> b.field("name", "quick brown fox"));
337+
ParsedDocument doc = mapper.parse(source);
338+
{
339+
List<IndexableField> fields = doc.rootDoc().getFields("name.text");
340+
IndexableFieldType fieldType = fields.get(0).fieldType();
341+
assertThat(fieldType.stored(), is(false));
342+
}
343+
{
344+
List<IndexableField> fields = doc.rootDoc().getFields("name.text._original");
345+
assertThat(fields, empty());
346+
}
347+
}
258348
}

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ private static IndexVersion def(int id, Version luceneVersion) {
133133
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ_BACKPORT_8_X = def(8_530_0_00, Version.LUCENE_9_12_1);
134134
public static final IndexVersion SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X = def(8_531_0_00, Version.LUCENE_9_12_1);
135135
public static final IndexVersion INDEX_INT_SORT_INT_TYPE_8_19 = def(8_532_0_00, Version.LUCENE_9_12_1);
136+
public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19 = def(8_533_0_00, Version.LUCENE_9_12_1);
136137
/*
137138
* STOP! READ THIS FIRST! No, really,
138139
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -287,11 +287,19 @@ public static class Builder extends FieldMapper.Builder {
287287

288288
final TextParams.Analyzers analyzers;
289289

290+
private final boolean withinMultiField;
291+
290292
public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
291-
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled);
293+
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false);
292294
}
293295

294-
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
296+
public Builder(
297+
String name,
298+
IndexVersion indexCreatedVersion,
299+
IndexAnalyzers indexAnalyzers,
300+
boolean isSyntheticSourceEnabled,
301+
boolean withinMultiField
302+
) {
295303
super(name);
296304

297305
// If synthetic source is used we need to either store this field
@@ -300,10 +308,17 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind
300308
// storing the field without requiring users to explicitly set 'store'.
301309
//
302310
// If 'store' parameter was explicitly provided we'll reject the request.
303-
this.store = Parameter.storeParam(
304-
m -> ((TextFieldMapper) m).store,
305-
() -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
306-
);
311+
// Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field
312+
this.withinMultiField = withinMultiField;
313+
this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> {
314+
if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19)) {
315+
return isSyntheticSourceEnabled
316+
&& this.withinMultiField == false
317+
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
318+
} else {
319+
return isSyntheticSourceEnabled;
320+
}
321+
});
307322
this.indexCreatedVersion = indexCreatedVersion;
308323
this.analyzers = new TextParams.Analyzers(
309324
indexAnalyzers,
@@ -484,7 +499,13 @@ public TextFieldMapper build(MapperBuilderContext context) {
484499
private static final IndexVersion MINIMUM_COMPATIBILITY_VERSION = IndexVersion.fromId(5000099);
485500

486501
public static final TypeParser PARSER = new TypeParser(
487-
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings())),
502+
(n, c) -> new Builder(
503+
n,
504+
c.indexVersionCreated(),
505+
c.getIndexAnalyzers(),
506+
SourceFieldMapper.isSynthetic(c.getIndexSettings()),
507+
c.isWithinMultiField()
508+
),
488509
MINIMUM_COMPATIBILITY_VERSION
489510
);
490511

@@ -1307,6 +1328,7 @@ public Query existsQuery(SearchExecutionContext context) {
13071328
private final SubFieldInfo phraseFieldInfo;
13081329

13091330
private final boolean isSyntheticSourceEnabled;
1331+
private final boolean isWithinMultiField;
13101332

13111333
private TextFieldMapper(
13121334
String simpleName,
@@ -1340,6 +1362,7 @@ private TextFieldMapper(
13401362
this.freqFilter = builder.freqFilter.getValue();
13411363
this.fieldData = builder.fieldData.get();
13421364
this.isSyntheticSourceEnabled = builder.isSyntheticSourceEnabled;
1365+
this.isWithinMultiField = builder.withinMultiField;
13431366
}
13441367

13451368
@Override
@@ -1363,7 +1386,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {
13631386

13641387
@Override
13651388
public FieldMapper.Builder getMergeBuilder() {
1366-
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled).init(this);
1389+
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this);
13671390
}
13681391

13691392
@Override

server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,73 @@ public void testStoreParameterDefaults() throws IOException {
307307
}
308308
}
309309

310+
public void testStoreParameterDefaultsSyntheticSource() throws IOException {
311+
var indexSettingsBuilder = getIndexSettingsBuilder();
312+
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
313+
var indexSettings = indexSettingsBuilder.build();
314+
315+
var mapping = mapping(b -> {
316+
b.startObject("name");
317+
b.field("type", "text");
318+
b.endObject();
319+
});
320+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
321+
322+
var source = source(b -> b.field("name", "quick brown fox"));
323+
ParsedDocument doc = mapper.parse(source);
324+
List<IndexableField> fields = doc.rootDoc().getFields("name");
325+
IndexableFieldType fieldType = fields.get(0).fieldType();
326+
assertThat(fieldType.stored(), is(true));
327+
}
328+
329+
public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
330+
var indexSettingsBuilder = getIndexSettingsBuilder();
331+
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
332+
var indexSettings = indexSettingsBuilder.build();
333+
334+
var mapping = mapping(b -> {
335+
b.startObject("name");
336+
b.field("type", "text");
337+
b.startObject("fields");
338+
b.startObject("keyword");
339+
b.field("type", "keyword");
340+
b.endObject();
341+
b.endObject();
342+
b.endObject();
343+
});
344+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
345+
346+
var source = source(b -> b.field("name", "quick brown fox"));
347+
ParsedDocument doc = mapper.parse(source);
348+
List<IndexableField> fields = doc.rootDoc().getFields("name");
349+
IndexableFieldType fieldType = fields.get(0).fieldType();
350+
assertThat(fieldType.stored(), is(false));
351+
}
352+
353+
public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
354+
var indexSettingsBuilder = getIndexSettingsBuilder();
355+
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
356+
var indexSettings = indexSettingsBuilder.build();
357+
358+
var mapping = mapping(b -> {
359+
b.startObject("name");
360+
b.field("type", "keyword");
361+
b.startObject("fields");
362+
b.startObject("text");
363+
b.field("type", "text");
364+
b.endObject();
365+
b.endObject();
366+
b.endObject();
367+
});
368+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
369+
370+
var source = source(b -> b.field("name", "quick brown fox"));
371+
ParsedDocument doc = mapper.parse(source);
372+
List<IndexableField> fields = doc.rootDoc().getFields("name.text");
373+
IndexableFieldType fieldType = fields.get(0).fieldType();
374+
assertThat(fieldType.stored(), is(false));
375+
}
376+
310377
public void testBWCSerialization() throws IOException {
311378
MapperService mapperService = createMapperService(fieldMapping(b -> {
312379
b.field("type", "text");

0 commit comments

Comments
 (0)