From 6ae704920a3f43e98597b35e5e3ad4c68df3a932 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Sun, 8 Jun 2025 21:57:23 +0200 Subject: [PATCH 1/8] Avoid storing multi fields as type text and match_only_text. Don't store text and match_only_text field by default when source mode is synthetic and a field is a multi field or when there is a suitable multi field. Without this change, ES would store field otherwise twice in a multi-field configuration. For example: ``` ... "os": { "properties": { "name": { "ignore_above": 1024, "type": "keyword", "fields": { "text": { "type": "match_only_text" } } } ... ``` In this case, two stored fields were added, one in case for the `name` field and one for `name.text` multi-field. This change prevents this, and would never store a stored field when text or match_only_text field is a multi-field. --- .../extras/MatchOnlyTextFieldMapper.java | 12 +-- .../extras/MatchOnlyTextFieldMapperTests.java | 90 +++++++++++++++++++ .../index/mapper/FieldMapper.java | 2 + .../index/mapper/TextFieldMapper.java | 5 +- .../index/mapper/TextFieldMapperTests.java | 67 ++++++++++++++ 5 files changed, 167 insertions(+), 9 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 055f6091ac484..0568781ff5459 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -140,14 +140,10 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) { @Override public MatchOnlyTextFieldMapper build(MapperBuilderContext context) { MatchOnlyTextFieldType tft = buildFieldType(context); - return new MatchOnlyTextFieldMapper( - leafName(), - Defaults.FIELD_TYPE, - tft, - builderParams(this, context), - context.isSourceSynthetic(), - this - ); + boolean storeSource = context.isSourceSynthetic() + && currentFieldIsAMultiField == false + && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false; + return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this); } } diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java index b913edb1f2791..22841f8c42bf1 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java @@ -23,6 +23,7 @@ import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.Strings; import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.LuceneDocument; @@ -46,8 +47,10 @@ import java.util.stream.Collectors; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.core.Is.is; public class MatchOnlyTextFieldMapperTests extends MapperTestCase { @@ -255,4 +258,91 @@ public void testDocValuesLoadedFromSynthetic() throws IOException { protected IngestScriptSupport ingestScriptSupport() { throw new AssumptionViolatedException("not supported"); } + + public void testStoreParameterDefaultsSyntheticSource() throws IOException { + var indexSettingsBuilder = getIndexSettingsBuilder(); + indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic"); + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("name"); + b.field("type", "match_only_text"); + b.endObject(); + }); + DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper(); + + var source = source(b -> b.field("name", "quick brown fox")); + ParsedDocument doc = mapper.parse(source); + + { + List fields = doc.rootDoc().getFields("name"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(false)); + } + { + List fields = doc.rootDoc().getFields("name._original"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(true)); + } + } + + public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException { + var indexSettingsBuilder = getIndexSettingsBuilder(); + indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic"); + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("name"); + b.field("type", "match_only_text"); + b.startObject("fields"); + b.startObject("keyword"); + b.field("type", "keyword"); + b.endObject(); + b.endObject(); + b.endObject(); + }); + DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper(); + + var source = source(b -> b.field("name", "quick brown fox")); + ParsedDocument doc = mapper.parse(source); + { + List fields = doc.rootDoc().getFields("name"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(false)); + } + { + List fields = doc.rootDoc().getFields("name._original"); + assertThat(fields, empty()); + } + } + + public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException { + var indexSettingsBuilder = getIndexSettingsBuilder(); + indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic"); + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("name"); + b.field("type", "keyword"); + b.startObject("fields"); + b.startObject("text"); + b.field("type", "match_only_text"); + b.endObject(); + b.endObject(); + b.endObject(); + }); + DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper(); + + var source = source(b -> b.field("name", "quick brown fox")); + ParsedDocument doc = mapper.parse(source); + { + List fields = doc.rootDoc().getFields("name.text"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(false)); + } + { + List fields = doc.rootDoc().getFields("name.text._original"); + assertThat(fields, empty()); + } + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 03f463b25a967..0c57b19c3ed2f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -602,6 +602,7 @@ public static class Builder { private boolean hasSyntheticSourceCompatibleKeywordField; public Builder add(FieldMapper.Builder builder) { + builder.currentFieldIsAMultiField = true; mapperBuilders.put(builder.leafName(), builder::build); if (builder instanceof KeywordFieldMapper.Builder kwd) { @@ -1384,6 +1385,7 @@ public abstract static class Builder extends Mapper.Builder implements ToXConten protected Optional sourceKeepMode = Optional.empty(); protected boolean hasScript = false; protected OnScriptError onScriptError = null; + protected boolean currentFieldIsAMultiField = false; /** * Creates a new Builder with a field name diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 08cef586e1438..0de372bf8c6b3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -300,9 +300,12 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind // storing the field without requiring users to explicitly set 'store'. // // If 'store' parameter was explicitly provided we'll reject the request. + // Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field this.store = Parameter.storeParam( m -> ((TextFieldMapper) m).store, - () -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false + () -> isSyntheticSourceEnabled + && currentFieldIsAMultiField == false + && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false ); this.indexCreatedVersion = indexCreatedVersion; this.analyzers = new TextParams.Analyzers( diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 4670738c1d210..fa2fce306ff8a 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -307,6 +307,73 @@ public void testStoreParameterDefaults() throws IOException { } } + public void testStoreParameterDefaultsSyntheticSource() throws IOException { + var indexSettingsBuilder = getIndexSettingsBuilder(); + indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic"); + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("name"); + b.field("type", "text"); + b.endObject(); + }); + DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper(); + + var source = source(b -> b.field("name", "quick brown fox")); + ParsedDocument doc = mapper.parse(source); + List fields = doc.rootDoc().getFields("name"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(true)); + } + + public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException { + var indexSettingsBuilder = getIndexSettingsBuilder(); + indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic"); + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("name"); + b.field("type", "text"); + b.startObject("fields"); + b.startObject("keyword"); + b.field("type", "keyword"); + b.endObject(); + b.endObject(); + b.endObject(); + }); + DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper(); + + var source = source(b -> b.field("name", "quick brown fox")); + ParsedDocument doc = mapper.parse(source); + List fields = doc.rootDoc().getFields("name"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(false)); + } + + public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException { + var indexSettingsBuilder = getIndexSettingsBuilder(); + indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic"); + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("name"); + b.field("type", "keyword"); + b.startObject("fields"); + b.startObject("text"); + b.field("type", "text"); + b.endObject(); + b.endObject(); + b.endObject(); + }); + DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper(); + + var source = source(b -> b.field("name", "quick brown fox")); + ParsedDocument doc = mapper.parse(source); + List fields = doc.rootDoc().getFields("name.text"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(false)); + } + public void testBWCSerialization() throws IOException { MapperService mapperService = createMapperService(fieldMapping(b -> { b.field("type", "text"); From a311d59a162ce098ddf368b617589c2f1c13fa55 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Sun, 8 Jun 2025 22:01:00 +0200 Subject: [PATCH 2/8] Update docs/changelog/129126.yaml --- docs/changelog/129126.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/129126.yaml diff --git a/docs/changelog/129126.yaml b/docs/changelog/129126.yaml new file mode 100644 index 0000000000000..b719af9892ba3 --- /dev/null +++ b/docs/changelog/129126.yaml @@ -0,0 +1,6 @@ +pr: 129126 +summary: "Synthetic source: avoid storing multi fields of type text and `match_only_text`\ + \ by default" +area: Mapping +type: bug +issues: [] From 0d6ea729ee2dac20615b0d3f332c66ae4665aa98 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 9 Jun 2025 10:35:00 +0200 Subject: [PATCH 3/8] Keep track of currentFieldIsAMultiField, so that we can determine the right default value for store param after serialization / field mapper merging. --- .../index/mapper/FieldMapper.java | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 0c57b19c3ed2f..c5efe9a984b87 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -88,19 +88,28 @@ public abstract class FieldMapper extends Mapper { * @param sourceKeepMode mode for storing the field source in synthetic source mode * @param hasScript whether a script is defined for the field * @param onScriptError the behaviour for when the defined script fails at runtime + * @param currentFieldIsAMultiField whether current field is part of a multi-field definition */ protected record BuilderParams( MultiFields multiFields, CopyTo copyTo, Optional sourceKeepMode, boolean hasScript, - OnScriptError onScriptError + OnScriptError onScriptError, + boolean currentFieldIsAMultiField ) { public static BuilderParams empty() { return empty; } - private static final BuilderParams empty = new BuilderParams(MultiFields.empty(), CopyTo.empty(), Optional.empty(), false, null); + private static final BuilderParams empty = new BuilderParams( + MultiFields.empty(), + CopyTo.empty(), + Optional.empty(), + false, + null, + false + ); } protected final MappedFieldType mappedFieldType; @@ -1398,6 +1407,7 @@ protected Builder(String name) { * Initialises all parameters from an existing mapper */ public Builder init(FieldMapper initializer) { + this.currentFieldIsAMultiField = initializer.builderParams.currentFieldIsAMultiField; for (Parameter param : getParameters()) { param.init(initializer); } @@ -1412,8 +1422,15 @@ public Builder addMultiField(FieldMapper.Builder builder) { return this; } - protected BuilderParams builderParams(Mapper.Builder mainFieldBuilder, MapperBuilderContext context) { - return new BuilderParams(multiFieldsBuilder.build(mainFieldBuilder, context), copyTo, sourceKeepMode, hasScript, onScriptError); + protected BuilderParams builderParams(FieldMapper.Builder mainFieldBuilder, MapperBuilderContext context) { + return new BuilderParams( + multiFieldsBuilder.build(mainFieldBuilder, context), + copyTo, + sourceKeepMode, + hasScript, + onScriptError, + mainFieldBuilder.currentFieldIsAMultiField + ); } protected void merge(FieldMapper in, Conflicts conflicts, MapperMergeContext mapperMergeContext) { From ae04e6643e79925dff3e23e27e6cf7e78f2c35a4 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 10 Jun 2025 11:52:39 +0200 Subject: [PATCH 4/8] iter --- .../extras/MatchOnlyTextFieldMapper.java | 18 ++++---- .../index/mapper/FieldMapper.java | 45 ++++++++----------- .../index/mapper/TextFieldMapper.java | 26 ++++++++--- 3 files changed, 47 insertions(+), 42 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 0568781ff5459..a16304a7069a4 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -102,12 +102,8 @@ public static class Builder extends FieldMapper.Builder { private final TextParams.Analyzers analyzers; - public Builder(String name, IndexAnalyzers indexAnalyzers) { - this(name, IndexVersion.current(), indexAnalyzers); - } - - public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) { - super(name); + public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) { + super(name, withinMultiField); this.indexCreatedVersion = indexCreatedVersion; this.analyzers = new TextParams.Analyzers( indexAnalyzers, @@ -141,13 +137,15 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) { public MatchOnlyTextFieldMapper build(MapperBuilderContext context) { MatchOnlyTextFieldType tft = buildFieldType(context); boolean storeSource = context.isSourceSynthetic() - && currentFieldIsAMultiField == false + && withinMultiField == false && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false; return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this); } } - public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); + public static final TypeParser PARSER = new TypeParser( + (n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField()) + ); public static class MatchOnlyTextFieldType extends StringFieldType { @@ -402,6 +400,7 @@ private String storedFieldNameForSyntheticSource() { private final int positionIncrementGap; private final boolean storeSource; private final FieldType fieldType; + private final boolean withinMultiField; private MatchOnlyTextFieldMapper( String simpleName, @@ -420,6 +419,7 @@ private MatchOnlyTextFieldMapper( this.indexAnalyzer = builder.analyzers.getIndexAnalyzer(); this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue(); this.storeSource = storeSource; + this.withinMultiField = builder.isWithinMultiField(); } @Override @@ -429,7 +429,7 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this); + return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index c5efe9a984b87..8d1d42d2eca3f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -83,33 +83,24 @@ public abstract class FieldMapper extends Mapper { static final Parameter[] EMPTY_PARAMETERS = new Parameter[0]; /** - * @param multiFields sub fields of this mapper - * @param copyTo copyTo fields of this mapper - * @param sourceKeepMode mode for storing the field source in synthetic source mode - * @param hasScript whether a script is defined for the field - * @param onScriptError the behaviour for when the defined script fails at runtime - * @param currentFieldIsAMultiField whether current field is part of a multi-field definition + * @param multiFields sub fields of this mapper + * @param copyTo copyTo fields of this mapper + * @param sourceKeepMode mode for storing the field source in synthetic source mode + * @param hasScript whether a script is defined for the field + * @param onScriptError the behaviour for when the defined script fails at runtime */ protected record BuilderParams( MultiFields multiFields, CopyTo copyTo, Optional sourceKeepMode, boolean hasScript, - OnScriptError onScriptError, - boolean currentFieldIsAMultiField + OnScriptError onScriptError ) { public static BuilderParams empty() { return empty; } - private static final BuilderParams empty = new BuilderParams( - MultiFields.empty(), - CopyTo.empty(), - Optional.empty(), - false, - null, - false - ); + private static final BuilderParams empty = new BuilderParams(MultiFields.empty(), CopyTo.empty(), Optional.empty(), false, null); } protected final MappedFieldType mappedFieldType; @@ -611,7 +602,6 @@ public static class Builder { private boolean hasSyntheticSourceCompatibleKeywordField; public Builder add(FieldMapper.Builder builder) { - builder.currentFieldIsAMultiField = true; mapperBuilders.put(builder.leafName(), builder::build); if (builder instanceof KeywordFieldMapper.Builder kwd) { @@ -1394,20 +1384,24 @@ public abstract static class Builder extends Mapper.Builder implements ToXConten protected Optional sourceKeepMode = Optional.empty(); protected boolean hasScript = false; protected OnScriptError onScriptError = null; - protected boolean currentFieldIsAMultiField = false; + protected final boolean withinMultiField; /** * Creates a new Builder with a field name */ protected Builder(String name) { + this(name, false); + } + + protected Builder(String name, boolean withinMultiField) { super(name); + this.withinMultiField = withinMultiField; } /** * Initialises all parameters from an existing mapper */ public Builder init(FieldMapper initializer) { - this.currentFieldIsAMultiField = initializer.builderParams.currentFieldIsAMultiField; for (Parameter param : getParameters()) { param.init(initializer); } @@ -1423,14 +1417,7 @@ public Builder addMultiField(FieldMapper.Builder builder) { } protected BuilderParams builderParams(FieldMapper.Builder mainFieldBuilder, MapperBuilderContext context) { - return new BuilderParams( - multiFieldsBuilder.build(mainFieldBuilder, context), - copyTo, - sourceKeepMode, - hasScript, - onScriptError, - mainFieldBuilder.currentFieldIsAMultiField - ); + return new BuilderParams(multiFieldsBuilder.build(mainFieldBuilder, context), copyTo, sourceKeepMode, hasScript, onScriptError); } protected void merge(FieldMapper in, Conflicts conflicts, MapperMergeContext mapperMergeContext) { @@ -1452,6 +1439,10 @@ protected final void validate() { } } + public boolean isWithinMultiField() { + return withinMultiField; + } + /** * @return the list of parameters defined for this mapper */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 0de372bf8c6b3..9c4ba89708e96 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -288,11 +288,17 @@ public static class Builder extends FieldMapper.Builder { final TextParams.Analyzers analyzers; public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) { - this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled); + this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false); } - public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) { - super(name); + public Builder( + String name, + IndexVersion indexCreatedVersion, + IndexAnalyzers indexAnalyzers, + boolean isSyntheticSourceEnabled, + boolean withinMultiField + ) { + super(name, withinMultiField); // If synthetic source is used we need to either store this field // to recreate the source or use keyword multi-fields for that. @@ -304,7 +310,7 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind this.store = Parameter.storeParam( m -> ((TextFieldMapper) m).store, () -> isSyntheticSourceEnabled - && currentFieldIsAMultiField == false + && this.withinMultiField == false && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false ); this.indexCreatedVersion = indexCreatedVersion; @@ -485,7 +491,13 @@ public TextFieldMapper build(MapperBuilderContext context) { } public static final TypeParser PARSER = createTypeParserWithLegacySupport( - (n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings())) + (n, c) -> new Builder( + n, + c.indexVersionCreated(), + c.getIndexAnalyzers(), + SourceFieldMapper.isSynthetic(c.getIndexSettings()), + c.isWithinMultiField() + ) ); private static class PhraseWrappedAnalyzer extends AnalyzerWrapper { @@ -1307,6 +1319,7 @@ public Query existsQuery(SearchExecutionContext context) { private final SubFieldInfo phraseFieldInfo; private final boolean isSyntheticSourceEnabled; + private final boolean isWithinMultiField; private TextFieldMapper( String simpleName, @@ -1340,6 +1353,7 @@ private TextFieldMapper( this.freqFilter = builder.freqFilter.getValue(); this.fieldData = builder.fieldData.get(); this.isSyntheticSourceEnabled = builder.isSyntheticSourceEnabled; + this.isWithinMultiField = builder.withinMultiField; } @Override @@ -1363,7 +1377,7 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled).init(this); + return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this); } @Override From 4c8047c206b999b7d15af7078f92c71bba347a68 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 10 Jun 2025 13:02:48 +0200 Subject: [PATCH 5/8] restore jdoc formatting --- .../org/elasticsearch/index/mapper/FieldMapper.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 8d1d42d2eca3f..b1e1b3eb9ee6e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -83,11 +83,11 @@ public abstract class FieldMapper extends Mapper { static final Parameter[] EMPTY_PARAMETERS = new Parameter[0]; /** - * @param multiFields sub fields of this mapper - * @param copyTo copyTo fields of this mapper - * @param sourceKeepMode mode for storing the field source in synthetic source mode - * @param hasScript whether a script is defined for the field - * @param onScriptError the behaviour for when the defined script fails at runtime + * @param multiFields sub fields of this mapper + * @param copyTo copyTo fields of this mapper + * @param sourceKeepMode mode for storing the field source in synthetic source mode + * @param hasScript whether a script is defined for the field + * @param onScriptError the behaviour for when the defined script fails at runtime */ protected record BuilderParams( MultiFields multiFields, From 50874787861f9aefb3b7e9a32b70ec88c13dc32e Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 10 Jun 2025 13:07:59 +0200 Subject: [PATCH 6/8] gate new behaviour behind an index version --- .../mapper/extras/MatchOnlyTextFieldMapper.java | 12 +++++++++--- .../org/elasticsearch/index/IndexVersions.java | 1 + .../index/mapper/TextFieldMapper.java | 15 +++++++++------ 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index a16304a7069a4..7b61bb8fa4fa9 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -33,6 +33,7 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.FieldDataContext; @@ -136,9 +137,14 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) { @Override public MatchOnlyTextFieldMapper build(MapperBuilderContext context) { MatchOnlyTextFieldType tft = buildFieldType(context); - boolean storeSource = context.isSourceSynthetic() - && withinMultiField == false - && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false; + final boolean storeSource; + if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) { + storeSource = context.isSourceSynthetic() + && withinMultiField == false + && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false; + } else { + storeSource = context.isSourceSynthetic(); + } return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this); } } diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index f32d4d7a2a302..970b63081225d 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -171,6 +171,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion DEFAULT_TO_ACORN_HNSW_FILTER_HEURISTIC = def(9_026_0_00, Version.LUCENE_10_2_1); public static final IndexVersion SEQ_NO_WITHOUT_POINTS = def(9_027_0_00, Version.LUCENE_10_2_1); public static final IndexVersion INDEX_INT_SORT_INT_TYPE = def(9_028_0_00, Version.LUCENE_10_2_1); + public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED = def(9_029_0_00, Version.LUCENE_10_2_1); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 9c4ba89708e96..05fe7ed5b837b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -307,12 +307,15 @@ public Builder( // // If 'store' parameter was explicitly provided we'll reject the request. // Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field - this.store = Parameter.storeParam( - m -> ((TextFieldMapper) m).store, - () -> isSyntheticSourceEnabled - && this.withinMultiField == false - && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false - ); + this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> { + if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) { + return isSyntheticSourceEnabled + && this.withinMultiField == false + && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false; + } else { + return isSyntheticSourceEnabled; + } + }); this.indexCreatedVersion = indexCreatedVersion; this.analyzers = new TextParams.Analyzers( indexAnalyzers, From a98f0401447fba38b0391dc4ee848ad880d1253b Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 10 Jun 2025 13:58:30 +0200 Subject: [PATCH 7/8] don't generalize withinMultiField --- .../index/mapper/extras/MatchOnlyTextFieldMapper.java | 6 ++++-- .../org/elasticsearch/index/mapper/FieldMapper.java | 10 ---------- .../elasticsearch/index/mapper/TextFieldMapper.java | 5 ++++- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 7b61bb8fa4fa9..1f799cc6d3d4c 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -102,9 +102,10 @@ public static class Builder extends FieldMapper.Builder { private final Parameter> meta = Parameter.metaParam(); private final TextParams.Analyzers analyzers; + private final boolean withinMultiField; public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) { - super(name, withinMultiField); + super(name); this.indexCreatedVersion = indexCreatedVersion; this.analyzers = new TextParams.Analyzers( indexAnalyzers, @@ -112,6 +113,7 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap, indexCreatedVersion ); + this.withinMultiField = withinMultiField; } @Override @@ -425,7 +427,7 @@ private MatchOnlyTextFieldMapper( this.indexAnalyzer = builder.analyzers.getIndexAnalyzer(); this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue(); this.storeSource = storeSource; - this.withinMultiField = builder.isWithinMultiField(); + this.withinMultiField = builder.withinMultiField; } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index b1e1b3eb9ee6e..34339d69379ce 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -1384,18 +1384,12 @@ public abstract static class Builder extends Mapper.Builder implements ToXConten protected Optional sourceKeepMode = Optional.empty(); protected boolean hasScript = false; protected OnScriptError onScriptError = null; - protected final boolean withinMultiField; /** * Creates a new Builder with a field name */ protected Builder(String name) { - this(name, false); - } - - protected Builder(String name, boolean withinMultiField) { super(name); - this.withinMultiField = withinMultiField; } /** @@ -1439,10 +1433,6 @@ protected final void validate() { } } - public boolean isWithinMultiField() { - return withinMultiField; - } - /** * @return the list of parameters defined for this mapper */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 05fe7ed5b837b..0e49506cd92e7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -287,6 +287,8 @@ public static class Builder extends FieldMapper.Builder { final TextParams.Analyzers analyzers; + private final boolean withinMultiField; + public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) { this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false); } @@ -298,7 +300,7 @@ public Builder( boolean isSyntheticSourceEnabled, boolean withinMultiField ) { - super(name, withinMultiField); + super(name); // If synthetic source is used we need to either store this field // to recreate the source or use keyword multi-fields for that. @@ -307,6 +309,7 @@ public Builder( // // If 'store' parameter was explicitly provided we'll reject the request. // Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field + this.withinMultiField = withinMultiField; this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> { if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) { return isSyntheticSourceEnabled From 281749c34f7168ad5085509a1442fc478c6308a1 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 10 Jun 2025 14:02:30 +0200 Subject: [PATCH 8/8] undo change --- .../main/java/org/elasticsearch/index/mapper/FieldMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 34339d69379ce..03f463b25a967 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -1410,7 +1410,7 @@ public Builder addMultiField(FieldMapper.Builder builder) { return this; } - protected BuilderParams builderParams(FieldMapper.Builder mainFieldBuilder, MapperBuilderContext context) { + protected BuilderParams builderParams(Mapper.Builder mainFieldBuilder, MapperBuilderContext context) { return new BuilderParams(multiFieldsBuilder.build(mainFieldBuilder, context), copyTo, sourceKeepMode, hasScript, onScriptError); }