|
50 | 50 | import org.apache.paimon.table.source.ChainSplit; |
51 | 51 | import org.apache.paimon.table.source.DataSplit; |
52 | 52 | import org.apache.paimon.table.source.DeletionFile; |
| 53 | +import org.apache.paimon.table.source.KeyValueSystemFieldsRecordReader; |
53 | 54 | import org.apache.paimon.table.source.Split; |
54 | 55 | import org.apache.paimon.types.DataField; |
55 | 56 | import org.apache.paimon.types.RowType; |
|
62 | 63 | import java.io.IOException; |
63 | 64 | import java.util.ArrayList; |
64 | 65 | import java.util.Arrays; |
| 66 | +import java.util.Collections; |
65 | 67 | import java.util.Comparator; |
66 | 68 | import java.util.List; |
67 | 69 | import java.util.Set; |
@@ -97,6 +99,11 @@ public class MergeFileSplitRead implements SplitRead<KeyValue> { |
97 | 99 | @Nullable private int[][] outerProjection; |
98 | 100 | @Nullable private VariantAccessInfo[] variantAccess; |
99 | 101 |
|
| 102 | + private List<KeyValueSystemFieldsRecordReader.SystemFieldExtractor> systemFieldExtractors = |
| 103 | + Collections.emptyList(); |
| 104 | + |
| 105 | + @Nullable private int[] projection = null; |
| 106 | + |
100 | 107 | private boolean forceKeepDelete = false; |
101 | 108 |
|
102 | 109 | public MergeFileSplitRead( |
@@ -137,18 +144,31 @@ public MergeFileSplitRead withReadKeyType(RowType readKeyType) { |
137 | 144 | return this; |
138 | 145 | } |
139 | 146 |
|
| 147 | + public List<KeyValueSystemFieldsRecordReader.SystemFieldExtractor> getSystemFieldExtractors() { |
| 148 | + return systemFieldExtractors; |
| 149 | + } |
| 150 | + |
| 151 | + @Nullable |
| 152 | + public int[] getProjection() { |
| 153 | + return projection; |
| 154 | + } |
| 155 | + |
140 | 156 | @Override |
141 | 157 | public MergeFileSplitRead withReadType(RowType readType) { |
| 158 | + this.systemFieldExtractors = collectSystemFieldExtractors(readType); |
| 159 | + this.projection = createProjection(readType); |
| 160 | + |
142 | 161 | // todo: replace projectedFields with readType |
143 | 162 | RowType tableRowType = tableSchema.logicalRowType(); |
| 163 | + List<String> fieldNames = tableSchema.fieldNames(); |
144 | 164 | int[][] projectedFields = |
145 | 165 | Arrays.stream(tableRowType.getFieldIndices(readType.getFieldNames())) |
| 166 | + .filter(i -> i >= 0) // Filter out system fields (index = -1) |
146 | 167 | .mapToObj(i -> new int[] {i}) |
147 | 168 | .toArray(int[][]::new); |
148 | 169 | int[][] newProjectedFields = projectedFields; |
149 | 170 | if (sequenceFields.size() > 0) { |
150 | 171 | // make sure projection contains sequence fields |
151 | | - List<String> fieldNames = tableSchema.fieldNames(); |
152 | 172 | List<String> projectedNames = Projection.of(projectedFields).project(fieldNames); |
153 | 173 | int[] lackFields = |
154 | 174 | sequenceFields.stream() |
@@ -408,4 +428,68 @@ public UserDefinedSeqComparator createUdsComparator() { |
408 | 428 | return UserDefinedSeqComparator.create( |
409 | 429 | readerFactoryBuilder.readValueType(), sequenceFields, sequenceOrder); |
410 | 430 | } |
| 431 | + |
| 432 | + /** |
| 433 | + * Collects system field extractors for the requested read type. |
| 434 | + * |
| 435 | + * @param readType the requested read type (may contain system fields) |
| 436 | + * @return list of extractors for system fields present in readType |
| 437 | + */ |
| 438 | + private List<KeyValueSystemFieldsRecordReader.SystemFieldExtractor> |
| 439 | + collectSystemFieldExtractors(RowType readType) { |
| 440 | + if (readType == null) { |
| 441 | + return Collections.emptyList(); |
| 442 | + } |
| 443 | + |
| 444 | + List<KeyValueSystemFieldsRecordReader.SystemFieldExtractor> extractors = new ArrayList<>(); |
| 445 | + for (String fieldName : readType.getFieldNames()) { |
| 446 | + KeyValueSystemFieldsRecordReader.SystemFieldExtractor extractor = |
| 447 | + KeyValueSystemFieldsRecordReader.getExtractor(fieldName); |
| 448 | + if (extractor != null) { |
| 449 | + extractors.add(extractor); |
| 450 | + } |
| 451 | + } |
| 452 | + return extractors; |
| 453 | + } |
| 454 | + |
| 455 | + /** |
| 456 | + * Creates a projection array to reorder fields from natural order to requested order. |
| 457 | + * |
| 458 | + * <p>Example: readType = [pt, rowkind, col1], systemFieldExtractors = [rowkind] Natural order: |
| 459 | + * [rowkind(0), pt(1), col1(2)] (physical fields pt, col1 in readType order) Requested order: |
| 460 | + * [pt, rowkind, col1] Projection: [1, 0, 2] |
| 461 | + * |
| 462 | + * @param readType the requested read type (may contain system fields) |
| 463 | + * @return projection array, or null if fields are already in natural order |
| 464 | + */ |
| 465 | + @Nullable |
| 466 | + private int[] createProjection(RowType readType) { |
| 467 | + if (readType == null || systemFieldExtractors.isEmpty()) { |
| 468 | + return null; |
| 469 | + } |
| 470 | + |
| 471 | + List<String> readFieldNames = readType.getFieldNames(); |
| 472 | + int[] projection = new int[readFieldNames.size()]; |
| 473 | + // System fields are first in natural order |
| 474 | + int systemIdx = 0; |
| 475 | + // Physical fields follow system fields in natural order |
| 476 | + int physicalIdx = systemFieldExtractors.size(); |
| 477 | + boolean needsProjection = false; |
| 478 | + |
| 479 | + for (int i = 0; i < readFieldNames.size(); i++) { |
| 480 | + String fieldName = readFieldNames.get(i); |
| 481 | + // Check if it's a system field |
| 482 | + if (KeyValueSystemFieldsRecordReader.getExtractor(fieldName) != null) { |
| 483 | + projection[i] = systemIdx++; |
| 484 | + } else { |
| 485 | + projection[i] = physicalIdx++; |
| 486 | + } |
| 487 | + |
| 488 | + if (projection[i] != i) { |
| 489 | + needsProjection = true; |
| 490 | + } |
| 491 | + } |
| 492 | + |
| 493 | + return needsProjection ? projection : null; |
| 494 | + } |
411 | 495 | } |
0 commit comments