Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,9 @@ private TimeLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) {
@Override
@InterfaceAudience.Private
public OriginalType toOriginalType() {
if (!isAdjustedToUTC) {
return null;
}
switch (unit) {
case MILLIS:
return OriginalType.TIME_MILLIS;
Expand Down Expand Up @@ -634,6 +637,9 @@ private TimestampLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) {
@Override
@InterfaceAudience.Private
public OriginalType toOriginalType() {
if (!isAdjustedToUTC) {
return null;
}
switch (unit) {
case MILLIS:
return OriginalType.TIMESTAMP_MILLIS;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,9 @@ public Optional<ConvertedType> visit(LogicalTypeAnnotation.DateLogicalTypeAnnota

@Override
public Optional<ConvertedType> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
if (!timeLogicalType.isAdjustedToUTC()) {
return empty();
}
switch (timeLogicalType.getUnit()) {
case MILLIS:
return of(ConvertedType.TIME_MILLIS);
Expand All @@ -323,6 +326,9 @@ public Optional<ConvertedType> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnota

@Override
public Optional<ConvertedType> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
if (!timestampLogicalType.isAdjustedToUTC()) {
return empty();
}
switch (timestampLogicalType.getUnit()) {
case MICROS:
return of(ConvertedType.TIMESTAMP_MICROS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,18 @@

import static java.util.Collections.emptyList;
import static org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByStart;
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
import static org.apache.parquet.schema.LogicalTypeAnnotation.bsonType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.enumType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.intType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.jsonType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.listType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.mapType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
Expand Down Expand Up @@ -196,47 +208,89 @@ public void testTimeLogicalTypes() {
ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
MessageType expected = Types.buildMessage()
.required(PrimitiveTypeName.INT64)
.as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
.as(timestampType(false, MILLIS))
.named("aTimestampNonUtcMillis")
.required(PrimitiveTypeName.INT64)
.as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
.as(timestampType(true, MILLIS))
.named("aTimestampUtcMillis")
.required(PrimitiveTypeName.INT64)
.as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
.as(timestampType(false, MICROS))
.named("aTimestampNonUtcMicros")
.required(PrimitiveTypeName.INT64)
.as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
.as(timestampType(true, MICROS))
.named("aTimestampUtcMicros")
.required(PrimitiveTypeName.INT64)
.as(timestampType(false, LogicalTypeAnnotation.TimeUnit.NANOS))
.as(timestampType(false, NANOS))
.named("aTimestampNonUtcNanos")
.required(PrimitiveTypeName.INT64)
.as(timestampType(true, LogicalTypeAnnotation.TimeUnit.NANOS))
.as(timestampType(true, NANOS))
.named("aTimestampUtcNanos")
.required(PrimitiveTypeName.INT32)
.as(timeType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
.as(timeType(false, MILLIS))
.named("aTimeNonUtcMillis")
.required(PrimitiveTypeName.INT32)
.as(timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
.as(timeType(true, MILLIS))
.named("aTimeUtcMillis")
.required(PrimitiveTypeName.INT64)
.as(timeType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
.as(timeType(false, MICROS))
.named("aTimeNonUtcMicros")
.required(PrimitiveTypeName.INT64)
.as(timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
.as(timeType(true, MICROS))
.named("aTimeUtcMicros")
.required(PrimitiveTypeName.INT64)
.as(timeType(false, LogicalTypeAnnotation.TimeUnit.NANOS))
.as(timeType(false, NANOS))
.named("aTimeNonUtcNanos")
.required(PrimitiveTypeName.INT64)
.as(timeType(true, LogicalTypeAnnotation.TimeUnit.NANOS))
.as(timeType(true, NANOS))
.named("aTimeUtcNanos")
.named("Message");
List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(expected);
MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
assertEquals(expected, schema);
}

@Test
public void testLogicalToConvertedTypeConversion() {
ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();

assertEquals(ConvertedType.UTF8, parquetMetadataConverter.convertToConvertedType(stringType()));
assertEquals(ConvertedType.ENUM, parquetMetadataConverter.convertToConvertedType(enumType()));

assertEquals(ConvertedType.INT_8, parquetMetadataConverter.convertToConvertedType(intType(8, true)));
assertEquals(ConvertedType.INT_16, parquetMetadataConverter.convertToConvertedType(intType(16, true)));
assertEquals(ConvertedType.INT_32, parquetMetadataConverter.convertToConvertedType(intType(32, true)));
assertEquals(ConvertedType.INT_64, parquetMetadataConverter.convertToConvertedType(intType(64, true)));
assertEquals(ConvertedType.UINT_8, parquetMetadataConverter.convertToConvertedType(intType(8, false)));
assertEquals(ConvertedType.UINT_16, parquetMetadataConverter.convertToConvertedType(intType(16, false)));
assertEquals(ConvertedType.UINT_32, parquetMetadataConverter.convertToConvertedType(intType(32, false)));
assertEquals(ConvertedType.UINT_64, parquetMetadataConverter.convertToConvertedType(intType(64, false)));
assertEquals(ConvertedType.DECIMAL, parquetMetadataConverter.convertToConvertedType(decimalType(8, 16)));

assertEquals(ConvertedType.TIMESTAMP_MILLIS, parquetMetadataConverter.convertToConvertedType(timestampType(true, MILLIS)));
assertEquals(ConvertedType.TIMESTAMP_MICROS, parquetMetadataConverter.convertToConvertedType(timestampType(true, MICROS)));
assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(true, NANOS)));
assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, MILLIS)));
assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, MICROS)));
assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, NANOS)));

assertEquals(ConvertedType.TIME_MILLIS, parquetMetadataConverter.convertToConvertedType(timeType(true, MILLIS)));
assertEquals(ConvertedType.TIME_MICROS, parquetMetadataConverter.convertToConvertedType(timeType(true, MICROS)));
assertNull(parquetMetadataConverter.convertToConvertedType(timeType(true, NANOS)));
assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, MILLIS)));
assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, MICROS)));
assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, NANOS)));

assertEquals(ConvertedType.DATE, parquetMetadataConverter.convertToConvertedType(dateType()));

assertEquals(ConvertedType.INTERVAL, parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance()));
assertEquals(ConvertedType.JSON, parquetMetadataConverter.convertToConvertedType(jsonType()));
assertEquals(ConvertedType.BSON, parquetMetadataConverter.convertToConvertedType(bsonType()));

assertEquals(ConvertedType.LIST, parquetMetadataConverter.convertToConvertedType(listType()));
assertEquals(ConvertedType.MAP, parquetMetadataConverter.convertToConvertedType(mapType()));
assertEquals(ConvertedType.MAP_KEY_VALUE, parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance()));
}

@Test
public void testEnumEquivalence() {
ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
Expand Down Expand Up @@ -1024,7 +1078,7 @@ public void testColumnIndexConversion() {
stats.updateStats(200l);
stats.updateStats(500l);
builder.add(stats);
org.apache.parquet.format.ColumnIndex parquetColumnIndex =
org.apache.parquet.format.ColumnIndex parquetColumnIndex =
ParquetMetadataConverter.toParquetColumnIndex(type, builder.build());
ColumnIndex columnIndex = ParquetMetadataConverter.fromParquetColumnIndex(type, parquetColumnIndex);
assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
Expand Down