diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 5f61ed60b4..44723767ae 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -555,6 +555,9 @@ private TimeLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) { @Override @InterfaceAudience.Private public OriginalType toOriginalType() { + if (!isAdjustedToUTC) { + return null; + } switch (unit) { case MILLIS: return OriginalType.TIME_MILLIS; @@ -634,6 +637,9 @@ private TimestampLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) { @Override @InterfaceAudience.Private public OriginalType toOriginalType() { + if (!isAdjustedToUTC) { + return null; + } switch (unit) { case MILLIS: return OriginalType.TIMESTAMP_MILLIS; diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index efb0608e8c..fb0ca7b09a 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -309,6 +309,9 @@ public Optional visit(LogicalTypeAnnotation.DateLogicalTypeAnnota @Override public Optional visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) { + if (!timeLogicalType.isAdjustedToUTC()) { + return empty(); + } switch (timeLogicalType.getUnit()) { case MILLIS: return of(ConvertedType.TIME_MILLIS); @@ -323,6 +326,9 @@ public Optional visit(LogicalTypeAnnotation.TimeLogicalTypeAnnota @Override public Optional visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) { + if (!timestampLogicalType.isAdjustedToUTC()) { + return empty(); + } switch (timestampLogicalType.getUnit()) { case MICROS: return of(ConvertedType.TIMESTAMP_MICROS); diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index 358a29a671..65244f478d 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -20,6 +20,18 @@ import static java.util.Collections.emptyList; import static org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByStart; +import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS; +import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS; +import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS; +import static org.apache.parquet.schema.LogicalTypeAnnotation.bsonType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.enumType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.intType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.jsonType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.listType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.mapType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType; import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType; import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType; import static org.apache.parquet.schema.MessageTypeParser.parseMessageType; @@ -196,40 +208,40 @@ public void testTimeLogicalTypes() { ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); MessageType expected = Types.buildMessage() .required(PrimitiveTypeName.INT64) - .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MILLIS)) + .as(timestampType(false, MILLIS)) .named("aTimestampNonUtcMillis") .required(PrimitiveTypeName.INT64) - .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS)) + .as(timestampType(true, MILLIS)) .named("aTimestampUtcMillis") .required(PrimitiveTypeName.INT64) - .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MICROS)) + .as(timestampType(false, MICROS)) .named("aTimestampNonUtcMicros") .required(PrimitiveTypeName.INT64) - .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS)) + .as(timestampType(true, MICROS)) .named("aTimestampUtcMicros") .required(PrimitiveTypeName.INT64) - .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.NANOS)) + .as(timestampType(false, NANOS)) .named("aTimestampNonUtcNanos") .required(PrimitiveTypeName.INT64) - .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.NANOS)) + .as(timestampType(true, NANOS)) .named("aTimestampUtcNanos") .required(PrimitiveTypeName.INT32) - .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MILLIS)) + .as(timeType(false, MILLIS)) .named("aTimeNonUtcMillis") .required(PrimitiveTypeName.INT32) - .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS)) + .as(timeType(true, MILLIS)) .named("aTimeUtcMillis") .required(PrimitiveTypeName.INT64) - .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MICROS)) + .as(timeType(false, MICROS)) .named("aTimeNonUtcMicros") .required(PrimitiveTypeName.INT64) - .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS)) + .as(timeType(true, MICROS)) .named("aTimeUtcMicros") .required(PrimitiveTypeName.INT64) - .as(timeType(false, LogicalTypeAnnotation.TimeUnit.NANOS)) + .as(timeType(false, NANOS)) .named("aTimeNonUtcNanos") .required(PrimitiveTypeName.INT64) - .as(timeType(true, LogicalTypeAnnotation.TimeUnit.NANOS)) + .as(timeType(true, NANOS)) .named("aTimeUtcNanos") .named("Message"); List parquetSchema = parquetMetadataConverter.toParquetSchema(expected); @@ -237,6 +249,48 @@ public void testTimeLogicalTypes() { assertEquals(expected, schema); } + @Test + public void testLogicalToConvertedTypeConversion() { + ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); + + assertEquals(ConvertedType.UTF8, parquetMetadataConverter.convertToConvertedType(stringType())); + assertEquals(ConvertedType.ENUM, parquetMetadataConverter.convertToConvertedType(enumType())); + + assertEquals(ConvertedType.INT_8, parquetMetadataConverter.convertToConvertedType(intType(8, true))); + assertEquals(ConvertedType.INT_16, parquetMetadataConverter.convertToConvertedType(intType(16, true))); + assertEquals(ConvertedType.INT_32, parquetMetadataConverter.convertToConvertedType(intType(32, true))); + assertEquals(ConvertedType.INT_64, parquetMetadataConverter.convertToConvertedType(intType(64, true))); + assertEquals(ConvertedType.UINT_8, parquetMetadataConverter.convertToConvertedType(intType(8, false))); + assertEquals(ConvertedType.UINT_16, parquetMetadataConverter.convertToConvertedType(intType(16, false))); + assertEquals(ConvertedType.UINT_32, parquetMetadataConverter.convertToConvertedType(intType(32, false))); + assertEquals(ConvertedType.UINT_64, parquetMetadataConverter.convertToConvertedType(intType(64, false))); + assertEquals(ConvertedType.DECIMAL, parquetMetadataConverter.convertToConvertedType(decimalType(8, 16))); + + assertEquals(ConvertedType.TIMESTAMP_MILLIS, parquetMetadataConverter.convertToConvertedType(timestampType(true, MILLIS))); + assertEquals(ConvertedType.TIMESTAMP_MICROS, parquetMetadataConverter.convertToConvertedType(timestampType(true, MICROS))); + assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(true, NANOS))); + assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, MILLIS))); + assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, MICROS))); + assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, NANOS))); + + assertEquals(ConvertedType.TIME_MILLIS, parquetMetadataConverter.convertToConvertedType(timeType(true, MILLIS))); + assertEquals(ConvertedType.TIME_MICROS, parquetMetadataConverter.convertToConvertedType(timeType(true, MICROS))); + assertNull(parquetMetadataConverter.convertToConvertedType(timeType(true, NANOS))); + assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, MILLIS))); + assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, MICROS))); + assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, NANOS))); + + assertEquals(ConvertedType.DATE, parquetMetadataConverter.convertToConvertedType(dateType())); + + assertEquals(ConvertedType.INTERVAL, parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance())); + assertEquals(ConvertedType.JSON, parquetMetadataConverter.convertToConvertedType(jsonType())); + assertEquals(ConvertedType.BSON, parquetMetadataConverter.convertToConvertedType(bsonType())); + + assertEquals(ConvertedType.LIST, parquetMetadataConverter.convertToConvertedType(listType())); + assertEquals(ConvertedType.MAP, parquetMetadataConverter.convertToConvertedType(mapType())); + assertEquals(ConvertedType.MAP_KEY_VALUE, parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance())); + } + @Test public void testEnumEquivalence() { ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); @@ -1024,7 +1078,7 @@ public void testColumnIndexConversion() { stats.updateStats(200l); stats.updateStats(500l); builder.add(stats); - org.apache.parquet.format.ColumnIndex parquetColumnIndex = + org.apache.parquet.format.ColumnIndex parquetColumnIndex = ParquetMetadataConverter.toParquetColumnIndex(type, builder.build()); ColumnIndex columnIndex = ParquetMetadataConverter.fromParquetColumnIndex(type, parquetColumnIndex); assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());