diff --git a/parquet-column/pom.xml b/parquet-column/pom.xml index 654fad27cd..01b5b8e8c6 100644 --- a/parquet-column/pom.xml +++ b/parquet-column/pom.xml @@ -76,6 +76,12 @@ ${slf4j.version} + + org.locationtech.jts + jts-core + ${jts.version} + + com.carrotsearch junit-benchmarks diff --git a/parquet-column/src/main/java/org/apache/parquet/column/schema/EdgeInterpolationAlgorithm.java b/parquet-column/src/main/java/org/apache/parquet/column/schema/EdgeInterpolationAlgorithm.java new file mode 100644 index 0000000000..5357073a8f --- /dev/null +++ b/parquet-column/src/main/java/org/apache/parquet/column/schema/EdgeInterpolationAlgorithm.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.parquet.column.schema; + +/** + * Edge interpolation algorithm for Geography logical type + */ +public enum EdgeInterpolationAlgorithm { + SPHERICAL(0), + VINCENTY(1), + THOMAS(2), + ANDOYER(3), + KARNEY(4); + + private final int value; + + private EdgeInterpolationAlgorithm(int value) { + this.value = value; + } + + /** + * Get the integer value of this enum value, as defined in the Thrift IDL. 
+ */ + public int getValue() { + return value; + } + + /** + * Find the enum type by its integer value, as defined in the Thrift IDL. + * @return the matching algorithm (never null); throws IllegalArgumentException for an unrecognized value. + */ + public static EdgeInterpolationAlgorithm findByValue(int value) { + switch (value) { + case 0: + return SPHERICAL; + case 1: + return VINCENTY; + case 2: + return THOMAS; + case 3: + return ANDOYER; + case 4: + return KARNEY; + default: + throw new IllegalArgumentException("Unrecognized EdgeInterpolationAlgorithm value: " + value); + } + } +} diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 749beaa95e..be98e071f6 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -41,8 +41,13 @@ import java.util.Set; import java.util.function.Supplier; import org.apache.parquet.Preconditions; +import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm; public abstract class LogicalTypeAnnotation { + + public static final String DEFAULT_CRS = "OGC:CRS84"; + public static final EdgeInterpolationAlgorithm DEFAULT_ALGO = EdgeInterpolationAlgorithm.SPHERICAL; + enum LogicalTypeToken { MAP { @Override @@ -155,6 +160,31 @@ protected LogicalTypeAnnotation fromString(List params) { return float16Type(); } }, + GEOMETRY { + @Override + protected LogicalTypeAnnotation fromString(List params) { + if (params.size() > 1) { + throw new RuntimeException( + "Expecting at most 1 parameter for geometry logical type, got " + params.size()); + } + String crs = params.isEmpty() ? 
null : params.get(0); + return geometryType(crs); + } + }, + GEOGRAPHY { + @Override + protected LogicalTypeAnnotation fromString(List params) { + if (params.size() > 2) { + throw new RuntimeException( + "Expecting at most 2 parameters for geography logical type (crs and edge algorithm), got " + + params.size()); + } + String crs = !params.isEmpty() ? params.get(0) : null; + EdgeInterpolationAlgorithm algo = + params.size() > 1 ? EdgeInterpolationAlgorithm.valueOf(params.get(1)) : null; + return geographyType(crs, algo); + } + }, UNKNOWN { @Override protected LogicalTypeAnnotation fromString(List params) { @@ -334,6 +364,18 @@ public static Float16LogicalTypeAnnotation float16Type() { return Float16LogicalTypeAnnotation.INSTANCE; } + public static GeometryLogicalTypeAnnotation geometryType(String crs) { + return new GeometryLogicalTypeAnnotation(crs); + } + + public static GeographyLogicalTypeAnnotation geographyType(String crs, EdgeInterpolationAlgorithm edgeAlgorithm) { + return new GeographyLogicalTypeAnnotation(crs, edgeAlgorithm); + } + + public static GeographyLogicalTypeAnnotation geographyType() { + return new GeographyLogicalTypeAnnotation(null, null); + } + public static UnknownLogicalTypeAnnotation unknownType() { return UnknownLogicalTypeAnnotation.INSTANCE; } @@ -1183,6 +1225,124 @@ public boolean equals(Object obj) { } } + public static class GeometryLogicalTypeAnnotation extends LogicalTypeAnnotation { + private final String crs; + + private GeometryLogicalTypeAnnotation(String crs) { + this.crs = crs; + } + + @Override + @Deprecated + public OriginalType toOriginalType() { + return null; + } + + @Override + public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + return logicalTypeAnnotationVisitor.visit(this); + } + + @Override + LogicalTypeToken getType() { + return LogicalTypeToken.GEOMETRY; + } + + @Override + protected String typeParametersAsString() { + if (crs == null || crs.isEmpty()) { + return ""; + } + return 
String.format("(%s)", crs); + } + + public String getCrs() { + return crs; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof GeometryLogicalTypeAnnotation)) { + return false; + } + GeometryLogicalTypeAnnotation other = (GeometryLogicalTypeAnnotation) obj; + return Objects.equals(crs, other.crs); + } + + @Override + public int hashCode() { + return Objects.hash(crs); + } + + @Override + PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) { + return PrimitiveStringifier.WKB_STRINGIFIER; + } + } + + public static class GeographyLogicalTypeAnnotation extends LogicalTypeAnnotation { + private final String crs; + private final EdgeInterpolationAlgorithm algorithm; + + private GeographyLogicalTypeAnnotation(String crs, EdgeInterpolationAlgorithm algorithm) { + this.crs = crs; + this.algorithm = algorithm; + } + + @Override + @Deprecated + public OriginalType toOriginalType() { + return null; + } + + @Override + public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + return logicalTypeAnnotationVisitor.visit(this); + } + + @Override + LogicalTypeToken getType() { + return LogicalTypeToken.GEOGRAPHY; + } + + @Override + protected String typeParametersAsString() { + boolean hasCrs = crs != null && !crs.isEmpty(); + boolean hasAlgo = algorithm != null; + if (!hasCrs && !hasAlgo) { + return ""; + } + return String.format("(%s,%s)", hasCrs ? crs : DEFAULT_CRS, hasAlgo ? 
algorithm : DEFAULT_ALGO); + } + + public String getCrs() { + return crs; + } + + public EdgeInterpolationAlgorithm getAlgorithm() { + return algorithm; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof GeographyLogicalTypeAnnotation)) { + return false; + } + GeographyLogicalTypeAnnotation other = (GeographyLogicalTypeAnnotation) obj; + return Objects.equals(crs, other.crs) && Objects.equals(algorithm, other.algorithm); + } + + @Override + public int hashCode() { + return Objects.hash(crs, algorithm); + } + + @Override + PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) { + return PrimitiveStringifier.WKB_STRINGIFIER; + } + } + /** * Implement this interface to visit a logical type annotation in the schema. * The default implementation for each logical type specific visitor method is empty. @@ -1259,6 +1419,14 @@ default Optional visit(Float16LogicalTypeAnnotation float16LogicalType) { return empty(); } + default Optional visit(GeometryLogicalTypeAnnotation geometryLogicalType) { + return empty(); + } + + default Optional visit(GeographyLogicalTypeAnnotation geographyLogicalType) { + return empty(); + } + default Optional visit(UnknownLogicalTypeAnnotation unknownLogicalTypeAnnotation) { return empty(); } diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java index 7aface72a7..3bbcca981b 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java @@ -35,6 +35,9 @@ import java.util.concurrent.TimeUnit; import javax.naming.OperationNotSupportedException; import org.apache.parquet.io.api.Binary; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.io.ParseException; +import org.locationtech.jts.io.WKBReader; /** * Class that provides string representations for the 
primitive values. These string values are to be used for @@ -442,6 +445,21 @@ private void appendHex(byte[] array, int offset, int length, StringBuilder build } }; + static final PrimitiveStringifier WKB_STRINGIFIER = new BinaryStringifierBase("WKB_STRINGIFIER") { + + @Override + String stringifyNotNull(Binary value) { + + try { + WKBReader reader = new WKBReader(); + Geometry geometry = reader.read(value.getBytesUnsafe()); + return geometry.toText(); + } catch (ParseException e) { + return BINARY_INVALID; + } + } + }; + static final PrimitiveStringifier FLOAT16_STRINGIFIER = new BinaryStringifierBase("FLOAT16_STRINGIFIER") { @Override diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index e74d7cde02..6beff4da93 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -271,6 +271,18 @@ public Optional visit( LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) { return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR); } + + @Override + public Optional visit( + LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) { + return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR); + } + + @Override + public Optional visit( + LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyLogicalType) { + return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR); + } }) .orElseThrow(() -> new ShouldNeverHappenException( "No comparator logic implemented for BINARY logical type: " + logicalType)); diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java index 399672022c..fd82d36768 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java +++ 
b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java @@ -577,6 +577,18 @@ public Optional visit( return checkBinaryPrimitiveType(enumLogicalType); } + @Override + public Optional visit( + LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) { + return checkBinaryPrimitiveType(geometryLogicalType); + } + + @Override + public Optional visit( + LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyLogicalType) { + return checkBinaryPrimitiveType(geographyLogicalType); + } + private Optional checkFixedPrimitiveType( int l, LogicalTypeAnnotation logicalTypeAnnotation) { Preconditions.checkState( diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java index 71886d1208..018ce5b276 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java @@ -55,6 +55,7 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; +import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm; import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type.Repetition; import org.junit.Assert; @@ -1477,6 +1478,102 @@ public void testDecimalLogicalTypeWithDeprecatedPrecisionMismatch() { .named("aDecimal"); } + @Test + public void testGeometryLogicalType() { + // Test with default CRS + PrimitiveType defaultCrsExpected = + new PrimitiveType(REQUIRED, BINARY, "aGeometry", LogicalTypeAnnotation.geometryType("OGC:CRS84")); + PrimitiveType defaultCrsActual = Types.required(BINARY) + .as(LogicalTypeAnnotation.geometryType("OGC:CRS84")) + .named("aGeometry"); + Assert.assertEquals(defaultCrsExpected, defaultCrsActual); + + // Test with custom CRS + PrimitiveType customCrsExpected = + new PrimitiveType(REQUIRED, BINARY, "aGeometry", 
LogicalTypeAnnotation.geometryType("EPSG:4326")); + PrimitiveType customCrsActual = Types.required(BINARY) + .as(LogicalTypeAnnotation.geometryType("EPSG:4326")) + .named("aGeometry"); + Assert.assertEquals(customCrsExpected, customCrsActual); + + // Test with optional repetition + PrimitiveType optionalGeometryExpected = + new PrimitiveType(OPTIONAL, BINARY, "aGeometry", LogicalTypeAnnotation.geometryType("OGC:CRS84")); + PrimitiveType optionalGeometryActual = Types.optional(BINARY) + .as(LogicalTypeAnnotation.geometryType("OGC:CRS84")) + .named("aGeometry"); + Assert.assertEquals(optionalGeometryExpected, optionalGeometryActual); + } + + @Test + public void testGeographyLogicalType() { + // Test with default CRS and no edge algorithm + PrimitiveType defaultCrsExpected = new PrimitiveType( + REQUIRED, BINARY, "aGeography", LogicalTypeAnnotation.geographyType("OGC:CRS84", null)); + PrimitiveType defaultCrsActual = Types.required(BINARY) + .as(LogicalTypeAnnotation.geographyType("OGC:CRS84", null)) + .named("aGeography"); + Assert.assertEquals(defaultCrsExpected, defaultCrsActual); + + // Test with custom CRS and no edge algorithm + PrimitiveType customCrsExpected = new PrimitiveType( + REQUIRED, BINARY, "aGeography", LogicalTypeAnnotation.geographyType("EPSG:4326", null)); + PrimitiveType customCrsActual = Types.required(BINARY) + .as(LogicalTypeAnnotation.geographyType("EPSG:4326", null)) + .named("aGeography"); + Assert.assertEquals(customCrsExpected, customCrsActual); + + // Test with custom CRS and edge algorithm + EdgeInterpolationAlgorithm greatCircle = EdgeInterpolationAlgorithm.SPHERICAL; + PrimitiveType customCrsWithEdgeAlgorithmExpected = new PrimitiveType( + REQUIRED, BINARY, "aGeography", LogicalTypeAnnotation.geographyType("EPSG:4326", greatCircle)); + PrimitiveType customCrsWithEdgeAlgorithmActual = Types.required(BINARY) + .as(LogicalTypeAnnotation.geographyType("EPSG:4326", greatCircle)) + .named("aGeography"); + 
Assert.assertEquals(customCrsWithEdgeAlgorithmExpected, customCrsWithEdgeAlgorithmActual); + + // Test with optional repetition + PrimitiveType optionalGeographyExpected = new PrimitiveType( + OPTIONAL, BINARY, "aGeography", LogicalTypeAnnotation.geographyType("OGC:CRS84", null)); + PrimitiveType optionalGeographyActual = Types.optional(BINARY) + .as(LogicalTypeAnnotation.geographyType("OGC:CRS84", null)) + .named("aGeography"); + Assert.assertEquals(optionalGeographyExpected, optionalGeographyActual); + } + + @Test + public void testGeographyLogicalTypeWithoutEdgeInterpolationAlgorithm() { + // Test with default CRS and no edge algorithm + PrimitiveType defaultCrsExpected = + new PrimitiveType(REQUIRED, BINARY, "aGeography", LogicalTypeAnnotation.geographyType()); + PrimitiveType defaultCrsActual = + Types.required(BINARY).as(LogicalTypeAnnotation.geographyType()).named("aGeography"); + Assert.assertEquals(defaultCrsExpected, defaultCrsActual); + + // Test with custom CRS and no edge algorithm + PrimitiveType customCrsExpected = new PrimitiveType( + REQUIRED, BINARY, "aGeography", LogicalTypeAnnotation.geographyType("EPSG:4326", null)); + PrimitiveType customCrsActual = Types.required(BINARY) + .as(LogicalTypeAnnotation.geographyType("EPSG:4326", null)) + .named("aGeography"); + Assert.assertEquals(customCrsExpected, customCrsActual); + + // Repeat the custom CRS case; the edge algorithm is still null here + PrimitiveType customCrsWithEdgeAlgorithmExpected = new PrimitiveType( + REQUIRED, BINARY, "aGeography", LogicalTypeAnnotation.geographyType("EPSG:4326", null)); + PrimitiveType customCrsWithEdgeAlgorithmActual = Types.required(BINARY) + .as(LogicalTypeAnnotation.geographyType("EPSG:4326", null)) + .named("aGeography"); + Assert.assertEquals(customCrsWithEdgeAlgorithmExpected, customCrsWithEdgeAlgorithmActual); + + // Test with optional repetition + PrimitiveType optionalGeographyExpected = + new PrimitiveType(OPTIONAL, BINARY, "aGeography", LogicalTypeAnnotation.geographyType()); + 
PrimitiveType optionalGeographyActual = + Types.optional(BINARY).as(LogicalTypeAnnotation.geographyType()).named("aGeography"); + Assert.assertEquals(optionalGeographyExpected, optionalGeographyActual); + } + /** * A convenience method to avoid a large number of @Test(expected=...) tests * diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index 5759be234f..15fcd14a73 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -76,10 +76,13 @@ import org.apache.parquet.format.DataPageHeaderV2; import org.apache.parquet.format.DecimalType; import org.apache.parquet.format.DictionaryPageHeader; +import org.apache.parquet.format.EdgeInterpolationAlgorithm; import org.apache.parquet.format.Encoding; import org.apache.parquet.format.EncryptionWithColumnKey; import org.apache.parquet.format.FieldRepetitionType; import org.apache.parquet.format.FileMetaData; +import org.apache.parquet.format.GeographyType; +import org.apache.parquet.format.GeometryType; import org.apache.parquet.format.IntType; import org.apache.parquet.format.KeyValue; import org.apache.parquet.format.LogicalType; @@ -520,6 +523,27 @@ public Optional visit(LogicalTypeAnnotation.IntervalLogicalTypeAnno public Optional visit(LogicalTypeAnnotation.VariantLogicalTypeAnnotation variantLogicalType) { return of(LogicalTypes.VARIANT(variantLogicalType.getSpecVersion())); } + + @Override + public Optional visit(LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) { + GeometryType geometryType = new GeometryType(); + if (geometryLogicalType.getCrs() != null + && !geometryLogicalType.getCrs().isEmpty()) { + geometryType.setCrs(geometryLogicalType.getCrs()); + } + return 
of(LogicalType.GEOMETRY(geometryType)); + } + + @Override + public Optional visit(LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyLogicalType) { + GeographyType geographyType = new GeographyType(); + if (geographyLogicalType.getCrs() != null + && !geographyLogicalType.getCrs().isEmpty()) { + geographyType.setCrs(geographyLogicalType.getCrs()); + } + geographyType.setAlgorithm(fromParquetEdgeInterpolationAlgorithm(geographyLogicalType.getAlgorithm())); + return of(LogicalType.GEOGRAPHY(geographyType)); + } } private void addRowGroup( @@ -1183,6 +1207,13 @@ LogicalTypeAnnotation getLogicalTypeAnnotation(LogicalType type) { return LogicalTypeAnnotation.uuidType(); case FLOAT16: return LogicalTypeAnnotation.float16Type(); + case GEOMETRY: + GeometryType geometry = type.getGEOMETRY(); + return LogicalTypeAnnotation.geometryType(geometry.getCrs()); + case GEOGRAPHY: + GeographyType geography = type.getGEOGRAPHY(); + return LogicalTypeAnnotation.geographyType( + geography.getCrs(), toParquetEdgeInterpolationAlgorithm(geography.getAlgorithm())); case VARIANT: VariantType variant = type.getVARIANT(); return LogicalTypeAnnotation.variantType(variant.getSpecification_version()); @@ -2490,4 +2521,26 @@ public static SizeStatistics toParquetSizeStatistics(org.apache.parquet.column.s } return formatStats; } + + /** Convert Parquet Algorithm enum to Thrift Algorithm enum */ + public static EdgeInterpolationAlgorithm fromParquetEdgeInterpolationAlgorithm( + org.apache.parquet.column.schema.EdgeInterpolationAlgorithm parquetAlgo) { + if (parquetAlgo == null) { + return null; + } + EdgeInterpolationAlgorithm thriftAlgo = EdgeInterpolationAlgorithm.findByValue(parquetAlgo.getValue()); + if (thriftAlgo == null) { + throw new IllegalArgumentException("Unrecognized Parquet EdgeInterpolationAlgorithm: " + parquetAlgo); + } + return thriftAlgo; + } + + /** Convert Thrift Algorithm enum to Parquet Algorithm enum */ + public static 
org.apache.parquet.column.schema.EdgeInterpolationAlgorithm toParquetEdgeInterpolationAlgorithm( + EdgeInterpolationAlgorithm thriftAlgo) { + if (thriftAlgo == null) { + return null; + } + return org.apache.parquet.column.schema.EdgeInterpolationAlgorithm.findByValue(thriftAlgo.getValue()); + } } diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index 322d4c4abc..82c70bed95 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -80,6 +80,7 @@ import org.apache.parquet.column.Encoding; import org.apache.parquet.column.EncodingStats; import org.apache.parquet.column.ParquetProperties; +import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm; import org.apache.parquet.column.statistics.BinaryStatistics; import org.apache.parquet.column.statistics.BooleanStatistics; import org.apache.parquet.column.statistics.DoubleStatistics; @@ -101,6 +102,8 @@ import org.apache.parquet.format.DecimalType; import org.apache.parquet.format.FieldRepetitionType; import org.apache.parquet.format.FileMetaData; +import org.apache.parquet.format.GeographyType; +import org.apache.parquet.format.GeometryType; import org.apache.parquet.format.LogicalType; import org.apache.parquet.format.MapType; import org.apache.parquet.format.PageHeader; @@ -1661,4 +1664,141 @@ public void testSizeStatisticsConversion() { assertEquals(repLevelHistogram, sizeStatistics.getRepetitionLevelHistogram()); assertEquals(defLevelHistogram, sizeStatistics.getDefinitionLevelHistogram()); } + + @Test + public void testGeometryLogicalType() { + ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); + + // Create schema with geometry type + MessageType schema 
= Types.buildMessage() + .required(PrimitiveTypeName.BINARY) + .as(LogicalTypeAnnotation.geometryType("EPSG:4326")) + .named("geomField") + .named("Message"); + + // Convert to parquet schema and back + List parquetSchema = parquetMetadataConverter.toParquetSchema(schema); + MessageType actual = parquetMetadataConverter.fromParquetSchema(parquetSchema, null); + + // Verify the logical type is preserved + assertEquals(schema, actual); + + PrimitiveType primitiveType = actual.getType("geomField").asPrimitiveType(); + LogicalTypeAnnotation logicalType = primitiveType.getLogicalTypeAnnotation(); + assertTrue(logicalType instanceof LogicalTypeAnnotation.GeometryLogicalTypeAnnotation); + assertEquals("EPSG:4326", ((LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) logicalType).getCrs()); + } + + @Test + public void testGeographyLogicalType() { + ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); + + // Create schema with geography type + MessageType schema = Types.buildMessage() + .required(PrimitiveTypeName.BINARY) + .as(LogicalTypeAnnotation.geographyType("EPSG:4326", EdgeInterpolationAlgorithm.SPHERICAL)) + .named("geogField") + .named("Message"); + + // Convert to parquet schema and back + List parquetSchema = parquetMetadataConverter.toParquetSchema(schema); + MessageType actual = parquetMetadataConverter.fromParquetSchema(parquetSchema, null); + + // Verify the logical type is preserved + assertEquals(schema, actual); + + PrimitiveType primitiveType = actual.getType("geogField").asPrimitiveType(); + LogicalTypeAnnotation logicalType = primitiveType.getLogicalTypeAnnotation(); + assertTrue(logicalType instanceof LogicalTypeAnnotation.GeographyLogicalTypeAnnotation); + + LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyType = + (LogicalTypeAnnotation.GeographyLogicalTypeAnnotation) logicalType; + assertEquals("EPSG:4326", geographyType.getCrs()); + assertEquals(EdgeInterpolationAlgorithm.SPHERICAL, 
geographyType.getAlgorithm()); + } + + @Test + public void testGeometryLogicalTypeWithMissingCrs() { + // Create a Geometry logical type without specifying CRS + GeometryType geometryType = new GeometryType(); + LogicalType logicalType = new LogicalType(); + logicalType.setGEOMETRY(geometryType); + + // Convert to LogicalTypeAnnotation + ParquetMetadataConverter converter = new ParquetMetadataConverter(); + LogicalTypeAnnotation annotation = converter.getLogicalTypeAnnotation(logicalType); + + // Verify the annotation is created correctly + assertNotNull("Geometry annotation should not be null", annotation); + assertTrue( + "Should be a GeometryLogicalTypeAnnotation", + annotation instanceof LogicalTypeAnnotation.GeometryLogicalTypeAnnotation); + + LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryAnnotation = + (LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) annotation; + + // Default behavior should use null or empty CRS + assertNull("CRS should be null or empty when not specified", geometryAnnotation.getCrs()); + } + + @Test + public void testGeographyLogicalTypeWithMissingParameters() { + ParquetMetadataConverter converter = new ParquetMetadataConverter(); + + // Create a Geography logical type without CRS and algorithm + GeographyType geographyType = new GeographyType(); + LogicalType logicalType = new LogicalType(); + logicalType.setGEOGRAPHY(geographyType); + + // Convert to LogicalTypeAnnotation + LogicalTypeAnnotation annotation = converter.getLogicalTypeAnnotation(logicalType); + + // Verify the annotation is created correctly + assertNotNull("Geography annotation should not be null", annotation); + assertTrue( + "Should be a GeographyLogicalTypeAnnotation", + annotation instanceof LogicalTypeAnnotation.GeographyLogicalTypeAnnotation); + + // Check that optional parameters are handled correctly + LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyAnnotation = + (LogicalTypeAnnotation.GeographyLogicalTypeAnnotation) annotation; + 
assertNull("CRS should be null when not specified", geographyAnnotation.getCrs()); + // A missing algorithm is surfaced as null; no default (e.g. SPHERICAL) is substituted here + assertNull("Algorithm should be null when not specified", geographyAnnotation.getAlgorithm()); + + // Now test the round-trip conversion + LogicalType roundTripType = converter.convertToLogicalType(annotation); + assertEquals("setField should be GEOGRAPHY", LogicalType._Fields.GEOGRAPHY, roundTripType.getSetField()); + assertNull( + "Round trip CRS should still be null", + roundTripType.getGEOGRAPHY().getCrs()); + assertNull( + "Round trip Algorithm should be null", + roundTripType.getGEOGRAPHY().getAlgorithm()); + } + + @Test + public void testGeographyLogicalTypeWithAlgorithmButNoCrs() { + // Create a Geography logical type with algorithm but no CRS + GeographyType geographyType = new GeographyType(); + geographyType.setAlgorithm(org.apache.parquet.format.EdgeInterpolationAlgorithm.SPHERICAL); + LogicalType logicalType = new LogicalType(); + logicalType.setGEOGRAPHY(geographyType); + + // Convert to LogicalTypeAnnotation + ParquetMetadataConverter converter = new ParquetMetadataConverter(); + LogicalTypeAnnotation annotation = converter.getLogicalTypeAnnotation(logicalType); + + // Verify the annotation is created correctly + Assert.assertNotNull("Geography annotation should not be null", annotation); + LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyAnnotation = + (LogicalTypeAnnotation.GeographyLogicalTypeAnnotation) annotation; + + // CRS should be null/empty but algorithm should be set + assertNull("CRS should be null or empty", geographyAnnotation.getCrs()); + assertEquals( + "Algorithm should be SPHERICAL", + EdgeInterpolationAlgorithm.SPHERICAL, + geographyAnnotation.getAlgorithm()); + } } diff --git a/pom.xml b/pom.xml index 73360fb72d..f2f3b46bc3 100644 --- a/pom.xml +++ b/pom.xml @@ -104,6 +104,7 @@ 2.0.9 0.27ea0 3.5.0 + 1.20.0 2.3