diff --git a/parquet-column/pom.xml b/parquet-column/pom.xml
index 654fad27cd..01b5b8e8c6 100644
--- a/parquet-column/pom.xml
+++ b/parquet-column/pom.xml
@@ -76,6 +76,12 @@
${slf4j.version}
+
+ org.locationtech.jts
+ jts-core
+ ${jts.version}
+
+
com.carrotsearch
junit-benchmarks
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/schema/EdgeInterpolationAlgorithm.java b/parquet-column/src/main/java/org/apache/parquet/column/schema/EdgeInterpolationAlgorithm.java
new file mode 100644
index 0000000000..5357073a8f
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/schema/EdgeInterpolationAlgorithm.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.schema;
+
+/**
+ * Edge interpolation algorithm for the Geography logical type; each constant carries an integer code.
+ */
+public enum EdgeInterpolationAlgorithm {
+ SPHERICAL(0),
+ VINCENTY(1),
+ THOMAS(2),
+ ANDOYER(3),
+ KARNEY(4);
+
+ private final int value;
+
+ private EdgeInterpolationAlgorithm(int value) {
+ this.value = value;
+ }
+
+ /**
+ * Get the integer code of this enum constant, as defined in the Thrift IDL.
+ */
+ public int getValue() {
+ return value;
+ }
+
+ /**
+ * Find the enum constant by its integer value, as defined in the Thrift IDL.
+ * @throws IllegalArgumentException if the value matches no constant; this method never returns null.
+ */
+ public static EdgeInterpolationAlgorithm findByValue(int value) {
+ switch (value) {
+ case 0:
+ return SPHERICAL;
+ case 1:
+ return VINCENTY;
+ case 2:
+ return THOMAS;
+ case 3:
+ return ANDOYER;
+ case 4:
+ return KARNEY;
+ default:
+ throw new IllegalArgumentException("Unrecognized EdgeInterpolationAlgorithm value: " + value);
+ }
+ }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
index 749beaa95e..be98e071f6 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
@@ -41,8 +41,13 @@
import java.util.Set;
import java.util.function.Supplier;
import org.apache.parquet.Preconditions;
+import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm;
public abstract class LogicalTypeAnnotation {
+
+ public static final String DEFAULT_CRS = "OGC:CRS84";
+ public static final EdgeInterpolationAlgorithm DEFAULT_ALGO = EdgeInterpolationAlgorithm.SPHERICAL;
+
enum LogicalTypeToken {
MAP {
@Override
@@ -155,6 +160,31 @@ protected LogicalTypeAnnotation fromString(List params) {
return float16Type();
}
},
+ GEOMETRY {
+ @Override
+ protected LogicalTypeAnnotation fromString(List params) {
+ if (params.size() > 1) {
+ throw new RuntimeException(
+ "Expecting at most 1 parameter for geometry logical type, got " + params.size());
+ }
+ String crs = params.isEmpty() ? null : params.get(0);
+ return geometryType(crs);
+ }
+ },
+ GEOGRAPHY {
+ @Override
+ protected LogicalTypeAnnotation fromString(List params) {
+ if (params.size() > 2) {
+ throw new RuntimeException(
+ "Expecting at most 2 parameters for geography logical type (crs and edge algorithm), got "
+ + params.size());
+ }
+ String crs = !params.isEmpty() ? params.get(0) : null;
+ EdgeInterpolationAlgorithm algo =
+ params.size() > 1 ? EdgeInterpolationAlgorithm.valueOf(params.get(1)) : null;
+ return geographyType(crs, algo);
+ }
+ },
UNKNOWN {
@Override
protected LogicalTypeAnnotation fromString(List params) {
@@ -334,6 +364,18 @@ public static Float16LogicalTypeAnnotation float16Type() {
return Float16LogicalTypeAnnotation.INSTANCE;
}
+ public static GeometryLogicalTypeAnnotation geometryType(String crs) {
+ return new GeometryLogicalTypeAnnotation(crs);
+ }
+
+ public static GeographyLogicalTypeAnnotation geographyType(String crs, EdgeInterpolationAlgorithm edgeAlgorithm) {
+ return new GeographyLogicalTypeAnnotation(crs, edgeAlgorithm);
+ }
+
+ public static GeographyLogicalTypeAnnotation geographyType() {
+ return new GeographyLogicalTypeAnnotation(null, null);
+ }
+
public static UnknownLogicalTypeAnnotation unknownType() {
return UnknownLogicalTypeAnnotation.INSTANCE;
}
@@ -1183,6 +1225,124 @@ public boolean equals(Object obj) {
}
}
+ public static class GeometryLogicalTypeAnnotation extends LogicalTypeAnnotation {
+ private final String crs;
+
+ private GeometryLogicalTypeAnnotation(String crs) {
+ this.crs = crs;
+ }
+
+ @Override
+ @Deprecated
+ public OriginalType toOriginalType() {
+ return null;
+ }
+
+ @Override
+ public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this);
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.GEOMETRY;
+ }
+
+ @Override
+ protected String typeParametersAsString() {
+ if (crs == null || crs.isEmpty()) {
+ return "";
+ }
+ return String.format("(%s)", crs);
+ }
+
+ public String getCrs() {
+ return crs;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof GeometryLogicalTypeAnnotation)) {
+ return false;
+ }
+ GeometryLogicalTypeAnnotation other = (GeometryLogicalTypeAnnotation) obj;
+ return Objects.equals(crs, other.crs);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(crs);
+ }
+
+ @Override
+ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+ return PrimitiveStringifier.WKB_STRINGIFIER;
+ }
+ }
+
+ public static class GeographyLogicalTypeAnnotation extends LogicalTypeAnnotation {
+ private final String crs;
+ private final EdgeInterpolationAlgorithm algorithm;
+
+ private GeographyLogicalTypeAnnotation(String crs, EdgeInterpolationAlgorithm algorithm) {
+ this.crs = crs;
+ this.algorithm = algorithm;
+ }
+
+ @Override
+ @Deprecated
+ public OriginalType toOriginalType() {
+ return null;
+ }
+
+ @Override
+ public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this);
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.GEOGRAPHY;
+ }
+
+ @Override
+ protected String typeParametersAsString() {
+ boolean hasCrs = crs != null && !crs.isEmpty();
+ boolean hasAlgo = algorithm != null;
+ if (!hasCrs && !hasAlgo) {
+ return "";
+ }
+ return String.format("(%s,%s)", hasCrs ? crs : DEFAULT_CRS, hasAlgo ? algorithm : DEFAULT_ALGO);
+ }
+
+ public String getCrs() {
+ return crs;
+ }
+
+ public EdgeInterpolationAlgorithm getAlgorithm() {
+ return algorithm;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof GeographyLogicalTypeAnnotation)) {
+ return false;
+ }
+ GeographyLogicalTypeAnnotation other = (GeographyLogicalTypeAnnotation) obj;
+ return Objects.equals(crs, other.crs) && Objects.equals(algorithm, other.algorithm);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(crs, algorithm);
+ }
+
+ @Override
+ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+ return PrimitiveStringifier.WKB_STRINGIFIER;
+ }
+ }
+
/**
* Implement this interface to visit a logical type annotation in the schema.
* The default implementation for each logical type specific visitor method is empty.
@@ -1259,6 +1419,14 @@ default Optional visit(Float16LogicalTypeAnnotation float16LogicalType) {
return empty();
}
+ default Optional visit(GeometryLogicalTypeAnnotation geometryLogicalType) {
+ return empty();
+ }
+
+ default Optional visit(GeographyLogicalTypeAnnotation geographyLogicalType) {
+ return empty();
+ }
+
default Optional visit(UnknownLogicalTypeAnnotation unknownLogicalTypeAnnotation) {
return empty();
}
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
index 7aface72a7..3bbcca981b 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
@@ -35,6 +35,9 @@
import java.util.concurrent.TimeUnit;
import javax.naming.OperationNotSupportedException;
import org.apache.parquet.io.api.Binary;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.io.ParseException;
+import org.locationtech.jts.io.WKBReader;
/**
* Class that provides string representations for the primitive values. These string values are to be used for
@@ -442,6 +445,21 @@ private void appendHex(byte[] array, int offset, int length, StringBuilder build
}
};
+ static final PrimitiveStringifier WKB_STRINGIFIER = new BinaryStringifierBase("WKB_STRINGIFIER") {
+
+ @Override
+ String stringifyNotNull(Binary value) {
+
+ try {
+ WKBReader reader = new WKBReader();
+ Geometry geometry = reader.read(value.getBytesUnsafe());
+ return geometry.toText();
+ } catch (ParseException e) {
+ return BINARY_INVALID;
+ }
+ }
+ };
+
static final PrimitiveStringifier FLOAT16_STRINGIFIER = new BinaryStringifierBase("FLOAT16_STRINGIFIER") {
@Override
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
index e74d7cde02..6beff4da93 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
@@ -271,6 +271,18 @@ public Optional visit(
LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) {
return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
}
+
+ @Override
+ public Optional visit(
+ LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) {
+ return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
+ }
+
+ @Override
+ public Optional visit(
+ LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyLogicalType) {
+ return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
+ }
})
.orElseThrow(() -> new ShouldNeverHappenException(
"No comparator logic implemented for BINARY logical type: " + logicalType));
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
index 399672022c..fd82d36768 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
@@ -577,6 +577,18 @@ public Optional visit(
return checkBinaryPrimitiveType(enumLogicalType);
}
+ @Override
+ public Optional visit(
+ LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) {
+ return checkBinaryPrimitiveType(geometryLogicalType);
+ }
+
+ @Override
+ public Optional visit(
+ LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyLogicalType) {
+ return checkBinaryPrimitiveType(geographyLogicalType);
+ }
+
private Optional checkFixedPrimitiveType(
int l, LogicalTypeAnnotation logicalTypeAnnotation) {
Preconditions.checkState(
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
index 71886d1208..018ce5b276 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
@@ -55,6 +55,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
+import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type.Repetition;
import org.junit.Assert;
@@ -1477,6 +1478,102 @@ public void testDecimalLogicalTypeWithDeprecatedPrecisionMismatch() {
.named("aDecimal");
}
+ @Test
+ public void testGeometryLogicalType() {
+ // Test with default CRS
+ PrimitiveType defaultCrsExpected =
+ new PrimitiveType(REQUIRED, BINARY, "aGeometry", LogicalTypeAnnotation.geometryType("OGC:CRS84"));
+ PrimitiveType defaultCrsActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geometryType("OGC:CRS84"))
+ .named("aGeometry");
+ Assert.assertEquals(defaultCrsExpected, defaultCrsActual);
+
+ // Test with custom CRS
+ PrimitiveType customCrsExpected =
+ new PrimitiveType(REQUIRED, BINARY, "aGeometry", LogicalTypeAnnotation.geometryType("EPSG:4326"));
+ PrimitiveType customCrsActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geometryType("EPSG:4326"))
+ .named("aGeometry");
+ Assert.assertEquals(customCrsExpected, customCrsActual);
+
+ // Test with optional repetition
+ PrimitiveType optionalGeometryExpected =
+ new PrimitiveType(OPTIONAL, BINARY, "aGeometry", LogicalTypeAnnotation.geometryType("OGC:CRS84"));
+ PrimitiveType optionalGeometryActual = Types.optional(BINARY)
+ .as(LogicalTypeAnnotation.geometryType("OGC:CRS84"))
+ .named("aGeometry");
+ Assert.assertEquals(optionalGeometryExpected, optionalGeometryActual);
+ }
+
+ @Test
+ public void testGeographyLogicalType() {
+ // Test with default CRS and no edge algorithm
+ PrimitiveType defaultCrsExpected = new PrimitiveType(
+ REQUIRED, BINARY, "aGeography", LogicalTypeAnnotation.geographyType("OGC:CRS84", null));
+ PrimitiveType defaultCrsActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geographyType("OGC:CRS84", null))
+ .named("aGeography");
+ Assert.assertEquals(defaultCrsExpected, defaultCrsActual);
+
+ // Test with custom CRS and no edge algorithm
+ PrimitiveType customCrsExpected = new PrimitiveType(
+ REQUIRED, BINARY, "aGeography", LogicalTypeAnnotation.geographyType("EPSG:4326", null));
+ PrimitiveType customCrsActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geographyType("EPSG:4326", null))
+ .named("aGeography");
+ Assert.assertEquals(customCrsExpected, customCrsActual);
+
+ // Test with custom CRS and edge algorithm
+ EdgeInterpolationAlgorithm greatCircle = EdgeInterpolationAlgorithm.SPHERICAL;
+ PrimitiveType customCrsWithEdgeAlgorithmExpected = new PrimitiveType(
+ REQUIRED, BINARY, "aGeography", LogicalTypeAnnotation.geographyType("EPSG:4326", greatCircle));
+ PrimitiveType customCrsWithEdgeAlgorithmActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geographyType("EPSG:4326", greatCircle))
+ .named("aGeography");
+ Assert.assertEquals(customCrsWithEdgeAlgorithmExpected, customCrsWithEdgeAlgorithmActual);
+
+ // Test with optional repetition
+ PrimitiveType optionalGeographyExpected = new PrimitiveType(
+ OPTIONAL, BINARY, "aGeography", LogicalTypeAnnotation.geographyType("OGC:CRS84", null));
+ PrimitiveType optionalGeographyActual = Types.optional(BINARY)
+ .as(LogicalTypeAnnotation.geographyType("OGC:CRS84", null))
+ .named("aGeography");
+ Assert.assertEquals(optionalGeographyExpected, optionalGeographyActual);
+ }
+
+ @Test
+ public void testGeographyLogicalTypeWithoutEdgeInterpolationAlgorithm() {
+ // Test the no-arg factory: neither CRS nor edge algorithm is specified
+ PrimitiveType defaultCrsExpected =
+ new PrimitiveType(REQUIRED, BINARY, "aGeography", LogicalTypeAnnotation.geographyType());
+ PrimitiveType defaultCrsActual =
+ Types.required(BINARY).as(LogicalTypeAnnotation.geographyType()).named("aGeography");
+ Assert.assertEquals(defaultCrsExpected, defaultCrsActual);
+
+ // Test with custom CRS and no edge algorithm
+ PrimitiveType customCrsExpected = new PrimitiveType(
+ REQUIRED, BINARY, "aGeography", LogicalTypeAnnotation.geographyType("EPSG:4326", null));
+ PrimitiveType customCrsActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geographyType("EPSG:4326", null))
+ .named("aGeography");
+ Assert.assertEquals(customCrsExpected, customCrsActual);
+
+ // Same custom CRS with a null edge algorithm again (repeats the case above; no algorithm is set)
+ PrimitiveType customCrsWithEdgeAlgorithmExpected = new PrimitiveType(
+ REQUIRED, BINARY, "aGeography", LogicalTypeAnnotation.geographyType("EPSG:4326", null));
+ PrimitiveType customCrsWithEdgeAlgorithmActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geographyType("EPSG:4326", null))
+ .named("aGeography");
+ Assert.assertEquals(customCrsWithEdgeAlgorithmExpected, customCrsWithEdgeAlgorithmActual);
+
+ // Test with optional repetition
+ PrimitiveType optionalGeographyExpected =
+ new PrimitiveType(OPTIONAL, BINARY, "aGeography", LogicalTypeAnnotation.geographyType());
+ PrimitiveType optionalGeographyActual =
+ Types.optional(BINARY).as(LogicalTypeAnnotation.geographyType()).named("aGeography");
+ Assert.assertEquals(optionalGeographyExpected, optionalGeographyActual);
+ }
+
/**
* A convenience method to avoid a large number of @Test(expected=...) tests
*
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index 5759be234f..15fcd14a73 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -76,10 +76,13 @@
import org.apache.parquet.format.DataPageHeaderV2;
import org.apache.parquet.format.DecimalType;
import org.apache.parquet.format.DictionaryPageHeader;
+import org.apache.parquet.format.EdgeInterpolationAlgorithm;
import org.apache.parquet.format.Encoding;
import org.apache.parquet.format.EncryptionWithColumnKey;
import org.apache.parquet.format.FieldRepetitionType;
import org.apache.parquet.format.FileMetaData;
+import org.apache.parquet.format.GeographyType;
+import org.apache.parquet.format.GeometryType;
import org.apache.parquet.format.IntType;
import org.apache.parquet.format.KeyValue;
import org.apache.parquet.format.LogicalType;
@@ -520,6 +523,27 @@ public Optional visit(LogicalTypeAnnotation.IntervalLogicalTypeAnno
public Optional visit(LogicalTypeAnnotation.VariantLogicalTypeAnnotation variantLogicalType) {
return of(LogicalTypes.VARIANT(variantLogicalType.getSpecVersion()));
}
+
+ @Override
+ public Optional visit(LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) {
+ GeometryType geometryType = new GeometryType();
+ if (geometryLogicalType.getCrs() != null
+ && !geometryLogicalType.getCrs().isEmpty()) {
+ geometryType.setCrs(geometryLogicalType.getCrs());
+ }
+ return of(LogicalType.GEOMETRY(geometryType));
+ }
+
+ @Override
+ public Optional visit(LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyLogicalType) {
+ GeographyType geographyType = new GeographyType();
+ if (geographyLogicalType.getCrs() != null
+ && !geographyLogicalType.getCrs().isEmpty()) {
+ geographyType.setCrs(geographyLogicalType.getCrs());
+ }
+ geographyType.setAlgorithm(fromParquetEdgeInterpolationAlgorithm(geographyLogicalType.getAlgorithm()));
+ return of(LogicalType.GEOGRAPHY(geographyType));
+ }
}
private void addRowGroup(
@@ -1183,6 +1207,13 @@ LogicalTypeAnnotation getLogicalTypeAnnotation(LogicalType type) {
return LogicalTypeAnnotation.uuidType();
case FLOAT16:
return LogicalTypeAnnotation.float16Type();
+ case GEOMETRY:
+ GeometryType geometry = type.getGEOMETRY();
+ return LogicalTypeAnnotation.geometryType(geometry.getCrs());
+ case GEOGRAPHY:
+ GeographyType geography = type.getGEOGRAPHY();
+ return LogicalTypeAnnotation.geographyType(
+ geography.getCrs(), toParquetEdgeInterpolationAlgorithm(geography.getAlgorithm()));
case VARIANT:
VariantType variant = type.getVARIANT();
return LogicalTypeAnnotation.variantType(variant.getSpecification_version());
@@ -2490,4 +2521,26 @@ public static SizeStatistics toParquetSizeStatistics(org.apache.parquet.column.s
}
return formatStats;
}
+
+ /** Convert Parquet Algorithm enum to Thrift Algorithm enum by integer value; a null input yields null */
+ public static EdgeInterpolationAlgorithm fromParquetEdgeInterpolationAlgorithm(
+ org.apache.parquet.column.schema.EdgeInterpolationAlgorithm parquetAlgo) {
+ if (parquetAlgo == null) {
+ return null;
+ }
+ EdgeInterpolationAlgorithm thriftAlgo = EdgeInterpolationAlgorithm.findByValue(parquetAlgo.getValue());
+ if (thriftAlgo == null) {
+ throw new IllegalArgumentException("Unrecognized Parquet EdgeInterpolationAlgorithm: " + parquetAlgo);
+ }
+ return thriftAlgo;
+ }
+
+ /** Convert Thrift Algorithm enum to Parquet Algorithm enum by integer value; a null input yields null */
+ public static org.apache.parquet.column.schema.EdgeInterpolationAlgorithm toParquetEdgeInterpolationAlgorithm(
+ EdgeInterpolationAlgorithm thriftAlgo) {
+ if (thriftAlgo == null) {
+ return null;
+ }
+ return org.apache.parquet.column.schema.EdgeInterpolationAlgorithm.findByValue(thriftAlgo.getValue());
+ }
}
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 322d4c4abc..82c70bed95 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -80,6 +80,7 @@
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.EncodingStats;
import org.apache.parquet.column.ParquetProperties;
+import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm;
import org.apache.parquet.column.statistics.BinaryStatistics;
import org.apache.parquet.column.statistics.BooleanStatistics;
import org.apache.parquet.column.statistics.DoubleStatistics;
@@ -101,6 +102,8 @@
import org.apache.parquet.format.DecimalType;
import org.apache.parquet.format.FieldRepetitionType;
import org.apache.parquet.format.FileMetaData;
+import org.apache.parquet.format.GeographyType;
+import org.apache.parquet.format.GeometryType;
import org.apache.parquet.format.LogicalType;
import org.apache.parquet.format.MapType;
import org.apache.parquet.format.PageHeader;
@@ -1661,4 +1664,141 @@ public void testSizeStatisticsConversion() {
assertEquals(repLevelHistogram, sizeStatistics.getRepetitionLevelHistogram());
assertEquals(defLevelHistogram, sizeStatistics.getDefinitionLevelHistogram());
}
+
+ @Test
+ public void testGeometryLogicalType() {
+ ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
+
+ // Create schema with geometry type
+ MessageType schema = Types.buildMessage()
+ .required(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.geometryType("EPSG:4326"))
+ .named("geomField")
+ .named("Message");
+
+ // Convert to parquet schema and back
+ List parquetSchema = parquetMetadataConverter.toParquetSchema(schema);
+ MessageType actual = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+
+ // Verify the logical type is preserved
+ assertEquals(schema, actual);
+
+ PrimitiveType primitiveType = actual.getType("geomField").asPrimitiveType();
+ LogicalTypeAnnotation logicalType = primitiveType.getLogicalTypeAnnotation();
+ assertTrue(logicalType instanceof LogicalTypeAnnotation.GeometryLogicalTypeAnnotation);
+ assertEquals("EPSG:4326", ((LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) logicalType).getCrs());
+ }
+
+ @Test
+ public void testGeographyLogicalType() {
+ ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
+
+ // Create schema with geography type
+ MessageType schema = Types.buildMessage()
+ .required(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.geographyType("EPSG:4326", EdgeInterpolationAlgorithm.SPHERICAL))
+ .named("geogField")
+ .named("Message");
+
+ // Convert to parquet schema and back
+ List parquetSchema = parquetMetadataConverter.toParquetSchema(schema);
+ MessageType actual = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+
+ // Verify the logical type is preserved
+ assertEquals(schema, actual);
+
+ PrimitiveType primitiveType = actual.getType("geogField").asPrimitiveType();
+ LogicalTypeAnnotation logicalType = primitiveType.getLogicalTypeAnnotation();
+ assertTrue(logicalType instanceof LogicalTypeAnnotation.GeographyLogicalTypeAnnotation);
+
+ LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyType =
+ (LogicalTypeAnnotation.GeographyLogicalTypeAnnotation) logicalType;
+ assertEquals("EPSG:4326", geographyType.getCrs());
+ assertEquals(EdgeInterpolationAlgorithm.SPHERICAL, geographyType.getAlgorithm());
+ }
+
+ @Test
+ public void testGeometryLogicalTypeWithMissingCrs() {
+ // Create a Geometry logical type without specifying CRS
+ GeometryType geometryType = new GeometryType();
+ LogicalType logicalType = new LogicalType();
+ logicalType.setGEOMETRY(geometryType);
+
+ // Convert to LogicalTypeAnnotation
+ ParquetMetadataConverter converter = new ParquetMetadataConverter();
+ LogicalTypeAnnotation annotation = converter.getLogicalTypeAnnotation(logicalType);
+
+ // Verify the annotation is created correctly
+ assertNotNull("Geometry annotation should not be null", annotation);
+ assertTrue(
+ "Should be a GeometryLogicalTypeAnnotation",
+ annotation instanceof LogicalTypeAnnotation.GeometryLogicalTypeAnnotation);
+
+ LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryAnnotation =
+ (LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) annotation;
+
+ // An unset Thrift CRS is expected to come back as null (no default CRS is filled in)
+ assertNull("CRS should be null or empty when not specified", geometryAnnotation.getCrs());
+ }
+
+ @Test
+ public void testGeographyLogicalTypeWithMissingParameters() {
+ ParquetMetadataConverter converter = new ParquetMetadataConverter();
+
+ // Create a Geography logical type without CRS and algorithm
+ GeographyType geographyType = new GeographyType();
+ LogicalType logicalType = new LogicalType();
+ logicalType.setGEOGRAPHY(geographyType);
+
+ // Convert to LogicalTypeAnnotation
+ LogicalTypeAnnotation annotation = converter.getLogicalTypeAnnotation(logicalType);
+
+ // Verify the annotation is created correctly
+ assertNotNull("Geography annotation should not be null", annotation);
+ assertTrue(
+ "Should be a GeographyLogicalTypeAnnotation",
+ annotation instanceof LogicalTypeAnnotation.GeographyLogicalTypeAnnotation);
+
+ // Check that optional parameters are handled correctly
+ LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyAnnotation =
+ (LogicalTypeAnnotation.GeographyLogicalTypeAnnotation) annotation;
+ assertNull("CRS should be null when not specified", geographyAnnotation.getCrs());
+ // An unset Thrift algorithm is expected to stay null rather than being replaced by a default
+ assertNull("Algorithm should be null when not specified", geographyAnnotation.getAlgorithm());
+
+ // Now test the round-trip conversion
+ LogicalType roundTripType = converter.convertToLogicalType(annotation);
+ assertEquals("setField should be GEOGRAPHY", LogicalType._Fields.GEOGRAPHY, roundTripType.getSetField());
+ assertNull(
+ "Round trip CRS should still be null",
+ roundTripType.getGEOGRAPHY().getCrs());
+ assertNull(
+ "Round trip Algorithm should be null",
+ roundTripType.getGEOGRAPHY().getAlgorithm());
+ }
+
+ @Test
+ public void testGeographyLogicalTypeWithAlgorithmButNoCrs() {
+ // Create a Geography logical type with algorithm but no CRS
+ GeographyType geographyType = new GeographyType();
+ geographyType.setAlgorithm(org.apache.parquet.format.EdgeInterpolationAlgorithm.SPHERICAL);
+ LogicalType logicalType = new LogicalType();
+ logicalType.setGEOGRAPHY(geographyType);
+
+ // Convert to LogicalTypeAnnotation
+ ParquetMetadataConverter converter = new ParquetMetadataConverter();
+ LogicalTypeAnnotation annotation = converter.getLogicalTypeAnnotation(logicalType);
+
+ // Verify the annotation is created correctly
+ Assert.assertNotNull("Geography annotation should not be null", annotation);
+ LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyAnnotation =
+ (LogicalTypeAnnotation.GeographyLogicalTypeAnnotation) annotation;
+
+ // CRS should be null (it was never set) while the algorithm should carry over
+ assertNull("CRS should be null or empty", geographyAnnotation.getCrs());
+ assertEquals(
+ "Algorithm should be SPHERICAL",
+ EdgeInterpolationAlgorithm.SPHERICAL,
+ geographyAnnotation.getAlgorithm());
+ }
}
diff --git a/pom.xml b/pom.xml
index 73360fb72d..f2f3b46bc3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -104,6 +104,7 @@
2.0.9
0.27ea0
3.5.0
+ 1.20.0
2.3