-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Closed
Description
A NullPointerException is thrown when using a UserDefinedPredicate after upgrading Parquet to 1.11.0.
The UserDefinedPredicate is:
new UserDefinedPredicate[Binary] with Serializable {
private val strToBinary = Binary.fromReusedByteArray(v.getBytes)
private val size = strToBinary.length
/**
 * Returns true when the max statistic, truncated to the prefix length, sorts before the
 * prefix, or when the similarly truncated min statistic sorts after it. Comparison is
 * unsigned lexicographical over the raw bytes.
 *
 * @param statistics min/max statistics to test against the prefix
 */
override def canDrop(statistics: Statistics[Binary]): Boolean = {
  // Cut a statistic down to at most `size` bytes so it is compared prefix-to-prefix.
  def headOf(bytes: Binary): Binary = bytes.slice(0, math.min(size, bytes.length))

  val cmp = PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR
  val upper = statistics.getMax
  val lower = statistics.getMin
  val maxBelowPrefix = cmp.compare(headOf(upper), strToBinary) < 0
  // The min side is only examined when the max side did not already decide the answer.
  maxBelowPrefix || cmp.compare(headOf(lower), strToBinary) > 0
}
/**
 * Returns true when both the max and the min statistic, each truncated to the prefix
 * length, compare equal to the prefix under unsigned lexicographical byte ordering.
 *
 * @param statistics min/max statistics to test against the prefix
 */
override def inverseCanDrop(statistics: Statistics[Binary]): Boolean = {
  // Cut a statistic down to at most `size` bytes so it is compared prefix-to-prefix.
  def headOf(bytes: Binary): Binary = bytes.slice(0, math.min(size, bytes.length))

  val cmp = PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR
  val upper = statistics.getMax
  val lower = statistics.getMin
  val maxMatchesPrefix = cmp.compare(headOf(upper), strToBinary) == 0
  // The min side is only examined when the max side already matched the prefix.
  maxMatchesPrefix && cmp.compare(headOf(lower), strToBinary) == 0
}
/**
 * Decides whether a single value passes the starts-with filter.
 *
 * @param value the column value to test; may be null
 * @return true if `value` is non-null and its bytes start with the prefix bytes
 */
override def keep(value: Binary): Boolean = {
  // The reported stack trace shows ColumnIndexFilter.visit invoking keep and failing with a
  // NullPointerException, so the caller can pass null here. A null value cannot start with
  // the prefix, so answer false instead of dereferencing it.
  value != null && UTF8String.fromBytes(value.getBytes).startsWith(
    UTF8String.fromBytes(strToBinary.getBytes))
}
}
The stack trace is:
java.lang.NullPointerException
at org.apache.spark.sql.execution.datasources.parquet.ParquetFilters$$anon$1.keep(ParquetFilters.scala:573)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFilters$$anon$1.keep(ParquetFilters.scala:552)
at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:152)
at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
at org.apache.parquet.filter2.predicate.Operators$UserDefined.accept(Operators.java:377)
at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:181)
at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
at org.apache.parquet.filter2.predicate.Operators$And.accept(Operators.java:309)
at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:86)
at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:81)
Reporter: Yuming Wang / @wangyum
Assignee: Gabor Szadovszky / @gszadovszky
Related issues:
- Release parquet-mr 1.11.0 (blocks)
- Insufficient documentation for UserDefinedPredicate.keep(T) (is related to)
PRs and other links:
Note: This issue was originally created as PARQUET-1488. Please see the migration documentation for further details.