Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,10 @@ public <T extends Comparable<T>> Boolean visit(NotEq<T> notEq) {

try {
Set<T> dictSet = expandDictionary(meta);
if (dictSet != null && dictSet.size() == 1 && dictSet.contains(value)) {
boolean mayContainNull = (meta.getStatistics() == null
|| !meta.getStatistics().isNumNullsSet()
|| meta.getStatistics().getNumNulls() > 0);
if (dictSet != null && dictSet.size() == 1 && dictSet.contains(value) && !mayContainNull) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 this is matched with the fix I was thinking at least.

return BLOCK_CANNOT_MATCH;
}
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ public class DictionaryFilterTest {
"message test { "
+ "required binary binary_field; "
+ "required binary single_value_field; "
+ "optional binary optional_single_value_field; "
+ "required fixed_len_byte_array(17) fixed_field (DECIMAL(40,4)); "
+ "required int32 int32_field; "
+ "required int64 int64_field; "
Expand Down Expand Up @@ -165,6 +166,11 @@ private static void writeData(SimpleGroupFactory f, ParquetWriter<Group> writer)
ALPHABET.substring(index, index+1) : UUID.randomUUID().toString())
.append("int96_field", INT96_VALUES[i % INT96_VALUES.length]);

// 10% of the time, leave the field null
if (index % 10 > 0) {
group.append("optional_single_value_field", "sharp");
}

writer.write(group);
}
writer.close();
Expand Down Expand Up @@ -256,7 +262,7 @@ public void testDictionaryEncodedColumns() throws Exception {
@SuppressWarnings("deprecation")
private void testDictionaryEncodedColumnsV1() throws Exception {
Set<String> dictionaryEncodedColumns = new HashSet<String>(Arrays.asList(
"binary_field", "single_value_field", "int32_field", "int64_field",
"binary_field", "single_value_field", "optional_single_value_field", "int32_field", "int64_field",
"double_field", "float_field", "int96_field"));
for (ColumnChunkMetaData column : ccmd) {
String name = column.getPath().toDotString();
Expand All @@ -281,7 +287,7 @@ private void testDictionaryEncodedColumnsV1() throws Exception {

private void testDictionaryEncodedColumnsV2() throws Exception {
Set<String> dictionaryEncodedColumns = new HashSet<String>(Arrays.asList(
"binary_field", "single_value_field", "fixed_field", "int32_field",
"binary_field", "single_value_field", "optional_single_value_field", "fixed_field", "int32_field",
"int64_field", "double_field", "float_field", "int96_field"));
for (ColumnChunkMetaData column : ccmd) {
EncodingStats encStats = column.getEncodingStats();
Expand Down Expand Up @@ -355,6 +361,7 @@ public void testEqInt96() throws Exception {
@Test
public void testNotEqBinary() throws Exception {
BinaryColumn sharp = binaryColumn("single_value_field");
BinaryColumn sharpAndNull = binaryColumn("optional_single_value_field");
BinaryColumn b = binaryColumn("binary_field");

assertTrue("Should drop block with only the excluded value",
Expand All @@ -363,6 +370,12 @@ public void testNotEqBinary() throws Exception {
assertFalse("Should not drop block with any other value",
canDrop(notEq(sharp, Binary.fromString("applause")), ccmd, dictionaries));

assertFalse("Should not drop block with only the excluded value and null",
canDrop(notEq(sharpAndNull, Binary.fromString("sharp")), ccmd, dictionaries));

assertFalse("Should not drop block with any other value",
canDrop(notEq(sharpAndNull, Binary.fromString("applause")), ccmd, dictionaries));

assertFalse("Should not drop block with a known value",
canDrop(notEq(b, Binary.fromString("x")), ccmd, dictionaries));

Expand Down