diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index 58ae5039c4..b9c8996f0f 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -1334,12 +1334,13 @@ public void writeDataPageHeader( writePageHeader(newDataPageHeader(uncompressedSize, compressedSize, valueCount, - new org.apache.parquet.column.statistics.BooleanStatistics(), rlEncoding, dlEncoding, valuesEncoding), to); } + // Statistics are no longer saved in page headers, so the statistics argument is ignored; use writeDataPageV1Header instead + @Deprecated public void writeDataPageHeader( int uncompressedSize, int compressedSize, @@ -1350,7 +1351,7 @@ public void writeDataPageHeader( org.apache.parquet.column.Encoding valuesEncoding, OutputStream to) throws IOException { writePageHeader( - newDataPageHeader(uncompressedSize, compressedSize, valueCount, statistics, + newDataPageHeader(uncompressedSize, compressedSize, valueCount, rlEncoding, dlEncoding, valuesEncoding), to); } @@ -1358,7 +1359,6 @@ public void writeDataPageHeader( private PageHeader newDataPageHeader( int uncompressedSize, int compressedSize, int valueCount, - org.apache.parquet.column.statistics.Statistics statistics, org.apache.parquet.column.Encoding rlEncoding, org.apache.parquet.column.Encoding dlEncoding, org.apache.parquet.column.Encoding valuesEncoding) { @@ -1369,12 +1369,11 @@ private PageHeader newDataPageHeader( getEncoding(valuesEncoding), getEncoding(dlEncoding), getEncoding(rlEncoding))); - if (!statistics.isEmpty()) { - pageHeader.getData_page_header().setStatistics(toParquetStatistics(statistics)); - } return pageHeader; } + // Statistics are no longer saved in page headers, so the statistics argument is ignored; use the writeDataPageV2Header overload without statistics instead + @Deprecated public void writeDataPageV2Header( int uncompressedSize, int compressedSize, int valueCount, int 
nullCount, int rowCount, @@ -1386,7 +1385,36 @@ public void writeDataPageV2Header( newDataPageV2Header( uncompressedSize, compressedSize, valueCount, nullCount, rowCount, - statistics, + dataEncoding, + rlByteLength, dlByteLength), to); + } + + public void writeDataPageV1Header( + int uncompressedSize, + int compressedSize, + int valueCount, + org.apache.parquet.column.Encoding rlEncoding, + org.apache.parquet.column.Encoding dlEncoding, + org.apache.parquet.column.Encoding valuesEncoding, + OutputStream to) throws IOException { + writePageHeader(newDataPageHeader(uncompressedSize, + compressedSize, + valueCount, + rlEncoding, + dlEncoding, + valuesEncoding), to); + } + + public void writeDataPageV2Header( + int uncompressedSize, int compressedSize, + int valueCount, int nullCount, int rowCount, + org.apache.parquet.column.Encoding dataEncoding, + int rlByteLength, int dlByteLength, + OutputStream to) throws IOException { + writePageHeader( + newDataPageV2Header( + uncompressedSize, compressedSize, + valueCount, nullCount, rowCount, dataEncoding, rlByteLength, dlByteLength), to); } @@ -1394,7 +1422,6 @@ public void writeDataPageV2Header( private PageHeader newDataPageV2Header( int uncompressedSize, int compressedSize, int valueCount, int nullCount, int rowCount, - org.apache.parquet.column.statistics.Statistics statistics, org.apache.parquet.column.Encoding dataEncoding, int rlByteLength, int dlByteLength) { // TODO: pageHeader.crc = ...; @@ -1402,10 +1429,6 @@ private PageHeader newDataPageV2Header( valueCount, nullCount, rowCount, getEncoding(dataEncoding), dlByteLength, rlByteLength); - if (!statistics.isEmpty()) { - dataPageHeaderV2.setStatistics( - toParquetStatistics(statistics)); - } PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE_V2, uncompressedSize, compressedSize); pageHeader.setData_page_header_v2(dataPageHeaderV2); return pageHeader; diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java 
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java index 85bdbdbd9b..f87630bf24 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java @@ -119,11 +119,10 @@ public void writePage(BytesInput bytes, + compressedSize); } tempOutputStream.reset(); - parquetMetadataConverter.writeDataPageHeader( + parquetMetadataConverter.writeDataPageV1Header( (int)uncompressedSize, (int)compressedSize, valueCount, - statistics, rlEncoding, dlEncoding, valuesEncoding, @@ -171,7 +170,6 @@ public void writePageV2( parquetMetadataConverter.writeDataPageV2Header( uncompressedSize, compressedSize, valueCount, nullCount, rowCount, - statistics, dataEncoding, rlByteLength, dlByteLength, diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java index a8cd686022..20efe47573 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java @@ -433,7 +433,7 @@ public void writeDataPage( long beforeHeader = out.getPos(); LOG.debug("{}: write data page: {} values", beforeHeader, valueCount); int compressedPageSize = (int)bytes.size(); - metadataConverter.writeDataPageHeader( + metadataConverter.writeDataPageV1Header( uncompressedPageSize, compressedPageSize, valueCount, rlEncoding, @@ -518,10 +518,9 @@ private void innerWriteDataPage( } LOG.debug("{}: write data page: {} values", beforeHeader, valueCount); int compressedPageSize = (int) bytes.size(); - metadataConverter.writeDataPageHeader( + metadataConverter.writeDataPageV1Header( uncompressedPageSize, compressedPageSize, valueCount, - statistics, rlEncoding, dlEncoding, valuesEncoding, diff --git 
a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnChunkPageWriteStore.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnChunkPageWriteStore.java index 9a27defe15..c353ee3fe7 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnChunkPageWriteStore.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnChunkPageWriteStore.java @@ -189,7 +189,6 @@ public void test() throws Exception { assertEquals(r, intValue(page.getRepetitionLevels())); assertEquals(dataEncoding, page.getDataEncoding()); assertEquals(v, intValue(page.getData())); - assertEquals(statistics.toString(), page.getStatistics().toString()); // Checking column/offset indexes for the one page ColumnChunkMetaData column = footer.getBlocks().get(0).getColumns().get(0);