diff --git a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java index 69a73cb062..436893873f 100644 --- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java +++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java @@ -18,7 +18,6 @@ */ package org.apache.parquet.avro; -import com.google.common.base.Charsets; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.io.Resources; @@ -27,6 +26,7 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -369,7 +369,7 @@ public void testAll() throws Exception { .set("mylong", 2L) .set("myfloat", 3.1f) .set("mydouble", 4.1) - .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8))) + .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8))) .set("mystring", "hello") .set("mynestedrecord", nestedRecord) .set("myenum", "a") @@ -398,7 +398,7 @@ public void testAll() throws Exception { assertEquals(2L, nextRecord.get("mylong")); assertEquals(3.1f, nextRecord.get("myfloat")); assertEquals(4.1, nextRecord.get("mydouble")); - assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), nextRecord.get("mybytes")); + assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes")); assertEquals(str("hello"), nextRecord.get("mystring")); assertEquals(expectedEnumSymbol, nextRecord.get("myenum")); assertEquals(nestedRecord, nextRecord.get("mynestedrecord")); @@ -567,7 +567,7 @@ public void write(Map record) { record.put("mylong", 2L); record.put("myfloat", 3.1f); record.put("mydouble", 4.1); - record.put("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8))); + record.put("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8))); record.put("mystring", "hello"); record.put("myenum", "a"); record.put("mynestedint", 1); @@ -615,7 +615,7 @@ public void write(Map record) { assertEquals(2L, nextRecord.get("mylong")); assertEquals(3.1f, nextRecord.get("myfloat")); assertEquals(4.1, nextRecord.get("mydouble")); - assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), nextRecord.get("mybytes")); + assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes")); assertEquals(str("hello"), nextRecord.get("mystring")); assertEquals(str("a"), nextRecord.get("myenum")); // enum symbols are unknown assertEquals(nestedRecord, nextRecord.get("mynestedrecord")); diff --git a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWriteOldListBehavior.java b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWriteOldListBehavior.java index af6f938115..bcf553eb73 100644 --- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWriteOldListBehavior.java +++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWriteOldListBehavior.java @@ -18,12 +18,12 @@ */ package org.apache.parquet.avro; -import com.google.common.base.Charsets; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.io.Resources; import java.io.File; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -247,7 +247,7 @@ public void testAll() throws Exception { .set("mylong", 2L) .set("myfloat", 3.1f) .set("mydouble", 4.1) - .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8))) + .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8))) .set("mystring", "hello") .set("mynestedrecord", nestedRecord) .set("myenum", "a") @@ -276,7 +276,7 @@ public void testAll() throws Exception { assertEquals(2L, nextRecord.get("mylong")); assertEquals(3.1f, nextRecord.get("myfloat")); assertEquals(4.1, nextRecord.get("mydouble")); - assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), nextRecord.get("mybytes")); + assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes")); assertEquals(str("hello"), nextRecord.get("mystring")); assertEquals(expectedEnumSymbol, nextRecord.get("myenum")); assertEquals(nestedRecord, nextRecord.get("mynestedrecord")); @@ -327,7 +327,7 @@ public void testArrayWithNullValues() throws Exception { .set("mylong", 2L) .set("myfloat", 3.1f) .set("mydouble", 4.1) - .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8))) + .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8))) .set("mystring", "hello") .set("mynestedrecord", nestedRecord) .set("myenum", "a") @@ -512,7 +512,7 @@ public void write(Map record) { record.put("mylong", 2L); record.put("myfloat", 3.1f); record.put("mydouble", 4.1); - record.put("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8))); + record.put("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8))); record.put("mystring", "hello"); record.put("myenum", "a"); record.put("mynestedint", 1); @@ -573,7 +573,7 @@ public void write(Map record) { assertEquals(2L, nextRecord.get("mylong")); assertEquals(3.1f, nextRecord.get("myfloat")); assertEquals(4.1, nextRecord.get("mydouble")); - assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), nextRecord.get("mybytes")); + assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes")); assertEquals(str("hello"), nextRecord.get("mystring")); assertEquals(str("a"), nextRecord.get("myenum")); assertEquals(nestedRecord, nextRecord.get("mynestedrecord")); diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java index f385fde732..96ca5a5d49 100644 --- a/parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java +++ b/parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java @@ -20,7 +20,6 @@ package org.apache.parquet.cli; import com.beust.jcommander.internal.Lists; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.io.CharStreams; import com.google.common.io.Resources; @@ -52,7 +51,7 @@ import java.net.MalformedURLException; import java.net.URI; import java.net.URL; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.security.AccessController; import java.util.Iterator; import java.util.List; @@ -60,9 +59,6 @@ public abstract class BaseCommand implements Command, Configurable { - @VisibleForTesting - static final Charset UTF8 = Charset.forName("utf8"); - private static final String RESOURCE_URI_SCHEME = "resource"; private static final String STDIN_AS_SOURCE = "stdin"; @@ -103,7 +99,7 @@ public void output(String content, Logger console, String filename) } else { FSDataOutputStream outgoing = create(filename); try { - outgoing.write(content.getBytes(UTF8)); + outgoing.write(content.getBytes(StandardCharsets.UTF_8)); } finally { outgoing.close(); } diff --git a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java index 85c82bddbd..021d171ccb 100644 --- a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java +++ b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java @@ -23,7 +23,6 @@ import java.io.ObjectStreamException; import java.io.OutputStream; import java.io.Serializable; -import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; @@ -31,12 +30,9 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; -import org.apache.parquet.io.ParquetDecodingException; import org.apache.parquet.io.ParquetEncodingException; import org.apache.parquet.schema.PrimitiveComparator; -import static org.apache.parquet.bytes.BytesUtils.UTF8; - abstract public class Binary implements Comparable, Serializable { protected boolean isBackingBytesReused; @@ -133,11 +129,10 @@ public ByteArraySliceBackedBinary(byte[] value, int offset, int length, boolean @Override public String toStringUsingUTF8() { - return UTF8.decode(ByteBuffer.wrap(value, offset, length)).toString(); - // TODO: figure out why the following line was much slower - // rdb: new String(...) is slower because it instantiates a new Decoder, - // while Charset#decode uses a thread-local decoder cache - // return new String(value, offset, length, BytesUtils.UTF8); + // Charset#decode uses a thread-local decoder cache and is faster than + // new String(...) which instantiates a new Decoder per invocation + return StandardCharsets.UTF_8 + .decode(ByteBuffer.wrap(value, offset, length)).toString(); } @Override @@ -220,11 +215,7 @@ public String toString() { } private static ByteBuffer encodeUTF8(String value) { - try { - return ByteBuffer.wrap(value.getBytes("UTF-8")); - } catch (UnsupportedEncodingException e) { - throw new ParquetEncodingException("UTF-8 not supported.", e); - } + return ByteBuffer.wrap(value.getBytes(StandardCharsets.UTF_8)); } } @@ -284,7 +275,7 @@ public ByteArrayBackedBinary(byte[] value, boolean isBackingBytesReused) { @Override public String toStringUsingUTF8() { - return UTF8.decode(ByteBuffer.wrap(value)).toString(); + return StandardCharsets.UTF_8.decode(ByteBuffer.wrap(value)).toString(); } @Override @@ -393,11 +384,8 @@ public ByteBufferBackedBinary(ByteBuffer value, int offset, int length, boolean public String toStringUsingUTF8() { String ret; if (value.hasArray()) { - try { - ret = new String(value.array(), value.arrayOffset() + offset, length, "UTF-8"); - } catch (UnsupportedEncodingException e) { - throw new ParquetDecodingException("UTF-8 not supported"); - } + ret = new String(value.array(), value.arrayOffset() + offset, length, + StandardCharsets.UTF_8); } else { int limit = value.limit(); value.limit(offset+length); @@ -406,7 +394,7 @@ public String toStringUsingUTF8() { // no corresponding interface to read a subset of a buffer, would have to slice it // which creates another ByteBuffer object or do what is done here to adjust the // limit/offset and set them back after - ret = UTF8.decode(value).toString(); + ret = StandardCharsets.UTF_8.decode(value).toString(); value.limit(limit); value.position(position); } diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java b/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java index ba3f9034ad..2783b696d5 100644 --- a/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java +++ b/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java @@ -27,8 +27,8 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import org.apache.parquet.bytes.ByteBufferInputStream; import org.junit.Assert; @@ -627,9 +627,8 @@ private void writeRepeated(int COUNT, ValuesWriter cw, String prefix) { } } - private void writeRepeatedWithReuse(int COUNT, ValuesWriter cw, - String prefix) throws UnsupportedEncodingException { - Binary reused = Binary.fromReusedByteArray((prefix + "0").getBytes("UTF-8")); + private void writeRepeatedWithReuse(int COUNT, ValuesWriter cw, String prefix) { + Binary reused = Binary.fromReusedByteArray((prefix + "0").getBytes(StandardCharsets.UTF_8)); for (int i = 0; i < COUNT; i++) { Binary content = Binary.fromString(prefix + i % 10); System.arraycopy(content.getBytesUnsafe(), 0, reused.getBytesUnsafe(), 0, reused.length()); diff --git a/parquet-common/src/main/java/org/apache/parquet/bytes/BytesUtils.java b/parquet-common/src/main/java/org/apache/parquet/bytes/BytesUtils.java index 2657c7e96e..2c8162cd1c 100644 --- a/parquet-common/src/main/java/org/apache/parquet/bytes/BytesUtils.java +++ b/parquet-common/src/main/java/org/apache/parquet/bytes/BytesUtils.java @@ -24,6 +24,7 @@ import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,6 +35,8 @@ public class BytesUtils { private static final Logger LOG = LoggerFactory.getLogger(BytesUtils.class); + /** @deprecated Use {@link StandardCharsets#UTF_8} instead */ + @Deprecated public static final Charset UTF8 = Charset.forName("UTF-8"); /** diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java index 20efe47573..47197f59ae 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java @@ -24,7 +24,7 @@ import static org.apache.parquet.hadoop.ParquetWriter.MAX_PADDING_SIZE_DEFAULT; import java.io.IOException; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -101,7 +101,7 @@ public class ParquetFileWriter { public static final String PARQUET_METADATA_FILE = "_metadata"; public static final String MAGIC_STR = "PAR1"; - public static final byte[] MAGIC = MAGIC_STR.getBytes(Charset.forName("ASCII")); + public static final byte[] MAGIC = MAGIC_STR.getBytes(StandardCharsets.US_ASCII); public static final String PARQUET_COMMON_METADATA_FILE = "_common_metadata"; public static final int CURRENT_VERSION = 1;