diff --git a/aliyun/src/test/java/org/apache/iceberg/aliyun/oss/mock/AliyunOSSMockLocalStore.java b/aliyun/src/test/java/org/apache/iceberg/aliyun/oss/mock/AliyunOSSMockLocalStore.java
index f7a4b72e4b97..cfeae2696fd3 100644
--- a/aliyun/src/test/java/org/apache/iceberg/aliyun/oss/mock/AliyunOSSMockLocalStore.java
+++ b/aliyun/src/test/java/org/apache/iceberg/aliyun/oss/mock/AliyunOSSMockLocalStore.java
@@ -40,7 +40,6 @@
 import java.util.Locale;
 import java.util.Map;
 import java.util.stream.Stream;
-import org.apache.directory.api.util.Hex;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.relocated.com.google.common.io.ByteStreams;
@@ -92,7 +91,22 @@ static String md5sum(InputStream is) throws IOException {
     while ((numBytes = is.read(bytes)) != -1) {
       md.update(bytes, 0, numBytes);
     }
-    return new String(Hex.encodeHex(md.digest())).toUpperCase(Locale.ROOT);
+    return encodeHex(md.digest());
+  }
+
+  /**
+   * Encode a byte array to an uppercase hex string; this avoids having to use an external
+   * library, though it is less efficient than {@code org.apache.directory.api.util.Hex}.
+   *
+   * @param bytes bytes to encode
+   * @return hex encoded string
+   */
+  private static String encodeHex(byte[] bytes) {
+    StringBuilder sb = new StringBuilder();
+    for (byte b : bytes) {
+      sb.append(String.format(Locale.ROOT, "%02X", b));
+    }
+    return sb.toString();
   }
 
   private static void inputStreamToFile(InputStream inputStream, File targetFile)
diff --git a/build.gradle b/build.gradle
index 7a11943cf8be..ff2a8837658b 100644
--- a/build.gradle
+++ b/build.gradle
@@ -348,7 +348,7 @@ project(':iceberg-core') {
     implementation libs.jackson.databind
     implementation libs.caffeine
     implementation libs.roaringbitmap
-    compileOnly(libs.hadoop2.client) {
+    compileOnly(libs.hadoop3.client) {
       exclude group: 'org.apache.avro', module: 'avro'
       exclude group: 'org.slf4j', module: 'slf4j-log4j12'
     }
@@ -373,7 +373,7 @@ project(':iceberg-data') {
     implementation project(':iceberg-core')
     compileOnly project(':iceberg-parquet')
     compileOnly project(':iceberg-orc')
-    compileOnly(libs.hadoop2.common) {
+    compileOnly(libs.hadoop3.common) {
       exclude group: 'commons-beanutils'
       exclude group: 'org.apache.avro', module: 'avro'
       exclude group: 'org.slf4j', module: 'slf4j-log4j12'
@@ -396,7 +396,7 @@

     compileOnly libs.avro.avro

-    testImplementation(libs.hadoop2.client) {
+    testImplementation(libs.hadoop3.client) {
       exclude group: 'org.apache.avro', module: 'avro'
       exclude group: 'org.slf4j', module: 'slf4j-log4j12'
     }
@@ -427,7 +427,7 @@ project(':iceberg-aliyun') {
     compileOnly libs.jaxb.api
     compileOnly libs.activation
     compileOnly libs.jaxb.runtime
-    compileOnly(libs.hadoop2.common) {
+    compileOnly(libs.hadoop3.common) {
       exclude group: 'org.apache.avro', module: 'avro'
       exclude group: 'org.slf4j', module: 'slf4j-log4j12'
       exclude group: 'javax.servlet', module: 'servlet-api'
@@ -478,7 +478,7 @@ project(':iceberg-aws') {
     compileOnly("software.amazon.awssdk:dynamodb")
     compileOnly("software.amazon.awssdk:lakeformation")

-    compileOnly(libs.hadoop2.common) {
+    compileOnly(libs.hadoop3.common) {
       exclude group: 'org.apache.avro', module: 'avro'
       exclude group: 'org.slf4j', module: 'slf4j-log4j12'
       exclude group: 'javax.servlet', module: 'servlet-api'
@@ -582,7 +582,7 @@ project(':iceberg-delta-lake') {

     compileOnly "io.delta:delta-standalone_${scalaVersion}:${libs.versions.delta.standalone.get()}"

-    compileOnly(libs.hadoop2.common) {
+    compileOnly(libs.hadoop3.common) {
       exclude group: 'org.apache.avro', module: 'avro'
       exclude group: 'org.slf4j', module: 'slf4j-log4j12'
       exclude group: 'javax.servlet', module: 'servlet-api'
@@ -594,7 +594,7 @@
     if (sparkVersions.contains("3.5")) {
       integrationImplementation "io.delta:delta-spark_${scalaVersion}:${libs.versions.delta.spark.get()}"
       integrationImplementation project(path: ":iceberg-spark:iceberg-spark-3.5_${scalaVersion}")
-      integrationImplementation(libs.hadoop2.minicluster) {
+      integrationImplementation(libs.hadoop3.minicluster) {
         exclude group: 'org.apache.avro', module: 'avro'
         // to make sure netty libs only come from project(':iceberg-arrow')
         exclude group: 'io.netty', module: 'netty-buffer'
@@ -654,7 +654,7 @@ project(':iceberg-gcp') {

     testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')

-    testImplementation(libs.hadoop2.common) {
+    testImplementation(libs.hadoop3.common) {
       exclude group: 'org.apache.avro', module: 'avro'
       exclude group: 'org.slf4j', module: 'slf4j-log4j12'
       exclude group: 'javax.servlet', module: 'servlet-api'
@@ -729,7 +729,7 @@ project(':iceberg-hive-metastore') {
       exclude group: 'com.zaxxer', module: 'HikariCP'
     }

-    compileOnly(libs.hadoop2.client) {
+    compileOnly(libs.hadoop3.client) {
       exclude group: 'org.apache.avro', module: 'avro'
       exclude group: 'org.slf4j', module: 'slf4j-log4j12'
     }
@@ -761,12 +761,12 @@ project(':iceberg-orc') {
       exclude group: 'org.apache.hive', module: 'hive-storage-api'
     }

-    compileOnly(libs.hadoop2.common) {
+    compileOnly(libs.hadoop3.common) {
       exclude group: 'commons-beanutils'
       exclude group: 'org.apache.avro', module: 'avro'
       exclude group: 'org.slf4j', module: 'slf4j-log4j12'
     }

-    compileOnly(libs.hadoop2.client) {
+    compileOnly(libs.hadoop3.client) {
       exclude group: 'org.apache.avro', module: 'avro'
     }
@@ -795,7 +795,7 @@ project(':iceberg-parquet') {
     }

     compileOnly libs.avro.avro
-    compileOnly(libs.hadoop2.client) {
+    compileOnly(libs.hadoop3.client) {
       exclude group: 'org.apache.avro', module: 'avro'
     }
@@ -839,8 +839,8 @@ project(':iceberg-arrow') {
     // We import :netty-common through :arrow-memory-netty
     // so that the same version as used by the :arrow-memory-netty module is picked.
     testImplementation libs.arrow.memory.netty
-    testImplementation libs.hadoop2.common
-    testImplementation libs.hadoop2.mapreduce.client.core
+    testImplementation libs.hadoop3.common
+    testImplementation libs.hadoop3.mapreduce.client.core
   }
 }
@@ -866,12 +866,12 @@ project(':iceberg-pig') {
     compileOnly(libs.pig) {
       exclude group: "junit", module: "junit"
     }
-    compileOnly(libs.hadoop2.mapreduce.client.core)
-    compileOnly(libs.hadoop2.client) {
+    compileOnly(libs.hadoop3.mapreduce.client.core)
+    compileOnly(libs.hadoop3.client) {
       exclude group: 'org.apache.avro', module: 'avro'
     }

-    testImplementation(libs.hadoop2.minicluster) {
+    testImplementation(libs.hadoop3.minicluster) {
       exclude group: 'org.apache.avro', module: 'avro'
     }
   }
@@ -894,7 +894,7 @@ project(':iceberg-nessie') {
     implementation libs.jackson.core
     implementation libs.jackson.databind

-    compileOnly libs.hadoop2.common
+    compileOnly libs.hadoop3.common

     // Only there to prevent "warning: unknown enum constant SchemaType.OBJECT" compile messages
     compileOnly libs.microprofile.openapi.api
diff --git a/flink/v1.18/build.gradle b/flink/v1.18/build.gradle
index aac01c9c6931..71fbdfeaed14 100644
--- a/flink/v1.18/build.gradle
+++ b/flink/v1.18/build.gradle
@@ -42,9 +42,9 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
     compileOnly libs.flink118.connector.base
     compileOnly libs.flink118.connector.files

-    compileOnly libs.hadoop2.hdfs
-    compileOnly libs.hadoop2.common
-    compileOnly(libs.hadoop2.minicluster) {
+    compileOnly libs.hadoop3.hdfs
+    compileOnly libs.hadoop3.common
+    compileOnly(libs.hadoop3.minicluster) {
       exclude group: 'org.apache.avro', module: 'avro'
     }

@@ -186,9 +186,9 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
     integrationImplementation libs.flink118.table.api.java.bridge
     integrationImplementation "org.apache.flink:flink-table-planner_${scalaVersion}:${libs.versions.flink118.get()}"

-    integrationImplementation libs.hadoop2.common
-    integrationImplementation libs.hadoop2.hdfs
-    integrationImplementation(libs.hadoop2.minicluster) {
+    integrationImplementation libs.hadoop3.common
+    integrationImplementation libs.hadoop3.hdfs
+    integrationImplementation(libs.hadoop3.minicluster) {
       exclude group: 'org.apache.avro', module: 'avro'
     }

diff --git a/flink/v1.19/build.gradle b/flink/v1.19/build.gradle
index 392a1cb124f0..48c420d11121 100644
--- a/flink/v1.19/build.gradle
+++ b/flink/v1.19/build.gradle
@@ -42,9 +42,9 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
     compileOnly libs.flink119.connector.base
     compileOnly libs.flink119.connector.files

-    compileOnly libs.hadoop2.hdfs
-    compileOnly libs.hadoop2.common
-    compileOnly(libs.hadoop2.minicluster) {
+    compileOnly libs.hadoop3.hdfs
+    compileOnly libs.hadoop3.common
+    compileOnly(libs.hadoop3.minicluster) {
       exclude group: 'org.apache.avro', module: 'avro'
     }

@@ -186,9 +186,9 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
     integrationImplementation libs.flink119.table.api.java.bridge
     integrationImplementation "org.apache.flink:flink-table-planner_${scalaVersion}:${libs.versions.flink119.get()}"

-    integrationImplementation libs.hadoop2.common
-    integrationImplementation libs.hadoop2.hdfs
-    integrationImplementation(libs.hadoop2.minicluster) {
+    integrationImplementation libs.hadoop3.common
+    integrationImplementation libs.hadoop3.hdfs
+    integrationImplementation(libs.hadoop3.minicluster) {
       exclude group: 'org.apache.avro', module: 'avro'
     }

diff --git a/flink/v1.20/build.gradle b/flink/v1.20/build.gradle
index f2e1fb51a1f4..ac00680da738 100644
--- a/flink/v1.20/build.gradle
+++ b/flink/v1.20/build.gradle
@@ -42,9 +42,9 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
     compileOnly libs.flink120.connector.base
     compileOnly libs.flink120.connector.files

-    compileOnly libs.hadoop2.hdfs
-    compileOnly libs.hadoop2.common
-    compileOnly(libs.hadoop2.minicluster) {
+    compileOnly libs.hadoop3.hdfs
+    compileOnly libs.hadoop3.common
+    compileOnly(libs.hadoop3.minicluster) {
       exclude group: 'org.apache.avro', module: 'avro'
     }

@@ -186,9 +186,9 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
     integrationImplementation libs.flink120.table.api.java.bridge
     integrationImplementation "org.apache.flink:flink-table-planner_${scalaVersion}:${libs.versions.flink120.get()}"

-    integrationImplementation libs.hadoop2.common
-    integrationImplementation libs.hadoop2.hdfs
-    integrationImplementation(libs.hadoop2.minicluster) {
+    integrationImplementation libs.hadoop3.common
+    integrationImplementation libs.hadoop3.hdfs
+    integrationImplementation(libs.hadoop3.minicluster) {
       exclude group: 'org.apache.avro', module: 'avro'
     }

diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 8ae69c566f2e..eacd458dabb0 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -44,7 +44,6 @@ flink119 = { strictly = "1.19.0"}
 flink120 = { strictly = "1.20.0"}
 google-libraries-bom = "26.44.0"
 guava = "33.2.1-jre"
-hadoop2 = "2.7.3"
 hadoop3 = "3.3.6"
 httpcomponents-httpclient5 = "5.3.1"
 hive2 = { strictly = "2.3.9"} # see rich version usage explanation above
@@ -128,13 +127,11 @@ flink120-streaming-java = { module = "org.apache.flink:flink-streaming-java", ve
 flink120-table-api-java-bridge = { module = "org.apache.flink:flink-table-api-java-bridge", version.ref = "flink120" }
 google-libraries-bom = { module = "com.google.cloud:libraries-bom", version.ref = "google-libraries-bom" }
 guava-guava = { module = "com.google.guava:guava", version.ref = "guava" }
-hadoop2-client = { module = "org.apache.hadoop:hadoop-client", version.ref = "hadoop2" }
-hadoop2-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop2" }
-hadoop2-hdfs = { module = "org.apache.hadoop:hadoop-hdfs", version.ref = "hadoop2" }
-hadoop2-mapreduce-client-core = { module = "org.apache.hadoop:hadoop-mapreduce-client-core", version.ref = "hadoop2" }
-hadoop2-minicluster = { module = "org.apache.hadoop:hadoop-minicluster", version.ref = "hadoop2" }
 hadoop3-client = { module = "org.apache.hadoop:hadoop-client", version.ref = "hadoop3" }
 hadoop3-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop3" }
+hadoop3-hdfs = { module = "org.apache.hadoop:hadoop-hdfs", version.ref = "hadoop3" }
+hadoop3-mapreduce-client-core = { module = "org.apache.hadoop:hadoop-mapreduce-client-core", version.ref = "hadoop3" }
+hadoop3-minicluster = { module = "org.apache.hadoop:hadoop-minicluster", version.ref = "hadoop3" }
 hive2-exec = { module = "org.apache.hive:hive-exec", version.ref = "hive2" }
 hive2-metastore = { module = "org.apache.hive:hive-metastore", version.ref = "hive2" }
 hive2-serde = { module = "org.apache.hive:hive-serde", version.ref = "hive2" }
diff --git a/mr/build.gradle b/mr/build.gradle
index bf8f9ee943f7..0e7c5c359aaf 100644
--- a/mr/build.gradle
+++ b/mr/build.gradle
@@ -37,7 +37,7 @@ project(':iceberg-mr') {
     implementation project(':iceberg-orc')
     implementation project(':iceberg-parquet')

-    compileOnly(libs.hadoop2.client) {
+    compileOnly(libs.hadoop3.client) {
       exclude group: 'org.apache.avro', module: 'avro'
     }

diff --git a/spark/v3.3/build.gradle b/spark/v3.3/build.gradle
index c081cffeb644..97dac8ea3f2a 100644
--- a/spark/v3.3/build.gradle
+++ b/spark/v3.3/build.gradle
@@ -93,7 +93,7 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
       exclude group: 'com.google.code.findbugs', module: 'jsr305'
     }

-    testImplementation(libs.hadoop2.minicluster) {
+    testImplementation(libs.hadoop3.minicluster) {
       exclude group: 'org.apache.avro', module: 'avro'
       // to make sure netty libs only come from
       // project(':iceberg-arrow')
diff --git a/spark/v3.4/build.gradle b/spark/v3.4/build.gradle
index 3b1761d39f63..15d04e0ab8f2 100644
--- a/spark/v3.4/build.gradle
+++ b/spark/v3.4/build.gradle
@@ -95,7 +95,7 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {

     implementation libs.caffeine

-    testImplementation(libs.hadoop2.minicluster) {
+    testImplementation(libs.hadoop3.minicluster) {
       exclude group: 'org.apache.avro', module: 'avro'
       // to make sure netty libs only come from project(':iceberg-arrow')
       exclude group: 'io.netty', module: 'netty-buffer'
diff --git a/spark/v3.5/build.gradle b/spark/v3.5/build.gradle
index 2ba5d493c6cd..d370bc541598 100644
--- a/spark/v3.5/build.gradle
+++ b/spark/v3.5/build.gradle
@@ -95,7 +95,7 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {

     implementation libs.caffeine

-    testImplementation(libs.hadoop2.minicluster) {
+    testImplementation(libs.hadoop3.minicluster) {
       exclude group: 'org.apache.avro', module: 'avro'
       // to make sure netty libs only come from project(':iceberg-arrow')
       exclude group: 'io.netty', module: 'netty-buffer'
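Note on the new encodeHex helper in AliyunOSSMockLocalStore above: since it replaces org.apache.directory.api.util.Hex, its output must stay byte-for-byte identical to the old uppercase hex digests. Below is a minimal, illustrative JUnit 5 + AssertJ sketch (not part of this diff; the test class and method names are hypothetical) that pins md5sum() to the well-known MD5 digest of the empty input. It assumes placement in the org.apache.iceberg.aliyun.oss.mock package so the package-private md5sum(InputStream) is reachable.

package org.apache.iceberg.aliyun.oss.mock;

import static org.assertj.core.api.Assertions.assertThat;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import org.junit.jupiter.api.Test;

// Hypothetical regression test: md5sum() should return uppercase hex with no
// separators, matching the output of the removed Hex.encodeHex + toUpperCase.
public class TestAliyunOSSMockLocalStoreMd5 {
  @Test
  public void md5sumOfEmptyStreamMatchesKnownDigest() throws IOException {
    String digest = AliyunOSSMockLocalStore.md5sum(new ByteArrayInputStream(new byte[0]));
    // MD5("") = d41d8cd98f00b204e9800998ecf8427e; encodeHex uses "%02X", so uppercase.
    assertThat(digest).isEqualTo("D41D8CD98F00B204E9800998ECF8427E");
  }
}

A test along these lines would catch formatting regressions such as a stray separator in the format string (e.g. "%02X " instead of "%02X"), which would silently corrupt the mock's ETag values.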