@@ -40,7 +40,6 @@
import java.util.Locale;
import java.util.Map;
import java.util.stream.Stream;
import org.apache.directory.api.util.Hex;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.io.ByteStreams;
@@ -92,7 +91,22 @@ static String md5sum(InputStream is) throws IOException {
while ((numBytes = is.read(bytes)) != -1) {
md.update(bytes, 0, numBytes);
}
return new String(Hex.encodeHex(md.digest())).toUpperCase(Locale.ROOT);
return encodeHex(md.digest());
}

/**
* Encode byte array to hex string; this avoids having to use an external library, though it is
* less efficient than {@code org.apache.directory.api.util.Hex}.
*
* @param bytes bytes to encode
* @return hex encoded string
*/
private static String encodeHex(byte[] bytes) {
StringBuilder sb = new StringBuilder();
for (byte b : bytes) {
sb.append(String.format(Locale.ROOT, "%02X", b));
}
return sb.toString();
}

private static void inputStreamToFile(InputStream inputStream, File targetFile)
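For reference, a minimal, self-contained sketch (not part of the diff) of what the new helper computes: two uppercase hex characters per byte with no separator, matching the uppercased output of the removed `Hex.encodeHex` call. The class name and sample input below are illustrative only.

```java
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Locale;

public class EncodeHexCheck {
  public static void main(String[] args) throws NoSuchAlgorithmException {
    // MD5-digest a sample payload, as md5sum(InputStream) does for the streamed bytes.
    byte[] digest =
        MessageDigest.getInstance("MD5").digest("hello".getBytes(StandardCharsets.UTF_8));

    // Same formatting as the new encodeHex helper: "%02X" per byte, uppercase, no spaces.
    StringBuilder sb = new StringBuilder(digest.length * 2);
    for (byte b : digest) {
      sb.append(String.format(Locale.ROOT, "%02X", b));
    }

    // Prints 5D41402ABC4B2A76B9719D911017C592, the uppercase MD5 of "hello".
    System.out.println(sb);
  }
}
```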
36 changes: 18 additions & 18 deletions build.gradle
@@ -348,7 +348,7 @@ project(':iceberg-core') {
implementation libs.jackson.databind
implementation libs.caffeine
implementation libs.roaringbitmap
compileOnly(libs.hadoop2.client) {
compileOnly(libs.hadoop3.client) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
}
@@ -373,7 +373,7 @@ project(':iceberg-data') {
implementation project(':iceberg-core')
compileOnly project(':iceberg-parquet')
compileOnly project(':iceberg-orc')
compileOnly(libs.hadoop2.common) {
compileOnly(libs.hadoop3.common) {
exclude group: 'commons-beanutils'
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
@@ -396,7 +396,7 @@ project(':iceberg-data') {

compileOnly libs.avro.avro

testImplementation(libs.hadoop2.client) {
testImplementation(libs.hadoop3.client) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
}
@@ -427,7 +427,7 @@ project(':iceberg-aliyun') {
compileOnly libs.jaxb.api
compileOnly libs.activation
compileOnly libs.jaxb.runtime
compileOnly(libs.hadoop2.common) {
compileOnly(libs.hadoop3.common) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'javax.servlet', module: 'servlet-api'
@@ -478,7 +478,7 @@ project(':iceberg-aws') {
compileOnly("software.amazon.awssdk:dynamodb")
compileOnly("software.amazon.awssdk:lakeformation")

compileOnly(libs.hadoop2.common) {
compileOnly(libs.hadoop3.common) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'javax.servlet', module: 'servlet-api'
@@ -582,7 +582,7 @@ project(':iceberg-delta-lake') {

compileOnly "io.delta:delta-standalone_${scalaVersion}:${libs.versions.delta.standalone.get()}"

compileOnly(libs.hadoop2.common) {
compileOnly(libs.hadoop3.common) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'javax.servlet', module: 'servlet-api'
@@ -594,7 +594,7 @@ project(':iceberg-delta-lake') {
if (sparkVersions.contains("3.5")) {
integrationImplementation "io.delta:delta-spark_${scalaVersion}:${libs.versions.delta.spark.get()}"
integrationImplementation project(path: ":iceberg-spark:iceberg-spark-3.5_${scalaVersion}")
integrationImplementation(libs.hadoop2.minicluster) {
integrationImplementation(libs.hadoop3.minicluster) {
exclude group: 'org.apache.avro', module: 'avro'
// to make sure netty libs only come from project(':iceberg-arrow')
exclude group: 'io.netty', module: 'netty-buffer'
@@ -654,7 +654,7 @@ project(':iceberg-gcp') {

testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')

testImplementation(libs.hadoop2.common) {
testImplementation(libs.hadoop3.common) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'javax.servlet', module: 'servlet-api'
@@ -729,7 +729,7 @@ project(':iceberg-hive-metastore') {
exclude group: 'com.zaxxer', module: 'HikariCP'
}

compileOnly(libs.hadoop2.client) {
compileOnly(libs.hadoop3.client) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
}
@@ -761,12 +761,12 @@ project(':iceberg-orc') {
exclude group: 'org.apache.hive', module: 'hive-storage-api'
}

compileOnly(libs.hadoop2.common) {
compileOnly(libs.hadoop3.common) {
exclude group: 'commons-beanutils'
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
}
compileOnly(libs.hadoop2.client) {
compileOnly(libs.hadoop3.client) {
exclude group: 'org.apache.avro', module: 'avro'
}

@@ -795,7 +795,7 @@ project(':iceberg-parquet') {
}

compileOnly libs.avro.avro
compileOnly(libs.hadoop2.client) {
compileOnly(libs.hadoop3.client) {
exclude group: 'org.apache.avro', module: 'avro'
}

@@ -839,8 +839,8 @@ project(':iceberg-arrow') {
// We import :netty-common through :arrow-memory-netty
// so that the same version as used by the :arrow-memory-netty module is picked.
testImplementation libs.arrow.memory.netty
testImplementation libs.hadoop2.common
testImplementation libs.hadoop2.mapreduce.client.core
testImplementation libs.hadoop3.common
testImplementation libs.hadoop3.mapreduce.client.core
}
}

@@ -866,12 +866,12 @@ project(':iceberg-pig') {
compileOnly(libs.pig) {
exclude group: "junit", module: "junit"
}
compileOnly(libs.hadoop2.mapreduce.client.core)
compileOnly(libs.hadoop2.client) {
compileOnly(libs.hadoop3.mapreduce.client.core)
compileOnly(libs.hadoop3.client) {
exclude group: 'org.apache.avro', module: 'avro'
}

testImplementation(libs.hadoop2.minicluster) {
testImplementation(libs.hadoop3.minicluster) {
exclude group: 'org.apache.avro', module: 'avro'
}
}
@@ -894,7 +894,7 @@ project(':iceberg-nessie') {
implementation libs.jackson.core
implementation libs.jackson.databind

compileOnly libs.hadoop2.common
compileOnly libs.hadoop3.common
// Only there to prevent "warning: unknown enum constant SchemaType.OBJECT" compile messages
compileOnly libs.microprofile.openapi.api

12 changes: 6 additions & 6 deletions flink/v1.18/build.gradle
@@ -42,9 +42,9 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
compileOnly libs.flink118.connector.base
compileOnly libs.flink118.connector.files

compileOnly libs.hadoop2.hdfs
compileOnly libs.hadoop2.common
compileOnly(libs.hadoop2.minicluster) {
compileOnly libs.hadoop3.hdfs
compileOnly libs.hadoop3.common
compileOnly(libs.hadoop3.minicluster) {
exclude group: 'org.apache.avro', module: 'avro'
}

@@ -186,9 +186,9 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
integrationImplementation libs.flink118.table.api.java.bridge
integrationImplementation "org.apache.flink:flink-table-planner_${scalaVersion}:${libs.versions.flink118.get()}"

integrationImplementation libs.hadoop2.common
integrationImplementation libs.hadoop2.hdfs
integrationImplementation(libs.hadoop2.minicluster) {
integrationImplementation libs.hadoop3.common
integrationImplementation libs.hadoop3.hdfs
integrationImplementation(libs.hadoop3.minicluster) {
exclude group: 'org.apache.avro', module: 'avro'
}

12 changes: 6 additions & 6 deletions flink/v1.19/build.gradle
@@ -42,9 +42,9 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
compileOnly libs.flink119.connector.base
compileOnly libs.flink119.connector.files

compileOnly libs.hadoop2.hdfs
compileOnly libs.hadoop2.common
compileOnly(libs.hadoop2.minicluster) {
compileOnly libs.hadoop3.hdfs
compileOnly libs.hadoop3.common
compileOnly(libs.hadoop3.minicluster) {
exclude group: 'org.apache.avro', module: 'avro'
}

@@ -186,9 +186,9 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
integrationImplementation libs.flink119.table.api.java.bridge
integrationImplementation "org.apache.flink:flink-table-planner_${scalaVersion}:${libs.versions.flink119.get()}"

integrationImplementation libs.hadoop2.common
integrationImplementation libs.hadoop2.hdfs
integrationImplementation(libs.hadoop2.minicluster) {
integrationImplementation libs.hadoop3.common
integrationImplementation libs.hadoop3.hdfs
integrationImplementation(libs.hadoop3.minicluster) {
exclude group: 'org.apache.avro', module: 'avro'
}

12 changes: 6 additions & 6 deletions flink/v1.20/build.gradle
@@ -42,9 +42,9 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
compileOnly libs.flink120.connector.base
compileOnly libs.flink120.connector.files

compileOnly libs.hadoop2.hdfs
compileOnly libs.hadoop2.common
compileOnly(libs.hadoop2.minicluster) {
compileOnly libs.hadoop3.hdfs
compileOnly libs.hadoop3.common
compileOnly(libs.hadoop3.minicluster) {
exclude group: 'org.apache.avro', module: 'avro'
}

@@ -186,9 +186,9 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
integrationImplementation libs.flink120.table.api.java.bridge
integrationImplementation "org.apache.flink:flink-table-planner_${scalaVersion}:${libs.versions.flink120.get()}"

integrationImplementation libs.hadoop2.common
integrationImplementation libs.hadoop2.hdfs
integrationImplementation(libs.hadoop2.minicluster) {
integrationImplementation libs.hadoop3.common
integrationImplementation libs.hadoop3.hdfs
integrationImplementation(libs.hadoop3.minicluster) {
exclude group: 'org.apache.avro', module: 'avro'
}

9 changes: 3 additions & 6 deletions gradle/libs.versions.toml
@@ -44,7 +44,6 @@ flink119 = { strictly = "1.19.0"}
flink120 = { strictly = "1.20.0"}
google-libraries-bom = "26.44.0"
guava = "33.2.1-jre"
hadoop2 = "2.7.3"
hadoop3 = "3.3.6"
httpcomponents-httpclient5 = "5.3.1"
hive2 = { strictly = "2.3.9"} # see rich version usage explanation above
@@ -128,13 +127,11 @@ flink120-streaming-java = { module = "org.apache.flink:flink-streaming-java", ve
flink120-table-api-java-bridge = { module = "org.apache.flink:flink-table-api-java-bridge", version.ref = "flink120" }
google-libraries-bom = { module = "com.google.cloud:libraries-bom", version.ref = "google-libraries-bom" }
guava-guava = { module = "com.google.guava:guava", version.ref = "guava" }
hadoop2-client = { module = "org.apache.hadoop:hadoop-client", version.ref = "hadoop2" }
hadoop2-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop2" }
hadoop2-hdfs = { module = "org.apache.hadoop:hadoop-hdfs", version.ref = "hadoop2" }
hadoop2-mapreduce-client-core = { module = "org.apache.hadoop:hadoop-mapreduce-client-core", version.ref = "hadoop2" }
hadoop2-minicluster = { module = "org.apache.hadoop:hadoop-minicluster", version.ref = "hadoop2" }
hadoop3-client = { module = "org.apache.hadoop:hadoop-client", version.ref = "hadoop3" }
hadoop3-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop3" }
hadoop3-hdfs = { module = "org.apache.hadoop:hadoop-hdfs", version.ref = "hadoop3" }
hadoop3-mapreduce-client-core = { module = "org.apache.hadoop:hadoop-mapreduce-client-core", version.ref = "hadoop3" }
hadoop3-minicluster = { module = "org.apache.hadoop:hadoop-minicluster", version.ref = "hadoop3" }
hive2-exec = { module = "org.apache.hive:hive-exec", version.ref = "hive2" }
hive2-metastore = { module = "org.apache.hive:hive-metastore", version.ref = "hive2" }
hive2-serde = { module = "org.apache.hive:hive-serde", version.ref = "hive2" }
2 changes: 1 addition & 1 deletion mr/build.gradle
@@ -37,7 +37,7 @@ project(':iceberg-mr') {
implementation project(':iceberg-orc')
implementation project(':iceberg-parquet')

compileOnly(libs.hadoop2.client) {
compileOnly(libs.hadoop3.client) {
exclude group: 'org.apache.avro', module: 'avro'
}

2 changes: 1 addition & 1 deletion spark/v3.3/build.gradle
@@ -93,7 +93,7 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
exclude group: 'com.google.code.findbugs', module: 'jsr305'
}

testImplementation(libs.hadoop2.minicluster) {
testImplementation(libs.hadoop3.minicluster) {
exclude group: 'org.apache.avro', module: 'avro'
// to make sure netty libs only come from
// project(':iceberg-arrow')
2 changes: 1 addition & 1 deletion spark/v3.4/build.gradle
@@ -95,7 +95,7 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {

implementation libs.caffeine

testImplementation(libs.hadoop2.minicluster) {
testImplementation(libs.hadoop3.minicluster) {
exclude group: 'org.apache.avro', module: 'avro'
// to make sure netty libs only come from project(':iceberg-arrow')
exclude group: 'io.netty', module: 'netty-buffer'
2 changes: 1 addition & 1 deletion spark/v3.5/build.gradle
@@ -95,7 +95,7 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {

implementation libs.caffeine

testImplementation(libs.hadoop2.minicluster) {
testImplementation(libs.hadoop3.minicluster) {
exclude group: 'org.apache.avro', module: 'avro'
// to make sure netty libs only come from project(':iceberg-arrow')
exclude group: 'io.netty', module: 'netty-buffer'