apache · mbutrovich · Oct 7, 2025 · Sep 23, 2025 · Sep 23, 2025 · Sep 23, 2025
diff --git a/common/src/main/java/org/apache/comet/parquet/CometFileKeyUnwrapper.java b/common/src/main/java/org/apache/comet/parquet/CometFileKeyUnwrapper.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.crypto.DecryptionKeyRetriever;
+import org.apache.parquet.crypto.DecryptionPropertiesFactory;
+import org.apache.parquet.crypto.FileDecryptionProperties;
+import org.apache.parquet.crypto.ParquetCryptoRuntimeException;
+
+// spotless:off
+/*
+ * Architecture Overview:
+ *
+ *          JVM Side                           |                     Native Side
+ *   ┌─────────────────────────────────────┐   |   ┌─────────────────────────────────────┐
+ *   │     CometFileKeyUnwrapper           │   |   │       Parquet File Reading          │
+ *   │                                     │   |   │                                     │
+ *   │  ┌─────────────────────────────┐    │   |   │  ┌─────────────────────────────┐    │
+ *   │  │      hadoopConf             │    │   |   │  │     file1.parquet           │    │
+ *   │  │   (Configuration)           │    │   |   │  │     file2.parquet           │    │
+ *   │  └─────────────────────────────┘    │   |   │  │     file3.parquet           │    │
+ *   │              │                      │   |   │  └─────────────────────────────┘    │
+ *   │              ▼                      │   |   │              │                      │
+ *   │  ┌─────────────────────────────┐    │   |   │              │                      │
+ *   │  │      factoryCache           │    │   |   │              ▼                      │
+ *   │  │   (many-to-one mapping)     │    │   |   │  ┌─────────────────────────────┐    │
+ *   │  │                             │    │   |   │  │  Parse file metadata &      │    │
+ *   │  │ file1 ──┐                   │    │   |   │  │  extract keyMetadata        │    │
+ *   │  │ file2 ──┼─► DecryptionProps │    │   |   │  └─────────────────────────────┘    │
+ *   │  │ file3 ──┘      Factory      │    │   |   │              │                      │
+ *   │  └─────────────────────────────┘    │   |   │              │                      │
+ *   │              │                      │   |   │              ▼                      │
+ *   │              ▼                      │   |   │  ╔═════════════════════════════╗    │
+ *   │  ┌─────────────────────────────┐    │   |   │  ║        JNI CALL:            ║    │
+ *   │  │      retrieverCache         │    │   |   │  ║       getKey(filePath,      ║    │
+ *   │  │  filePath -> KeyRetriever   │◄───┼───┼───┼──║        keyMetadata)         ║    │
+ *   │  └─────────────────────────────┘    │   |   │  ╚═════════════════════════════╝    │
+ *   │              │                      │   |   │                                     │
+ *   │              ▼                      │   |   │                                     │
+ *   │  ┌─────────────────────────────┐    │   |   │                                     │
+ *   │  │  DecryptionKeyRetriever     │    │   |   │                                     │
+ *   │  │     .getKey(keyMetadata)    │    │   |   │                                     │
+ *   │  └─────────────────────────────┘    │   |   │                                     │
+ *   │              │                      │   |   │                                     │
+ *   │              ▼                      │   |   │                                     │
+ *   │  ┌─────────────────────────────┐    │   |   │  ┌─────────────────────────────┐    │
+ *   │  │      return key bytes       │────┼───┼───┼─►│   Use key for decryption    │    │
+ *   │  └─────────────────────────────┘    │   |   │  │    of parquet data          │    │
+ *   └─────────────────────────────────────┘   |   │  └─────────────────────────────┘    │
+ *                                             |   └─────────────────────────────────────┘
+ *                                             |
+ *                                    JNI Boundary
+ *
+ * Setup Phase (storeDecryptionKeyRetriever):
+ * 1. hadoopConf → DecryptionPropertiesFactory (cached in the factory field)
+ * 2. Factory + filePath → DecryptionKeyRetriever (cached in retrieverCache)
+ *
+ * Runtime Phase (getKey):
+ * 3. Native code calls getKey(filePath, keyMetadata) ──► JVM
+ * 4. Retrieve cached DecryptionKeyRetriever for filePath
+ * 5. KeyRetriever.getKey(keyMetadata) → decrypted key bytes
+ * 6. Return key bytes ──► Native code for parquet decryption
+ */
+// spotless:on
+
+/**
+ * Helper class to access DecryptionKeyRetriever.getKey from native code via JNI. This class handles
+ * the complexity of creating and caching properly configured DecryptionKeyRetriever instances using
+ * DecryptionPropertiesFactory. The lifetime of this object is meant to match a single Comet plan,
+ * so it is associated with a CometExecIterator.
+ */
+public class CometFileKeyUnwrapper {
+
+  // Each file path gets a unique DecryptionKeyRetriever
+  private final ConcurrentHashMap<String, DecryptionKeyRetriever> retrieverCache =
+      new ConcurrentHashMap<>();
+
+  // Cache the factory since we should be using the same hadoopConf for every file in this scan.
+  private DecryptionPropertiesFactory factory = null;
+  // Cache the hadoopConf just to assert the assumption above.
+  private Configuration conf = null;
+
+  /**
+   * Creates and stores a DecryptionKeyRetriever instance for the given file path. Nothing is
+   * stored if the factory, decryption properties, or key retriever is unavailable (null).
+   *
+   * @param filePath The path to the Parquet file
+   * @param hadoopConf The Hadoop Configuration to use for this file path
+   */
+  public void storeDecryptionKeyRetriever(final String filePath, final Configuration hadoopConf) {
+    if (factory == null) {
+      factory = DecryptionPropertiesFactory.loadFactory(hadoopConf);
+      conf = hadoopConf;
+    } else {
+      // Check the assumption that all files have the same hadoopConf and thus same Factory
+      assert (conf == hadoopConf);
+    }
+    Path path = new Path(filePath);
+    FileDecryptionProperties props =
+        factory == null ? null : factory.getFileDecryptionProperties(hadoopConf, path);
+    DecryptionKeyRetriever keyRetriever = props == null ? null : props.getKeyRetriever();
+    if (keyRetriever != null) { // ConcurrentHashMap rejects null values
+      retrieverCache.put(filePath, keyRetriever);
+    }
+  }
+
+  /**
+   * Gets the decryption key for the given key metadata using the cached DecryptionKeyRetriever for
+   * the specified file path.
+   *
+   * @param filePath The path to the Parquet file
+   * @param keyMetadata The key metadata bytes from the Parquet file
+   * @return The decrypted key bytes
+   * @throws ParquetCryptoRuntimeException if no retriever is cached or key unwrapping fails
+   */
+  public byte[] getKey(final String filePath, final byte[] keyMetadata)
+      throws ParquetCryptoRuntimeException {
+    DecryptionKeyRetriever keyRetriever = retrieverCache.get(filePath);
+    if (keyRetriever == null) {
+      throw new ParquetCryptoRuntimeException(
+          "Failed to find DecryptionKeyRetriever for path: " + filePath);
+    }
+    return keyRetriever.getKey(keyMetadata);
+  }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/Native.java b/common/src/main/java/org/apache/comet/parquet/Native.java
@@ -267,9 +267,11 @@ public static native long initRecordBatchReader(
       String sessionTimezone,
       int batchSize,
       boolean caseSensitive,
-      Map<String, String> objectStoreOptions);
+      Map<String, String> objectStoreOptions,
+      CometFileKeyUnwrapper keyUnwrapper);
 
   // arrow native version of read batch
+
   /**
    * Read the next batch of data into memory on native side
    *
@@ -280,6 +282,7 @@ public static native long initRecordBatchReader(
 
   // arrow native equivalent of currentBatch. 'columnNum' is number of the column in the record
   // batch
+
   /**
    * Load the column corresponding to columnNum in the currently loaded record batch into JVM
    *

diff --git a/common/src/main/java/org/apache/comet/parquet/NativeBatchReader.java b/common/src/main/java/org/apache/comet/parquet/NativeBatchReader.java
@@ -80,7 +80,7 @@
 import org.apache.comet.vector.CometVector;
 import org.apache.comet.vector.NativeUtil;
 
-import static scala.jdk.javaapi.CollectionConverters.*;
+import static scala.jdk.javaapi.CollectionConverters.asJava;
 
 /**
  * A vectorized Parquet reader that reads a Parquet file in a batched fashion.
@@ -410,6 +410,15 @@ public void init() throws Throwable {
         }
       }
 
+      boolean encryptionEnabled = CometParquetUtils.encryptionEnabled(conf);
+
+      // Create keyUnwrapper if encryption is enabled
+      CometFileKeyUnwrapper keyUnwrapper = null;
+      if (encryptionEnabled) {
+        keyUnwrapper = new CometFileKeyUnwrapper();
+        keyUnwrapper.storeDecryptionKeyRetriever(file.filePath().toString(), conf);
+      }
+
       int batchSize =
           conf.getInt(
               CometConf.COMET_BATCH_SIZE().key(),
@@ -426,7 +435,8 @@ public void init() throws Throwable {
               timeZoneId,
               batchSize,
               caseSensitive,
-              objectStoreOptions);
+              objectStoreOptions,
+              keyUnwrapper);
     }
     isInitialized = true;
   }

diff --git a/common/src/main/scala/org/apache/comet/objectstore/NativeConfig.scala b/common/src/main/scala/org/apache/comet/objectstore/NativeConfig.scala
@@ -58,7 +58,7 @@ object NativeConfig {
   def extractObjectStoreOptions(hadoopConf: Configuration, uri: URI): Map[String, String] = {
     val scheme = uri.getScheme.toLowerCase(Locale.ROOT)
 
-    import scala.collection.JavaConverters._
+    import scala.jdk.CollectionConverters._
     val options = scala.collection.mutable.Map[String, String]()
 
     // The schemes will use libhdfs

diff --git a/common/src/main/scala/org/apache/comet/parquet/CometParquetUtils.scala b/common/src/main/scala/org/apache/comet/parquet/CometParquetUtils.scala
@@ -20,13 +20,25 @@
 package org.apache.comet.parquet
 
 import org.apache.hadoop.conf.Configuration
+import org.apache.parquet.crypto.DecryptionPropertiesFactory
+import org.apache.parquet.crypto.keytools.{KeyToolkit, PropertiesDrivenCryptoFactory}
 import org.apache.spark.sql.internal.SQLConf
 
 object CometParquetUtils {
   private val PARQUET_FIELD_ID_WRITE_ENABLED = "spark.sql.parquet.fieldId.write.enabled"
   private val PARQUET_FIELD_ID_READ_ENABLED = "spark.sql.parquet.fieldId.read.enabled"
   private val IGNORE_MISSING_PARQUET_FIELD_ID = "spark.sql.parquet.fieldId.read.ignoreMissing"
 
+  // Encryption settings that are supported only with specific values. The allowed values are
+  // generally the defaults, so these keys are usually absent from the configuration; when a
+  // key is present, isEncryptionConfigSupported checks its value against the allowed set.
+  private val SUPPORTED_ENCRYPTION_CONFIGS: Map[String, Set[String]] = Map(
+    // https://github.com/apache/arrow-rs/blob/main/parquet/src/encryption/ciphers.rs#L21
+    KeyToolkit.DATA_KEY_LENGTH_PROPERTY_NAME -> Set(KeyToolkit.DATA_KEY_LENGTH_DEFAULT.toString),
+    KeyToolkit.KEK_LENGTH_PROPERTY_NAME -> Set(KeyToolkit.KEK_LENGTH_DEFAULT.toString),
+    // https://github.com/apache/arrow-rs/blob/main/parquet/src/file/metadata/parser.rs#L494
+    PropertiesDrivenCryptoFactory.ENCRYPTION_ALGORITHM_PROPERTY_NAME -> Set("AES_GCM_V1"))
+
   def writeFieldId(conf: SQLConf): Boolean =
     conf.getConfString(PARQUET_FIELD_ID_WRITE_ENABLED, "false").toBoolean
 
@@ -38,4 +50,36 @@ object CometParquetUtils {
 
   def ignoreMissingIds(conf: SQLConf): Boolean =
     conf.getConfString(IGNORE_MISSING_PARQUET_FIELD_ID, "false").toBoolean
+
+  /**
+   * Reports whether every restricted encryption setting in a Hadoop configuration is supported.
+   * Only the keys of SUPPORTED_ENCRYPTION_CONFIGS are inspected: an absent key is accepted, and
+   * a present key must hold one of the values allowed for it.
+   *
+   * @param hadoopConf
+   *   The Hadoop configuration to check
+   * @return
+   *   true if each inspected key is unset or holds an allowed value, false otherwise
+   */
+  def isEncryptionConfigSupported(hadoopConf: Configuration): Boolean = {
+    // A null lookup result means the key is not set, which counts as supported.
+    def unsetOrAllowed(key: String, allowed: Set[String]): Boolean = {
+      val raw = hadoopConf.get(key)
+      raw == null || allowed.contains(raw)
+    }
+
+    // The configuration is supported only if no restricted key carries a disallowed value.
+    SUPPORTED_ENCRYPTION_CONFIGS.forall { case (key, allowed) =>
+      unsetOrAllowed(key, allowed)
+    }
+  }
+
+  def encryptionEnabled(hadoopConf: Configuration): Boolean = {
+    // Parquet encryption is treated as enabled when either the crypto factory class or the
+    // KMS client class property is present with a non-empty value.
+    // TODO: Are there any other properties to check?
+    def configured(key: String): Boolean = Option(hadoopConf.get(key)).exists(_.nonEmpty)
+    configured(DecryptionPropertiesFactory.CRYPTO_FACTORY_CLASS_PROPERTY_NAME) ||
+    configured(KeyToolkit.KMS_CLIENT_CLASS_PROPERTY_NAME)
+  }
 }
diff --git a/native/Cargo.lock b/native/Cargo.lock
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
@@ -59,7 +59,7 @@ bytes = { workspace = true }
 tempfile = "3.8.0"
 itertools = "0.14.0"
 paste = "1.0.14"
-datafusion = { workspace = true }
+datafusion = { workspace = true, features = ["parquet_encryption"] }
 datafusion-spark = { workspace = true }
 once_cell = "1.18.0"
 regex = { workspace = true }

diff --git a/native/core/src/errors.rs b/native/core/src/errors.rs
@@ -185,6 +185,15 @@ impl From<CometError> for DataFusionError {
     }
 }
 
+impl From<CometError> for ParquetError {
+    fn from(value: CometError) -> Self {
+        match value {
+            CometError::Parquet { source } => source, // already a ParquetError: pass it through
+            other => ParquetError::General(other.to_string()), // keep only the display text
+        }
+    }
+}
+
 impl From<CometError> for ExecutionError {
     fn from(value: CometError) -> Self {
         match value {

diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs
@@ -78,6 +78,7 @@ use crate::execution::spark_plan::SparkPlan;
 
 use crate::execution::tracing::{log_memory_usage, trace_begin, trace_end, with_trace};
 
+use crate::parquet::encryption_support::{CometEncryptionFactory, ENCRYPTION_FACTORY_ID};
 use datafusion_comet_proto::spark_operator::operator::OpStruct;
 use log::info;
 use once_cell::sync::Lazy;
@@ -171,6 +172,7 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan(
     explain_native: jboolean,
     tracing_enabled: jboolean,
     max_temp_directory_size: jlong,
+    key_unwrapper_obj: JObject,
 ) -> jlong {
     try_unwrap_or_throw(&e, |mut env| {
         with_trace("createPlan", tracing_enabled != JNI_FALSE, || {
@@ -247,6 +249,17 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan(
                 None
             };
 
+            // Handle key unwrapper for encrypted files
+            if !key_unwrapper_obj.is_null() {
+                let encryption_factory = CometEncryptionFactory {
+                    key_unwrapper: jni_new_global_ref!(env, key_unwrapper_obj)?,
+                };
+                session.runtime_env().register_parquet_encryption_factory(
+                    ENCRYPTION_FACTORY_ID,
+                    Arc::new(encryption_factory),
+                );
+            }
+
             let exec_context = Box::new(ExecutionContext {
                 id,
                 task_attempt_id,

diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs
@@ -1358,6 +1358,8 @@ impl PhysicalPlanner {
                     default_values,
                     scan.session_timezone.as_str(),
                     scan.case_sensitive,
+                    self.session_ctx(),
+                    scan.encryption_enabled,
                 )?;
                 Ok((
                     vec![],