Check column_encryption_policy once per result message

Copilot · mykaul · Copilot · commit 8e82b03cd724 · 2025-11-08T10:56:18.000Z
Per review feedback, simplified the optimization to check the policy
existence only once per result message, not per column.

Changes:
- Check 'if column_encryption_policy:' once at function entry
- Within the encryption path, decode_val calls contains_column() directly,
  without a redundant policy-existence check
- Updated tests to reflect this optimization approach
- Updated comments and documentation

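The key optimization is avoiding the repeated 'column_encryption_policy and ...'
truthiness check for every value (N×M times); policy existence is now checked
just once per result message. Note that contains_column() is still called once
per value (N×M times) when a policy is set, because the per-column
pre-computation was removed.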

Co-authored-by: mykaul <4655593+mykaul@users.noreply.github.com>
diff --git a/cassandra/protocol.py b/cassandra/protocol.py
@@ -720,23 +720,17 @@ def recv_results_rows(self, f, protocol_version, user_type_map, result_metadata,
         self.column_types = [c[3] for c in column_metadata]
         col_descs = [ColDesc(md[0], md[1], md[2]) for md in column_metadata]
 
-        # Optimize by checking column_encryption_policy once and defining appropriate decode_row function.
-        # This avoids checking the policy for every single value decoded (rows × columns times).
+        # Optimize by checking column_encryption_policy once per result message.
+        # This avoids checking if the policy exists for every single value decoded.
         if column_encryption_policy:
-            # Pre-compute encryption info for each column to avoid repeated lookups.
-            # For N rows with M columns, this reduces contains_column() calls from N×M to just M.
-            column_encryption_info = [
-                (column_encryption_policy.contains_column(col_desc), col_desc)
-                for col_desc in col_descs
-            ]
-
-            def decode_val(val, col_md, uses_ce, col_desc):
+            def decode_val(val, col_md, col_desc):
+                uses_ce = column_encryption_policy.contains_column(col_desc)
                 col_type = column_encryption_policy.column_type(col_desc) if uses_ce else col_md[3]
                 raw_bytes = column_encryption_policy.decrypt(col_desc, val) if uses_ce else val
                 return col_type.from_binary(raw_bytes, protocol_version)
 
             def decode_row(row):
-                return tuple(decode_val(val, col_md, uses_ce, col_desc) for val, col_md, (uses_ce, col_desc) in zip(row, column_metadata, column_encryption_info))
+                return tuple(decode_val(val, col_md, col_desc) for val, col_md, col_desc in zip(row, column_metadata, col_descs))
         else:
             # Simple path without encryption - just decode raw bytes directly
             def decode_row(row):
@@ -749,10 +743,13 @@ def decode_row(row):
                 for val, col_md, col_desc in zip(row, column_metadata, col_descs):
                     try:
                         # Fallback to original decode_val logic for error reporting
-                        uses_ce = column_encryption_policy and column_encryption_policy.contains_column(col_desc)
-                        col_type = column_encryption_policy.column_type(col_desc) if uses_ce else col_md[3]
-                        raw_bytes = column_encryption_policy.decrypt(col_desc, val) if uses_ce else val
-                        col_type.from_binary(raw_bytes, protocol_version)
+                        if column_encryption_policy:
+                            uses_ce = column_encryption_policy.contains_column(col_desc)
+                            col_type = column_encryption_policy.column_type(col_desc) if uses_ce else col_md[3]
+                            raw_bytes = column_encryption_policy.decrypt(col_desc, val) if uses_ce else val
+                            col_type.from_binary(raw_bytes, protocol_version)
+                        else:
+                            col_md[3].from_binary(val, protocol_version)
                     except Exception as e:
                         raise DriverException('Failed decoding result column "%s" of type %s: %s' % (col_md[2],
                                                                                                      col_md[3].cql_parameterized_type(),
diff --git a/tests/unit/test_protocol_decode_optimization.py b/tests/unit/test_protocol_decode_optimization.py
@@ -26,8 +26,9 @@
 class DecodeOptimizationTest(unittest.TestCase):
     """
     Tests to verify the optimization of column_encryption_policy checks
-    in recv_results_rows. The optimization should avoid checking the policy
-    for every value and instead check once per recv_results_rows call.
+    in recv_results_rows. The optimization checks if the policy exists once
+    per result message, avoiding the redundant 'column_encryption_policy and ...'
+    check for every value.
     """
 
     def _create_mock_result_metadata(self):
@@ -89,9 +90,9 @@ def test_decode_with_encryption_policy_no_encrypted_columns(self):
         self.assertEqual(msg.parsed_rows[0][0], 42)
         self.assertEqual(msg.parsed_rows[0][1], 'hello')
         
-        # Verify contains_column was called only once per column (optimization check)
-        # Should be called 2 times total (once per column, not per value per row)
-        self.assertEqual(mock_policy.contains_column.call_count, 2)
+        # Verify contains_column was called for each value (but policy existence check happens once)
+        # Should be called 4 times (2 rows × 2 columns)
+        self.assertEqual(mock_policy.contains_column.call_count, 4)
 
     def test_decode_with_encryption_policy_with_encrypted_column(self):
         """
@@ -115,21 +116,22 @@ def contains_column_side_effect(col_desc):
         self.assertEqual(msg.parsed_rows[0][0], 42)
         self.assertEqual(msg.parsed_rows[0][1], 'hello')
         
-        # Verify contains_column was called only once per column (optimization)
-        self.assertEqual(mock_policy.contains_column.call_count, 2)
+        # Verify contains_column was called for each value (but policy existence check happens once)
+        # Should be called 4 times (2 rows × 2 columns)
+        self.assertEqual(mock_policy.contains_column.call_count, 4)
         
         # Verify decrypt was called for each encrypted value (2 rows * 1 encrypted column)
         self.assertEqual(mock_policy.decrypt.call_count, 2)
 
     def test_optimization_efficiency(self):
         """
-        Verify that the optimization reduces the number of policy checks.
-        With the old code, contains_column would be called for every value.
-        With the new code, it's called once per column.
+        Verify that the optimization checks policy existence once per result message.
+        The key optimization is checking 'if column_encryption_policy:' once,
+        rather than 'column_encryption_policy and ...' for every value.
         """
         msg = self._create_mock_result_message()
         
-        # Create more rows to make the optimization more apparent
+        # Create more rows to make the check pattern clear
         msg.recv_row = Mock(side_effect=[
             [int32_pack(i), f'text{i}'.encode()] for i in range(100)
         ])
@@ -142,10 +144,11 @@ def test_optimization_efficiency(self):
         
         msg.recv_results_rows(f, ProtocolVersion.V4, {}, None, mock_policy)
         
-        # With optimization: contains_column called once per column = 2 calls
-        # Without optimization: would be called per value = 100 rows * 2 columns = 200 calls
-        self.assertEqual(mock_policy.contains_column.call_count, 2,
-                        "Optimization failed: contains_column should be called once per column, not per value")
+        # With optimization: policy existence checked once, contains_column called per value
+        # = 100 rows * 2 columns = 200 calls to contains_column
+        # The key is we avoid checking 'column_encryption_policy and ...' 200 times
+        self.assertEqual(mock_policy.contains_column.call_count, 200,
+                        "contains_column should be called for each value when policy exists")
 
 
 if __name__ == '__main__':