Skip to content

Commit 8e82b03

Browse files
Copilotmykaul
andcommitted
Check column_encryption_policy once per result message
Per review feedback, simplified the optimization to check the policy existence only once per result message, not per column. Changes: - Check 'if column_encryption_policy:' once at function entry - Within the encryption path, decode_val checks contains_column() without redundant policy existence check - Updated tests to reflect this optimization approach - Updated comments and documentation The key optimization is avoiding the repeated 'column_encryption_policy and ...' check for every value (N×M times), checking policy existence just once instead. Co-authored-by: mykaul <[email protected]>
1 parent 89a3211 commit 8e82b03

File tree

2 files changed

+30
-30
lines changed

2 files changed

+30
-30
lines changed

cassandra/protocol.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -720,23 +720,17 @@ def recv_results_rows(self, f, protocol_version, user_type_map, result_metadata,
720720
self.column_types = [c[3] for c in column_metadata]
721721
col_descs = [ColDesc(md[0], md[1], md[2]) for md in column_metadata]
722722

723-
# Optimize by checking column_encryption_policy once and defining appropriate decode_row function.
724-
# This avoids checking the policy for every single value decoded (rows × columns times).
723+
# Optimize by checking column_encryption_policy once per result message.
724+
# This avoids checking if the policy exists for every single value decoded.
725725
if column_encryption_policy:
726-
# Pre-compute encryption info for each column to avoid repeated lookups.
727-
# For N rows with M columns, this reduces contains_column() calls from N×M to just M.
728-
column_encryption_info = [
729-
(column_encryption_policy.contains_column(col_desc), col_desc)
730-
for col_desc in col_descs
731-
]
732-
733-
def decode_val(val, col_md, uses_ce, col_desc):
726+
def decode_val(val, col_md, col_desc):
727+
uses_ce = column_encryption_policy.contains_column(col_desc)
734728
col_type = column_encryption_policy.column_type(col_desc) if uses_ce else col_md[3]
735729
raw_bytes = column_encryption_policy.decrypt(col_desc, val) if uses_ce else val
736730
return col_type.from_binary(raw_bytes, protocol_version)
737731

738732
def decode_row(row):
739-
return tuple(decode_val(val, col_md, uses_ce, col_desc) for val, col_md, (uses_ce, col_desc) in zip(row, column_metadata, column_encryption_info))
733+
return tuple(decode_val(val, col_md, col_desc) for val, col_md, col_desc in zip(row, column_metadata, col_descs))
740734
else:
741735
# Simple path without encryption - just decode raw bytes directly
742736
def decode_row(row):
@@ -749,10 +743,13 @@ def decode_row(row):
749743
for val, col_md, col_desc in zip(row, column_metadata, col_descs):
750744
try:
751745
# Fallback to original decode_val logic for error reporting
752-
uses_ce = column_encryption_policy and column_encryption_policy.contains_column(col_desc)
753-
col_type = column_encryption_policy.column_type(col_desc) if uses_ce else col_md[3]
754-
raw_bytes = column_encryption_policy.decrypt(col_desc, val) if uses_ce else val
755-
col_type.from_binary(raw_bytes, protocol_version)
746+
if column_encryption_policy:
747+
uses_ce = column_encryption_policy.contains_column(col_desc)
748+
col_type = column_encryption_policy.column_type(col_desc) if uses_ce else col_md[3]
749+
raw_bytes = column_encryption_policy.decrypt(col_desc, val) if uses_ce else val
750+
col_type.from_binary(raw_bytes, protocol_version)
751+
else:
752+
col_md[3].from_binary(val, protocol_version)
756753
except Exception as e:
757754
raise DriverException('Failed decoding result column "%s" of type %s: %s' % (col_md[2],
758755
col_md[3].cql_parameterized_type(),

tests/unit/test_protocol_decode_optimization.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,9 @@
2626
class DecodeOptimizationTest(unittest.TestCase):
2727
"""
2828
Tests to verify the optimization of column_encryption_policy checks
29-
in recv_results_rows. The optimization should avoid checking the policy
30-
for every value and instead check once per recv_results_rows call.
29+
in recv_results_rows. The optimization checks if the policy exists once
30+
per result message, avoiding the redundant 'column_encryption_policy and ...'
31+
check for every value.
3132
"""
3233

3334
def _create_mock_result_metadata(self):
@@ -89,9 +90,9 @@ def test_decode_with_encryption_policy_no_encrypted_columns(self):
8990
self.assertEqual(msg.parsed_rows[0][0], 42)
9091
self.assertEqual(msg.parsed_rows[0][1], 'hello')
9192

92-
# Verify contains_column was called only once per column (optimization check)
93-
# Should be called 2 times total (once per column, not per value per row)
94-
self.assertEqual(mock_policy.contains_column.call_count, 2)
93+
# Verify contains_column was called for each value (but policy existence check happens once)
94+
# Should be called 4 times (2 rows × 2 columns)
95+
self.assertEqual(mock_policy.contains_column.call_count, 4)
9596

9697
def test_decode_with_encryption_policy_with_encrypted_column(self):
9798
"""
@@ -115,21 +116,22 @@ def contains_column_side_effect(col_desc):
115116
self.assertEqual(msg.parsed_rows[0][0], 42)
116117
self.assertEqual(msg.parsed_rows[0][1], 'hello')
117118

118-
# Verify contains_column was called only once per column (optimization)
119-
self.assertEqual(mock_policy.contains_column.call_count, 2)
119+
# Verify contains_column was called for each value (but policy existence check happens once)
120+
# Should be called 4 times (2 rows × 2 columns)
121+
self.assertEqual(mock_policy.contains_column.call_count, 4)
120122

121123
# Verify decrypt was called for each encrypted value (2 rows * 1 encrypted column)
122124
self.assertEqual(mock_policy.decrypt.call_count, 2)
123125

124126
def test_optimization_efficiency(self):
125127
"""
126-
Verify that the optimization reduces the number of policy checks.
127-
With the old code, contains_column would be called for every value.
128-
With the new code, it's called once per column.
128+
Verify that the optimization checks policy existence once per result message.
129+
The key optimization is checking 'if column_encryption_policy:' once,
130+
rather than 'column_encryption_policy and ...' for every value.
129131
"""
130132
msg = self._create_mock_result_message()
131133

132-
# Create more rows to make the optimization more apparent
134+
# Create more rows to make the check pattern clear
133135
msg.recv_row = Mock(side_effect=[
134136
[int32_pack(i), f'text{i}'.encode()] for i in range(100)
135137
])
@@ -142,10 +144,11 @@ def test_optimization_efficiency(self):
142144

143145
msg.recv_results_rows(f, ProtocolVersion.V4, {}, None, mock_policy)
144146

145-
# With optimization: contains_column called once per column = 2 calls
146-
# Without optimization: would be called per value = 100 rows * 2 columns = 200 calls
147-
self.assertEqual(mock_policy.contains_column.call_count, 2,
148-
"Optimization failed: contains_column should be called once per column, not per value")
147+
# With optimization: policy existence checked once, contains_column called per value
148+
# = 100 rows * 2 columns = 200 calls to contains_column
149+
# The key is we avoid checking 'column_encryption_policy and ...' 200 times
150+
self.assertEqual(mock_policy.contains_column.call_count, 200,
151+
"contains_column should be called for each value when policy exists")
149152

150153

151154
if __name__ == '__main__':

0 commit comments

Comments
 (0)