|
3 | 3 | # |
4 | 4 |
|
5 | 5 | import json |
6 | | -from unittest.mock import Mock, call |
| 6 | +from unittest.mock import MagicMock, Mock, call |
7 | 7 |
|
8 | 8 | import pytest |
9 | 9 | import requests |
@@ -220,3 +220,91 @@ def create_schema(): |
220 | 220 | "field_float": {"type": "number"}, |
221 | 221 | }, |
222 | 222 | } |
| 223 | + |
| 224 | + |
@pytest.mark.parametrize("transform_before_filtering", [True, False])
def test_transform_before_filtering(transform_before_filtering):
    """
    Exercise the ordering of transformation and filtering in RecordSelector.

    Two records are fed in: id=1 with myfield=0 and id=2 with myfield=999.
    The filter keeps records whose myfield equals 999 and the transformation
    overwrites myfield with 999. With transform_before_filtering=True both
    records are expected to survive; with False only the id=2 record is.
    """

    # Response whose body holds one record that fails the filter as-is (id=1)
    # and one that already satisfies it (id=2).
    payload = {"data": [{"id": 1, "myfield": 0}, {"id": 2, "myfield": 999}]}
    http_response = requests.Response()
    http_response._content = json.dumps(payload).encode("utf-8")

    # Transformation double: mutates the record in place so it would pass the
    # filter, and records its calls so the invocation count can be asserted.
    def force_myfield_to_999(record, config, stream_state, stream_slice):
        record["myfield"] = 999

    transformation = MagicMock(spec=RecordTransformation)
    transformation.transform.side_effect = force_myfield_to_999

    # Selector under test: extracts from 'data', filters on myfield == 999,
    # and takes the transform/filter ordering from the parametrized flag.
    selector = RecordSelector(
        extractor=DpathExtractor(
            field_path=["data"],
            decoder=JsonDecoder(parameters={}),
            config={},
            parameters={},
        ),
        config={},
        name="test_stream",
        record_filter=RecordFilter(
            config={},
            condition="{{ record['myfield'] == 999 }}",
            parameters={},
        ),
        transformations=[transformation],
        schema_normalization=TypeTransformer(TransformConfig.NoTransform),
        transform_before_filtering=transform_before_filtering,
        parameters={},
    )

    selected = list(
        selector.select_records(
            response=http_response,
            records_schema={},  # schema is irrelevant for this scenario
            stream_state={},
            stream_slice=StreamSlice(partition={}, cursor_slice={}),
            next_page_token=None,
        )
    )

    # Transform-first: both records reach the filter with myfield=999, so both
    # survive and the transformation runs twice.
    # Filter-first: id=1 is dropped while still at myfield=0, so only id=2
    # survives and the transformation runs once.
    expected_count = 2 if transform_before_filtering else 1
    assert len(selected) == expected_count
    assert transformation.transform.call_count == expected_count

    # Inspect the payload of whatever made it through.
    surviving = [item.data for item in selected]
    if transform_before_filtering:
        assert all(entry["myfield"] == 999 for entry in surviving)
        assert sorted([entry["id"] for entry in surviving]) == [1, 2]
    else:
        assert surviving[0]["id"] == 2
        assert surviving[0]["myfield"] == 999
0 commit comments