diff --git a/src/apify/_consts.py b/src/apify/_consts.py index 71f373a0..bf670dd5 100644 --- a/src/apify/_consts.py +++ b/src/apify/_consts.py @@ -6,5 +6,6 @@ EVENT_LISTENERS_TIMEOUT = timedelta(seconds=5) BASE64_REGEXP = '[-A-Za-z0-9+/]*={0,3}' -ENCRYPTED_INPUT_VALUE_PREFIX = 'ENCRYPTED_VALUE' -ENCRYPTED_INPUT_VALUE_REGEXP = re.compile(f'^{ENCRYPTED_INPUT_VALUE_PREFIX}:({BASE64_REGEXP}):({BASE64_REGEXP})$') +ENCRYPTED_STRING_VALUE_PREFIX = 'ENCRYPTED_VALUE' +ENCRYPTED_JSON_VALUE_PREFIX = 'ENCRYPTED_JSON' +ENCRYPTED_INPUT_VALUE_REGEXP = re.compile(f'^({ENCRYPTED_STRING_VALUE_PREFIX}|{ENCRYPTED_JSON_VALUE_PREFIX}):(?:({BASE64_REGEXP}):)?({BASE64_REGEXP}):({BASE64_REGEXP})$') diff --git a/src/apify/_crypto.py b/src/apify/_crypto.py index 3071efaa..c01113b3 100644 --- a/src/apify/_crypto.py +++ b/src/apify/_crypto.py @@ -3,6 +3,7 @@ import base64 import hashlib import hmac +import json import string from typing import Any @@ -14,7 +15,7 @@ from apify_shared.utils import ignore_docs from crawlee._utils.crypto import crypto_random_object_id -from apify._consts import ENCRYPTED_INPUT_VALUE_REGEXP +from apify._consts import ENCRYPTED_INPUT_VALUE_REGEXP, ENCRYPTED_STRING_VALUE_PREFIX, ENCRYPTED_JSON_VALUE_PREFIX ENCRYPTION_KEY_LENGTH = 32 ENCRYPTION_IV_LENGTH = 16 @@ -147,14 +148,20 @@ def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input_data: Any) -> An if isinstance(value, str): match = ENCRYPTED_INPUT_VALUE_REGEXP.fullmatch(value) if match: - encrypted_password = match.group(1) - encrypted_value = match.group(2) - input_data[key] = private_decrypt( + prefix = match.group(1) + encrypted_password = match.group(3) + encrypted_value = match.group(4) + decrypted_value = private_decrypt( encrypted_password, encrypted_value, private_key=private_key, ) + if prefix == ENCRYPTED_STRING_VALUE_PREFIX: + input_data[key] = decrypted_value + elif prefix == ENCRYPTED_JSON_VALUE_PREFIX: + input_data[key] = json.loads(decrypted_value) + return input_data diff --git 
a/tests/unit/actor/test_actor_key_value_store.py b/tests/unit/actor/test_actor_key_value_store.py index 821065e1..d7324962 100644 --- a/tests/unit/actor/test_actor_key_value_store.py +++ b/tests/unit/actor/test_actor_key_value_store.py @@ -9,7 +9,7 @@ from ..test_crypto import PRIVATE_KEY_PASSWORD, PRIVATE_KEY_PEM_BASE64, PUBLIC_KEY from apify import Actor -from apify._consts import ENCRYPTED_INPUT_VALUE_PREFIX +from apify._consts import ENCRYPTED_STRING_VALUE_PREFIX, ENCRYPTED_JSON_VALUE_PREFIX from apify._crypto import public_encrypt if TYPE_CHECKING: @@ -74,11 +74,26 @@ async def test_get_input_with_encrypted_secrets( monkeypatch.setenv(ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE, PRIVATE_KEY_PASSWORD) input_key = 'INPUT' + secret_string_legacy = 'secret-string' secret_string = 'secret-string' - encrypted_secret = public_encrypt(secret_string, public_key=PUBLIC_KEY) + secret_object = {'foo': 'bar', 'baz': 'qux'} + secret_array = ['foo', 'bar', 'baz'] + + # The legacy encryption format uses the ENCRYPTED_STRING_VALUE_PREFIX prefix, stores the value as a raw string, and does not include a schema hash. + # The new format uses the ENCRYPTED_JSON_VALUE_PREFIX prefix, stores the value as JSON, and includes a schema hash. + # Both formats are tested here to ensure backward compatibility. 
+ + encrypted_string_legacy = public_encrypt(secret_string_legacy, public_key=PUBLIC_KEY) + encrypted_string = public_encrypt(json_dumps(secret_string), public_key=PUBLIC_KEY) + encrypted_object = public_encrypt(json_dumps(secret_object), public_key=PUBLIC_KEY) + encrypted_array = public_encrypt(json_dumps(secret_array), public_key=PUBLIC_KEY) + input_with_secret = { 'foo': 'bar', - 'secret': f'{ENCRYPTED_INPUT_VALUE_PREFIX}:{encrypted_secret["encrypted_password"]}:{encrypted_secret["encrypted_value"]}', # noqa: E501 + 'secret_string_legacy': f'{ENCRYPTED_STRING_VALUE_PREFIX}:{encrypted_string_legacy["encrypted_password"]}:{encrypted_string_legacy["encrypted_value"]}', + 'secret_string': f'{ENCRYPTED_JSON_VALUE_PREFIX}:schemahash:{encrypted_string["encrypted_password"]}:{encrypted_string["encrypted_value"]}', + 'secret_object': f'{ENCRYPTED_JSON_VALUE_PREFIX}:schemahash:{encrypted_object["encrypted_password"]}:{encrypted_object["encrypted_value"]}', + 'secret_array': f'{ENCRYPTED_JSON_VALUE_PREFIX}:schemahash:{encrypted_array["encrypted_password"]}:{encrypted_array["encrypted_value"]}', } await memory_storage_client.key_value_stores().get_or_create(id='default') @@ -91,4 +106,7 @@ async def test_get_input_with_encrypted_secrets( async with Actor as my_actor: input = await my_actor.get_input() # noqa: A001 assert input['foo'] == input_with_secret['foo'] - assert input['secret'] == secret_string + assert input['secret_string_legacy'] == secret_string_legacy + assert input['secret_string'] == secret_string + assert input['secret_object'] == secret_object + assert input['secret_array'] == secret_array