Skip to content

Commit c6305d0

Browse files
authored
[v2] Flexible checksums for S3 high-level commands (#8933)
1 parent a1a13eb commit c6305d0

File tree

8 files changed

+334
-1
lines changed

8 files changed

+334
-1
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"type": "feature",
3+
"category": "s3",
4+
"description": "Adds ``--checksum-mode`` and ``--checksum-algorithm`` parameters to high-level ``s3`` commands."
5+
}

awscli/customizations/s3/subcommands.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,17 @@
481481
)
482482
}
483483

484+
CHECKSUM_MODE = {
485+
'name': 'checksum-mode', 'choices': ['ENABLED'],
486+
'help_text': 'To retrieve the checksum, this mode must be enabled. If the object has a '
487+
'checksum, it will be verified.'
488+
}
489+
490+
CHECKSUM_ALGORITHM = {
491+
'name': 'checksum-algorithm', 'choices': ['CRC32', 'SHA256', 'SHA1', 'CRC32C'],
492+
'help_text': 'Indicates the algorithm used to create the checksum for the object.'
493+
}
494+
484495
TRANSFER_ARGS = [DRYRUN, QUIET, INCLUDE, EXCLUDE, ACL,
485496
FOLLOW_SYMLINKS, NO_FOLLOW_SYMLINKS, NO_GUESS_MIME_TYPE,
486497
SSE, SSE_C, SSE_C_KEY, SSE_KMS_KEY_ID, SSE_C_COPY_SOURCE,
@@ -489,7 +500,7 @@
489500
CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE,
490501
EXPIRES, SOURCE_REGION, ONLY_SHOW_ERRORS, NO_PROGRESS,
491502
PAGE_SIZE, IGNORE_GLACIER_WARNINGS, FORCE_GLACIER_TRANSFER,
492-
REQUEST_PAYER]
503+
REQUEST_PAYER, CHECKSUM_MODE, CHECKSUM_ALGORITHM]
493504

494505

495506
class S3Command(BasicCommand):
@@ -1276,6 +1287,17 @@ def _validate_path_args(self):
12761287
if self._should_emit_validate_s3_paths_warning():
12771288
self._emit_validate_s3_paths_warning()
12781289

1290+
if params.get('checksum_algorithm'):
1291+
self._raise_if_paths_type_incorrect_for_param(
1292+
CHECKSUM_ALGORITHM['name'],
1293+
params['paths_type'],
1294+
['locals3', 's3s3'])
1295+
if params.get('checksum_mode'):
1296+
self._raise_if_paths_type_incorrect_for_param(
1297+
CHECKSUM_MODE['name'],
1298+
params['paths_type'],
1299+
['s3local'])
1300+
12791301
# If the user provided local path does not exist, hard fail because
12801302
# we know that we will not be able to upload the file.
12811303
if 'locals3' == params['paths_type'] and not params['is_stream']:
@@ -1359,6 +1381,19 @@ def _raise_if_mv_same_paths(self, src, dest):
13591381
f"{self.parameters['src']} - {self.parameters['dest']}"
13601382
)
13611383

1384+
def _raise_if_paths_type_incorrect_for_param(self, param, paths_type, allowed_paths):
1385+
if paths_type not in allowed_paths:
1386+
expected_usage_map = {
1387+
'locals3': '<LocalPath> <S3Uri>',
1388+
's3s3': '<S3Uri> <S3Uri>',
1389+
's3local': '<S3Uri> <LocalPath>',
1390+
's3': '<S3Uri>'
1391+
}
1392+
raise ParamValidationError(
1393+
f"Expected {param} parameter to be used with one of following path formats: "
1394+
f"{', '.join([expected_usage_map[path] for path in allowed_paths])}. Instead, received {expected_usage_map[paths_type]}."
1395+
)
1396+
13621397
def _normalize_s3_trailing_slash(self, paths):
13631398
for i, path in enumerate(paths):
13641399
if path.startswith('s3://'):

awscli/customizations/s3/utils.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,12 +470,14 @@ def map_put_object_params(cls, request_params, cli_params):
470470
cls._set_sse_request_params(request_params, cli_params)
471471
cls._set_sse_c_request_params(request_params, cli_params)
472472
cls._set_request_payer_param(request_params, cli_params)
473+
cls._set_checksum_algorithm_param(request_params, cli_params)
473474

474475
@classmethod
475476
def map_get_object_params(cls, request_params, cli_params):
476477
"""Map CLI params to GetObject request params"""
477478
cls._set_sse_c_request_params(request_params, cli_params)
478479
cls._set_request_payer_param(request_params, cli_params)
480+
cls._set_checksum_mode_param(request_params, cli_params)
479481

480482
@classmethod
481483
def map_get_object_tagging_params(cls, request_params, cli_params):
@@ -498,6 +500,7 @@ def map_copy_object_params(cls, request_params, cli_params):
498500
cls._set_sse_c_and_copy_source_request_params(
499501
request_params, cli_params)
500502
cls._set_request_payer_param(request_params, cli_params)
503+
cls._set_checksum_algorithm_param(request_params, cli_params)
501504

502505
@classmethod
503506
def map_head_object_params(cls, request_params, cli_params):
@@ -540,6 +543,16 @@ def _set_request_payer_param(cls, request_params, cli_params):
540543
if cli_params.get('request_payer'):
541544
request_params['RequestPayer'] = cli_params['request_payer']
542545

546+
@classmethod
547+
def _set_checksum_mode_param(cls, request_params, cli_params):
548+
if cli_params.get('checksum_mode'):
549+
request_params['ChecksumMode'] = cli_params['checksum_mode']
550+
551+
@classmethod
552+
def _set_checksum_algorithm_param(cls, request_params, cli_params):
553+
if cli_params.get('checksum_algorithm'):
554+
request_params['ChecksumAlgorithm'] = cli_params['checksum_algorithm']
555+
543556
@classmethod
544557
def _set_general_object_params(cls, request_params, cli_params):
545558
# Parameters set in this method should be applicable to the following

tests/functional/s3/test_cp_command.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,86 @@ def test_cp_with_error_and_warning_permissions(self):
760760
self.assertIn('upload failed', stderr)
761761
self.assertIn('warning: File has an invalid timestamp.', stderr)
762762

763+
def test_upload_with_checksum_algorithm_crc32(self):
764+
full_path = self.files.create_file('foo.txt', 'contents')
765+
cmdline = f'{self.prefix} {full_path} s3://bucket/key.txt --checksum-algorithm CRC32'
766+
self.run_cmd(cmdline, expected_rc=0)
767+
self.assertEqual(self.operations_called[0][0].name, 'PutObject')
768+
self.assertEqual(self.operations_called[0][1]['ChecksumAlgorithm'], 'CRC32')
769+
770+
def test_upload_with_checksum_algorithm_crc32c(self):
771+
full_path = self.files.create_file('foo.txt', 'contents')
772+
cmdline = f'{self.prefix} {full_path} s3://bucket/key.txt --checksum-algorithm CRC32C'
773+
self.run_cmd(cmdline, expected_rc=0)
774+
self.assertEqual(self.operations_called[0][0].name, 'PutObject')
775+
self.assertEqual(self.operations_called[0][1]['ChecksumAlgorithm'], 'CRC32C')
776+
777+
def test_multipart_upload_with_checksum_algorithm_crc32(self):
778+
full_path = self.files.create_file('foo.txt', 'a' * 10 * (1024 ** 2))
779+
self.parsed_responses = [
780+
{'UploadId': 'foo'},
781+
{'ETag': 'foo-e1', 'ChecksumCRC32': 'foo-1'},
782+
{'ETag': 'foo-e2', 'ChecksumCRC32': 'foo-2'},
783+
{}
784+
]
785+
cmdline = ('%s %s s3://bucket/key2.txt'
786+
' --checksum-algorithm CRC32' % (self.prefix, full_path))
787+
self.run_cmd(cmdline, expected_rc=0)
788+
self.assertEqual(len(self.operations_called), 4, self.operations_called)
789+
self.assertEqual(self.operations_called[0][0].name, 'CreateMultipartUpload')
790+
self.assertEqual(self.operations_called[0][1]['ChecksumAlgorithm'], 'CRC32')
791+
self.assertEqual(self.operations_called[1][0].name, 'UploadPart')
792+
self.assertEqual(self.operations_called[1][1]['ChecksumAlgorithm'], 'CRC32')
793+
self.assertEqual(self.operations_called[3][0].name, 'CompleteMultipartUpload')
794+
self.assertIn({'ETag': 'foo-e1', 'ChecksumCRC32': 'foo-1', 'PartNumber': 1},
795+
self.operations_called[3][1]['MultipartUpload']['Parts'])
796+
self.assertIn({'ETag': 'foo-e2', 'ChecksumCRC32': 'foo-2', 'PartNumber': 2},
797+
self.operations_called[3][1]['MultipartUpload']['Parts'])
798+
799+
def test_copy_with_checksum_algorithm_crc32(self):
800+
self.parsed_responses = [
801+
self.head_object_response(),
802+
# Mocked CopyObject response with a CRC32 checksum specified
803+
{
804+
'ETag': 'foo-1',
805+
'ChecksumCRC32': 'Tq0H4g=='
806+
}
807+
]
808+
cmdline = f'{self.prefix} s3://bucket1/key.txt s3://bucket2/key.txt --checksum-algorithm CRC32'
809+
self.run_cmd(cmdline, expected_rc=0)
810+
self.assertEqual(self.operations_called[1][0].name, 'CopyObject')
811+
self.assertEqual(self.operations_called[1][1]['ChecksumAlgorithm'], 'CRC32')
812+
813+
def test_download_with_checksum_mode_crc32(self):
814+
self.parsed_responses = [
815+
self.head_object_response(),
816+
# Mocked GetObject response with a checksum algorithm specified
817+
{
818+
'ETag': 'foo-1',
819+
'ChecksumCRC32': 'Tq0H4g==',
820+
'Body': BytesIO(b'foo')
821+
}
822+
]
823+
cmdline = f'{self.prefix} s3://bucket/foo {self.files.rootdir} --checksum-mode ENABLED'
824+
self.run_cmd(cmdline, expected_rc=0)
825+
self.assertEqual(self.operations_called[1][0].name, 'GetObject')
826+
self.assertEqual(self.operations_called[1][1]['ChecksumMode'], 'ENABLED')
827+
828+
def test_download_with_checksum_mode_crc32c(self):
829+
self.parsed_responses = [
830+
self.head_object_response(),
831+
# Mocked GetObject response with a checksum algorithm specified
832+
{
833+
'ETag': 'foo-1',
834+
'ChecksumCRC32C': 'checksum',
835+
'Body': BytesIO(b'foo')
836+
}
837+
]
838+
cmdline = f'{self.prefix} s3://bucket/foo {self.files.rootdir} --checksum-mode ENABLED'
839+
self.run_cmd(cmdline, expected_rc=0)
840+
self.assertEqual(self.operations_called[1][0].name, 'GetObject')
841+
self.assertEqual(self.operations_called[1][1]['ChecksumMode'], 'ENABLED')
842+
763843

764844
class TestStreamingCPCommand(BaseAWSCommandParamsTest):
765845
def test_streaming_upload(self):

tests/functional/s3/test_mv_command.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,29 @@ def test_mv_does_not_delete_source_on_failed_put_tagging(self):
244244
]
245245
)
246246

247+
def test_upload_with_checksum_algorithm_crc32(self):
248+
full_path = self.files.create_file('foo.txt', 'contents')
249+
cmdline = f'{self.prefix} {full_path} s3://bucket/key.txt --checksum-algorithm CRC32'
250+
self.run_cmd(cmdline, expected_rc=0)
251+
self.assertEqual(self.operations_called[0][0].name, 'PutObject')
252+
self.assertEqual(self.operations_called[0][1]['ChecksumAlgorithm'], 'CRC32')
253+
254+
def test_download_with_checksum_mode_crc32(self):
255+
self.parsed_responses = [
256+
self.head_object_response(),
257+
# Mocked GetObject response with a checksum algorithm specified
258+
{
259+
'ETag': 'foo-1',
260+
'ChecksumCRC32': 'checksum',
261+
'Body': BytesIO(b'foo')
262+
},
263+
self.delete_object_response()
264+
]
265+
cmdline = f'{self.prefix} s3://bucket/foo {self.files.rootdir} --checksum-mode ENABLED'
266+
self.run_cmd(cmdline, expected_rc=0)
267+
self.assertEqual(self.operations_called[1][0].name, 'GetObject')
268+
self.assertEqual(self.operations_called[1][1]['ChecksumMode'], 'ENABLED')
269+
247270

248271
class TestMvWithCRTClient(BaseCRTTransferClientTest):
249272
def test_upload_move_using_crt_client(self):

tests/functional/s3/test_sync_command.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,93 @@ def test_with_copy_props(self):
353353
]
354354
)
355355

356+
def test_upload_with_checksum_algorithm_sha1(self):
357+
self.files.create_file('foo.txt', 'contents')
358+
cmdline = f'{self.prefix} {self.files.rootdir} s3://bucket/ --checksum-algorithm SHA1'
359+
self.run_cmd(cmdline, expected_rc=0)
360+
self.assertEqual(self.operations_called[1][0].name, 'PutObject')
361+
self.assertEqual(self.operations_called[1][1]['ChecksumAlgorithm'], 'SHA1')
362+
363+
def test_copy_with_checksum_algorithm_update_sha1(self):
364+
cmdline = f'{self.prefix} s3://src-bucket/ s3://dest-bucket/ --checksum-algorithm SHA1'
365+
self.parsed_responses = [
366+
# Response for ListObjects on source bucket
367+
{
368+
'Contents': [
369+
{
370+
'Key': 'mykey',
371+
'LastModified': '00:00:00Z',
372+
'Size': 100,
373+
'ChecksumAlgorithm': 'SHA1'
374+
}
375+
],
376+
'CommonPrefixes': []
377+
},
378+
# Response for ListObjects on destination bucket
379+
self.list_objects_response([]),
380+
# Response for CopyObject
381+
{
382+
'ChecksumSHA1': 'sha1-checksum'
383+
}
384+
]
385+
self.run_cmd(cmdline, expected_rc=0)
386+
self.assert_operations_called(
387+
[
388+
self.list_objects_request('src-bucket'),
389+
self.list_objects_request('dest-bucket'),
390+
(
391+
'CopyObject', {
392+
'CopySource': {
393+
'Bucket': 'src-bucket',
394+
'Key': 'mykey'
395+
},
396+
'Bucket': 'dest-bucket',
397+
'Key': 'mykey',
398+
'ChecksumAlgorithm': 'SHA1'
399+
}
400+
)
401+
]
402+
)
403+
404+
def test_upload_with_checksum_algorithm_sha256(self):
405+
self.files.create_file('foo.txt', 'contents')
406+
cmdline = f'{self.prefix} {self.files.rootdir} s3://bucket/ --checksum-algorithm SHA256'
407+
self.run_cmd(cmdline, expected_rc=0)
408+
self.assertEqual(self.operations_called[1][0].name, 'PutObject')
409+
self.assertEqual(self.operations_called[1][1]['ChecksumAlgorithm'], 'SHA256')
410+
411+
def test_download_with_checksum_mode_sha1(self):
412+
self.parsed_responses = [
413+
self.list_objects_response(['bucket']),
414+
# Mocked GetObject response with a checksum algorithm specified
415+
{
416+
'ETag': 'foo-1',
417+
'ChecksumSHA1': 'checksum',
418+
'Body': BytesIO(b'foo')
419+
}
420+
]
421+
cmdline = f'{self.prefix} s3://bucket/foo {self.files.rootdir} --checksum-mode ENABLED'
422+
self.run_cmd(cmdline, expected_rc=0)
423+
self.assertEqual(self.operations_called[0][0].name, 'ListObjectsV2')
424+
self.assertEqual(self.operations_called[1][0].name, 'GetObject')
425+
self.assertIn(('ChecksumMode', 'ENABLED'), self.operations_called[1][1].items())
426+
427+
def test_download_with_checksum_mode_sha256(self):
428+
self.parsed_responses = [
429+
self.list_objects_response(['bucket']),
430+
# Mocked GetObject response with a checksum algorithm specified
431+
{
432+
'ETag': 'foo-1',
433+
'ChecksumSHA256': 'checksum',
434+
'Body': BytesIO(b'foo')
435+
}
436+
]
437+
cmdline = f'{self.prefix} s3://bucket/foo {self.files.rootdir} --checksum-mode ENABLED'
438+
self.run_cmd(cmdline, expected_rc=0)
439+
self.assertEqual(self.operations_called[0][0].name, 'ListObjectsV2')
440+
self.assertEqual(self.operations_called[1][0].name, 'GetObject')
441+
self.assertIn(('ChecksumMode', 'ENABLED'), self.operations_called[1][1].items())
442+
356443

357444
class TestSyncSourceRegion(BaseS3CLIRunnerTest):
358445
def test_respects_source_region(self):

tests/unit/customizations/s3/test_subcommands.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,46 @@ def test_validate_no_streaming_paths(self):
400400
cmd_params.add_paths(paths)
401401
self.assertFalse(cmd_params.parameters['is_stream'])
402402

403+
def test_validate_checksum_algorithm_download_error(self):
    # --checksum-algorithm must be rejected for an S3 -> local ``cp``.
    paths = ['s3://bucket/key', self.file_creator.rootdir]
    parameters = {'checksum_algorithm': 'CRC32'}
    cmd_params = CommandParameters('cp', parameters, '')
    with self.assertRaises(ParamValidationError) as cm:
        cmd_params.add_paths(paths)
    # Assert after the ``with`` block (statements following the raising call
    # inside it never run) and on ``cm.exception``: ``cm.msg`` is only the
    # optional failure message for assertRaises, not the raised error.
    self.assertIn(
        'Expected checksum-algorithm parameter to be used with one of '
        'following path formats',
        str(cm.exception),
    )
410+
411+
def test_validate_checksum_algorithm_sync_download_error(self):
    # --checksum-algorithm must be rejected for an S3 -> local ``sync``.
    paths = ['s3://bucket/key', self.file_creator.rootdir]
    parameters = {'checksum_algorithm': 'CRC32C'}
    cmd_params = CommandParameters('sync', parameters, '')
    with self.assertRaises(ParamValidationError) as cm:
        cmd_params.add_paths(paths)
    # Assert after the ``with`` block (statements following the raising call
    # inside it never run) and on ``cm.exception``: ``cm.msg`` is only the
    # optional failure message for assertRaises, not the raised error.
    self.assertIn(
        'Expected checksum-algorithm parameter to be used with one of '
        'following path formats',
        str(cm.exception),
    )
418+
419+
def test_validate_checksum_mode_upload_error(self):
    # --checksum-mode must be rejected for a local -> S3 ``cp``.
    paths = [self.file_creator.rootdir, 's3://bucket/key']
    parameters = {'checksum_mode': 'ENABLED'}
    cmd_params = CommandParameters('cp', parameters, '')
    with self.assertRaises(ParamValidationError) as cm:
        cmd_params.add_paths(paths)
    # Assert after the ``with`` block (statements following the raising call
    # inside it never run) and on ``cm.exception``: ``cm.msg`` is only the
    # optional failure message for assertRaises, not the raised error.
    self.assertIn(
        'Expected checksum-mode parameter to be used with one of '
        'following path formats',
        str(cm.exception),
    )
426+
427+
def test_validate_checksum_mode_sync_upload_error(self):
    # --checksum-mode must be rejected for a local -> S3 ``sync``.
    paths = [self.file_creator.rootdir, 's3://bucket/key']
    parameters = {'checksum_mode': 'ENABLED'}
    cmd_params = CommandParameters('sync', parameters, '')
    with self.assertRaises(ParamValidationError) as cm:
        cmd_params.add_paths(paths)
    # Assert after the ``with`` block (statements following the raising call
    # inside it never run) and on ``cm.exception``: ``cm.msg`` is only the
    # optional failure message for assertRaises, not the raised error.
    self.assertIn(
        'Expected checksum-mode parameter to be used with one of '
        'following path formats',
        str(cm.exception),
    )
434+
435+
def test_validate_checksum_mode_move_error(self):
    # --checksum-mode must be rejected for an S3 -> S3 ``mv``.
    paths = ['s3://bucket/key', 's3://bucket2/key']
    parameters = {'checksum_mode': 'ENABLED'}
    cmd_params = CommandParameters('mv', parameters, '')
    with self.assertRaises(ParamValidationError) as cm:
        cmd_params.add_paths(paths)
    # Assert after the ``with`` block (statements following the raising call
    # inside it never run) and on ``cm.exception``: ``cm.msg`` is only the
    # optional failure message for assertRaises, not the raised error.
    self.assertIn(
        'Expected checksum-mode parameter to be used with one of '
        'following path formats',
        str(cm.exception),
    )
442+
403443
def test_validate_streaming_paths_error(self):
404444
parameters = {'src': '-', 'dest': 's3://bucket'}
405445
cmd_params = CommandParameters('sync', parameters, '')

0 commit comments

Comments
 (0)