From 154652410f17076f078956f82a544e23bd1c9001 Mon Sep 17 00:00:00 2001 From: Christian <> Date: Thu, 8 Apr 2021 17:03:47 +0200 Subject: [PATCH 1/8] Add S3OptimizedUploadStorage --- README.rst | 19 +++++++++++++++++++ s3file/storages_optimized.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 s3file/storages_optimized.py diff --git a/README.rst b/README.rst index 8b5830d..f9782df 100644 --- a/README.rst +++ b/README.rst @@ -188,3 +188,22 @@ uploaded to AWS S3 directly and not to your Django application server. :target: https://codecov.io/gh/codingjoe/django-s3file .. |GitHub license| image:: https://img.shields.io/badge/license-MIT-blue.svg :target: https://raw.githubusercontent.com/codingjoe/django-s3file/master/LICENSE + +Using optimized S3Boto3Storage +------------------------------ + +Since ``S3Boto3Storage`` supports storing data from any other fileobj, +it uses a very general ``_save`` function. This leads to the +frontend uploading the file to S3 to then move it byte-by-byte. +For large files this leads to additional loading times for the user. + +That's why S3File provides an optimized version of this method at +``storages_optimized.S3OptimizedUploadStorage``. It uses the more efficient +``copy`` given that we know that we only copy from one S3 location to another. + +.. code:: python + + from s3file.storages_optimized import S3OptimizedUploadStorage + + class MyStorage(S3OptimizedUploadStorage): # Subclass and use like any other storage + default_acl = 'private' diff --git a/s3file/storages_optimized.py b/s3file/storages_optimized.py new file mode 100644 index 0000000..c4556b3 --- /dev/null +++ b/s3file/storages_optimized.py @@ -0,0 +1,36 @@ +from storages.backends.s3boto3 import S3Boto3Storage + + +class S3OptimizedUploadStorage(S3Boto3Storage): + """ + This storage prevents unnecessary operation to copy with the general ``upload_fileobj`` + command when the object already is a S3 object where the faster copy command can be used. + + The assumption is that ``content`` contains a S3 object from which we can copy. + + See also discussion here: https://github.com/codingjoe/django-s3file/discussions/126 + """ + def _save(self, name, content): + # Basically copy the implementation of _save of S3Boto3Storage + # and replace the obj.upload_fileobj with a copy function + cleaned_name = self._clean_name(name) + name = self._normalize_name(cleaned_name) + params = self._get_write_parameters(name, content) + + if (self.gzip and + params['ContentType'] in self.gzip_content_types and + 'ContentEncoding' not in params): + content = self._compress_content(content) + params['ContentEncoding'] = 'gzip' + + obj = self.bucket.Object(name) + #content.seek(0, os.SEEK_SET) # Disable unnecessary seek operation + #obj.upload_fileobj(content, ExtraArgs=params) # Disable upload function + + if not content.obj or not content.obj.key: + raise RuntimeError("The content object must be a S3 object and contain a valid key.") + + # Copy the file instead uf uploading + obj.copy({'Bucket': self.bucket.name, 'Key': content.obj.key}, ExtraArgs=params) + + return cleaned_name From fb2acc68f5d1069c5756cfd297a8684e3317f204 Mon Sep 17 00:00:00 2001 From: Christian Date: Thu, 8 Apr 2021 21:18:07 +0200 Subject: [PATCH 2/8] Run black --- s3file/storages_optimized.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/s3file/storages_optimized.py b/s3file/storages_optimized.py index c4556b3..ab6084f 100644 --- a/s3file/storages_optimized.py +++ b/s3file/storages_optimized.py @@ -3,13 +3,14 @@ class S3OptimizedUploadStorage(S3Boto3Storage): """ - This storage prevents unnecessary operation to copy with the general ``upload_fileobj`` - command when the object already is a S3 object where the faster copy command can be used. + This storage prevents unnecessary operation to copy with the general ``upload_fileobj`` + command when the object already is a S3 object where the faster copy command can be used. - The assumption is that ``content`` contains a S3 object from which we can copy. + The assumption is that ``content`` contains a S3 object from which we can copy. - See also discussion here: https://github.com/codingjoe/django-s3file/discussions/126 + See also discussion here: https://github.com/codingjoe/django-s3file/discussions/126 """ + def _save(self, name, content): # Basically copy the implementation of _save of S3Boto3Storage # and replace the obj.upload_fileobj with a copy function @@ -17,20 +18,24 @@ def _save(self, name, content): name = self._normalize_name(cleaned_name) params = self._get_write_parameters(name, content) - if (self.gzip and - params['ContentType'] in self.gzip_content_types and - 'ContentEncoding' not in params): + if ( + self.gzip + and params["ContentType"] in self.gzip_content_types + and "ContentEncoding" not in params + ): content = self._compress_content(content) - params['ContentEncoding'] = 'gzip' + params["ContentEncoding"] = "gzip" obj = self.bucket.Object(name) - #content.seek(0, os.SEEK_SET) # Disable unnecessary seek operation - #obj.upload_fileobj(content, ExtraArgs=params) # Disable upload function + # content.seek(0, os.SEEK_SET) # Disable unnecessary seek operation + # obj.upload_fileobj(content, ExtraArgs=params) # Disable upload function if not content.obj or not content.obj.key: - raise RuntimeError("The content object must be a S3 object and contain a valid key.") + raise RuntimeError( + "The content object must be a S3 object and contain a valid key." + ) # Copy the file instead uf uploading - obj.copy({'Bucket': self.bucket.name, 'Key': content.obj.key}, ExtraArgs=params) + obj.copy({"Bucket": self.bucket.name, "Key": content.obj.key}, ExtraArgs=params) return cleaned_name From 907a9faffe2877441a8fbc8ad306cb85d40c87cc Mon Sep 17 00:00:00 2001 From: Christian Date: Thu, 8 Apr 2021 21:28:55 +0200 Subject: [PATCH 3/8] Adhere to pydocstyle --- s3file/storages_optimized.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/s3file/storages_optimized.py b/s3file/storages_optimized.py index ab6084f..4753150 100644 --- a/s3file/storages_optimized.py +++ b/s3file/storages_optimized.py @@ -3,6 +3,8 @@ class S3OptimizedUploadStorage(S3Boto3Storage): """ + Class for an optimized S3 storage. + This storage prevents unnecessary operation to copy with the general ``upload_fileobj`` command when the object already is a S3 object where the faster copy command can be used. From 10a18092b40533479a28acc1e132f6edc4102284 Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 9 Apr 2021 11:15:52 +0200 Subject: [PATCH 4/8] Fix check for file type --- s3file/storages_optimized.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3file/storages_optimized.py b/s3file/storages_optimized.py index 4753150..f402da3 100644 --- a/s3file/storages_optimized.py +++ b/s3file/storages_optimized.py @@ -32,7 +32,7 @@ def _save(self, name, content): # content.seek(0, os.SEEK_SET) # Disable unnecessary seek operation # obj.upload_fileobj(content, ExtraArgs=params) # Disable upload function - if not content.obj or not content.obj.key: + if not hasattr(content, "obj") or not hasattr(content.obj, "key"): raise RuntimeError( "The content object must be a S3 object and contain a valid key." ) From eb6f7f6cfc508ef1210b79e36f92633fe085d12b Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 9 Apr 2021 11:16:52 +0200 Subject: [PATCH 5/8] Add tests for S3OptimizedUploadStorage --- s3file/storages.py | 20 ++++++++++++++++++++ tests/test_storages.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 tests/test_storages.py diff --git a/s3file/storages.py b/s3file/storages.py index 899a5d8..2870ffa 100644 --- a/s3file/storages.py +++ b/s3file/storages.py @@ -8,6 +8,8 @@ from django.core.files.storage import FileSystemStorage, default_storage from django.utils._os import safe_join +from s3file.storages_optimized import S3OptimizedUploadStorage + class S3MockStorage(FileSystemStorage): @property @@ -52,6 +54,24 @@ class bucket: name = "test-bucket" +class S3OptimizedMockStorage(S3OptimizedUploadStorage): + created_objects = {} + + class bucket: + name = "test-bucket" + + class Object: + def __init__(self, key): + self.key = key + self.copy_from_bucket = None + self.copy_from_key = None + S3OptimizedMockStorage.created_objects[self.key] = self + + def copy(self, s3_object, ExtraArgs): + self.copy_from_bucket = s3_object["Bucket"] + self.copy_from_key = s3_object["Key"] + + local_dev = isinstance(default_storage, FileSystemStorage) storage = default_storage if not local_dev else S3MockStorage() diff --git a/tests/test_storages.py b/tests/test_storages.py new file mode 100644 index 0000000..14bdd6a --- /dev/null +++ b/tests/test_storages.py @@ -0,0 +1,37 @@ +import pytest + +from django.core.files.base import ContentFile + +from s3file import storages + + +class TestStorages: + url = "/__s3_mock__/" + + def test_post__save_optimized(self): + storage = storages.S3OptimizedMockStorage() + obj = storage.bucket.Object("tmp/s3file/s3_file.txt") + + class Content: + def __init__(self, obj): + self.obj = obj + + content = Content(obj) + key = storage._save("tmp/s3file/s3_file_copied.txt", content) + stored_object = storage.created_objects[ + "custom/location/tmp/s3file/s3_file_copied.txt" + ] + + assert key == "tmp/s3file/s3_file_copied.txt" + assert stored_object.copy_from_bucket == storage.bucket.name + assert stored_object.copy_from_key == "tmp/s3file/s3_file.txt" + + def test_post__save_optimized_fail(self): + storage = storages.S3OptimizedMockStorage() + + with pytest.raises(RuntimeError) as excinfo: + storage._save("tmp/s3file/s3_file_copied.txt", ContentFile(b"s3file")) + + assert "The content object must be a S3 object and contain a valid key." in str( + excinfo.value + ) From 7a14bbc4a3d45018c9f858f11cdbc8005a7bc3b7 Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 9 Apr 2021 11:22:36 +0200 Subject: [PATCH 6/8] Fix imports --- tests/test_storages.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_storages.py b/tests/test_storages.py index 14bdd6a..e920ba8 100644 --- a/tests/test_storages.py +++ b/tests/test_storages.py @@ -1,5 +1,4 @@ import pytest - from django.core.files.base import ContentFile from s3file import storages From 39d9bbcc572814fd934f3e5915d62af5115ef301 Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 9 Apr 2021 11:36:50 +0200 Subject: [PATCH 7/8] Add test for zip --- s3file/storages.py | 3 +++ tests/test_storages.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/s3file/storages.py b/s3file/storages.py index 2870ffa..a5cae14 100644 --- a/s3file/storages.py +++ b/s3file/storages.py @@ -57,6 +57,9 @@ class bucket: class S3OptimizedMockStorage(S3OptimizedUploadStorage): created_objects = {} + def _compress_content(self, content): + return content + class bucket: name = "test-bucket" diff --git a/tests/test_storages.py b/tests/test_storages.py index e920ba8..ff7de8d 100644 --- a/tests/test_storages.py +++ b/tests/test_storages.py @@ -25,6 +25,25 @@ def __init__(self, obj): assert stored_object.copy_from_bucket == storage.bucket.name assert stored_object.copy_from_key == "tmp/s3file/s3_file.txt" + def test_post__save_optimized_gzip(self): + storage = storages.S3OptimizedMockStorage() + obj = storage.bucket.Object("tmp/s3file/s3_file.css") + storage.gzip = True + + class Content: + def __init__(self, obj): + self.obj = obj + + content = Content(obj) + key = storage._save("tmp/s3file/s3_file_copied.css", content) + stored_object = storage.created_objects[ + "custom/location/tmp/s3file/s3_file_copied.css" + ] + + assert key == "tmp/s3file/s3_file_copied.css" + assert stored_object.copy_from_bucket == storage.bucket.name + assert stored_object.copy_from_key == "tmp/s3file/s3_file.css" + def test_post__save_optimized_fail(self): storage = storages.S3OptimizedMockStorage() From efe5a6454289872f5f70f3e50158042b92c2f2cc Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 6 Sep 2021 16:21:20 +0200 Subject: [PATCH 8/8] Address issues from PR feedback --- README.rst | 9 +++++---- s3file/storages.py | 23 ----------------------- s3file/storages_optimized.py | 2 +- tests/test_storages.py | 31 ++++++++++++++++++++++++++----- 4 files changed, 32 insertions(+), 33 deletions(-) diff --git a/README.rst b/README.rst index 662d8ac..d2d2211 100644 --- a/README.rst +++ b/README.rst @@ -200,13 +200,14 @@ Using optimized S3Boto3Storage ------------------------------ Since ``S3Boto3Storage`` supports storing data from any other fileobj, -it uses a very general ``_save`` function. This leads to the -frontend uploading the file to S3 to then move it byte-by-byte. -For large files this leads to additional loading times for the user. +it uses a generalized ``_save`` function. This leads to the frontend uploading +the file to S3 and then copying it byte-by-byte to perform a move operation just +to rename the uploaded object. For large files this leads to additional loading +times for the user. That's why S3File provides an optimized version of this method at ``storages_optimized.S3OptimizedUploadStorage``. It uses the more efficient -``copy`` given that we know that we only copy from one S3 location to another. +``copy`` method from S3, given that we know that we only copy from one S3 location to another. .. code:: python diff --git a/s3file/storages.py b/s3file/storages.py index a5cae14..899a5d8 100644 --- a/s3file/storages.py +++ b/s3file/storages.py @@ -8,8 +8,6 @@ from django.core.files.storage import FileSystemStorage, default_storage from django.utils._os import safe_join -from s3file.storages_optimized import S3OptimizedUploadStorage - class S3MockStorage(FileSystemStorage): @property @@ -54,27 +52,6 @@ class bucket: name = "test-bucket" -class S3OptimizedMockStorage(S3OptimizedUploadStorage): - created_objects = {} - - def _compress_content(self, content): - return content - - class bucket: - name = "test-bucket" - - class Object: - def __init__(self, key): - self.key = key - self.copy_from_bucket = None - self.copy_from_key = None - S3OptimizedMockStorage.created_objects[self.key] = self - - def copy(self, s3_object, ExtraArgs): - self.copy_from_bucket = s3_object["Bucket"] - self.copy_from_key = s3_object["Key"] - - local_dev = isinstance(default_storage, FileSystemStorage) storage = default_storage if not local_dev else S3MockStorage() diff --git a/s3file/storages_optimized.py b/s3file/storages_optimized.py index f402da3..dbf39d9 100644 --- a/s3file/storages_optimized.py +++ b/s3file/storages_optimized.py @@ -33,7 +33,7 @@ def _save(self, name, content): # obj.upload_fileobj(content, ExtraArgs=params) # Disable upload function if not hasattr(content, "obj") or not hasattr(content.obj, "key"): - raise RuntimeError( + raise TypeError( "The content object must be a S3 object and contain a valid key." ) diff --git a/tests/test_storages.py b/tests/test_storages.py index ff7de8d..8ac501c 100644 --- a/tests/test_storages.py +++ b/tests/test_storages.py @@ -1,14 +1,35 @@ import pytest from django.core.files.base import ContentFile -from s3file import storages +from s3file.storages_optimized import S3OptimizedUploadStorage + + +class S3OptimizedMockStorage(S3OptimizedUploadStorage): + created_objects = {} + + def _compress_content(self, content): + return content + + class bucket: + name = "test-bucket" + + class Object: + def __init__(self, key): + self.key = key + self.copy_from_bucket = None + self.copy_from_key = None + S3OptimizedMockStorage.created_objects[self.key] = self + + def copy(self, s3_object, ExtraArgs): + self.copy_from_bucket = s3_object["Bucket"] + self.copy_from_key = s3_object["Key"] class TestStorages: url = "/__s3_mock__/" def test_post__save_optimized(self): - storage = storages.S3OptimizedMockStorage() + storage = S3OptimizedMockStorage() obj = storage.bucket.Object("tmp/s3file/s3_file.txt") class Content: @@ -26,7 +47,7 @@ def __init__(self, obj): assert stored_object.copy_from_key == "tmp/s3file/s3_file.txt" def test_post__save_optimized_gzip(self): - storage = storages.S3OptimizedMockStorage() + storage = S3OptimizedMockStorage() obj = storage.bucket.Object("tmp/s3file/s3_file.css") storage.gzip = True @@ -45,9 +66,9 @@ def __init__(self, obj): assert stored_object.copy_from_key == "tmp/s3file/s3_file.css" def test_post__save_optimized_fail(self): - storage = storages.S3OptimizedMockStorage() + storage = S3OptimizedMockStorage() - with pytest.raises(RuntimeError) as excinfo: + with pytest.raises(TypeError) as excinfo: storage._save("tmp/s3file/s3_file_copied.txt", ContentFile(b"s3file")) assert "The content object must be a S3 object and contain a valid key." in str(