diff --git a/gcloud/storage/_helpers.py b/gcloud/storage/_helpers.py
index b9e902deb526..7970c78c5136 100644
--- a/gcloud/storage/_helpers.py
+++ b/gcloud/storage/_helpers.py
@@ -17,6 +17,9 @@
 These are *not* part of the API.
 """
 
+from Crypto.Hash import MD5
+import base64
+
 
 class _PropertyMixin(object):
     """Abstract mixin for cloud storage classes with associated propertties.
@@ -187,3 +190,45 @@ def _setter(self, value):
         self._patch_properties({fieldname: value})
 
     return property(_getter, _setter)
+
+
+def _write_buffer_to_hash(buffer_object, hash_obj, digest_block_size=8192):
+    """Read blocks from a buffer and update a hash with them.
+
+    :type buffer_object: bytes buffer
+    :param buffer_object: Buffer containing bytes used to update a hash object.
+
+    :type hash_obj: object that implements update
+    :param hash_obj: Hash object; each block read from the buffer is
+                     passed to its ``update`` method.
+
+    :type digest_block_size: integer
+    :param digest_block_size: Number of bytes requested from the buffer
+                              on each ``read`` call.
+    """
+    block = buffer_object.read(digest_block_size)
+
+    # Loop until a read returns an empty block.
+    while block:
+        hash_obj.update(block)
+        # Update the block for the next iteration.
+        block = buffer_object.read(digest_block_size)
+
+
+def _base64_md5hash(buffer_object):
+    """Get MD5 hash of bytes (as base64).
+
+    The buffer is read from its current position; it is not rewound first.
+
+    :type buffer_object: bytes buffer
+    :param buffer_object: Buffer containing bytes used to compute an MD5
+                          hash (as base64).
+
+    :rtype: bytes
+    :returns: The base64-encoded MD5 digest of the bytes read from
+              ``buffer_object``.
+    """
+    hash_obj = MD5.new()
+    _write_buffer_to_hash(buffer_object, hash_obj)
+    digest_bytes = hash_obj.digest()
+    return base64.b64encode(digest_bytes)
diff --git a/gcloud/storage/test__helpers.py b/gcloud/storage/test__helpers.py
index b550f905034d..d98634ea5c13 100644
--- a/gcloud/storage/test__helpers.py
+++ b/gcloud/storage/test__helpers.py
@@ -237,6 +237,53 @@ def _patch_properties(self, mapping):
         self.assertEqual(test._patched, {'solfege': 'Latido'})
 
 
+class Test__base64_md5hash(unittest2.TestCase):
+
+    def _callFUT(self, bytes_to_sign):
+        from gcloud.storage._helpers import _base64_md5hash
+        return _base64_md5hash(bytes_to_sign)
+
+    def test_it(self):
+        from io import BytesIO
+        BYTES_TO_SIGN = b'FOO'
+        BUFFER = BytesIO()
+        BUFFER.write(BYTES_TO_SIGN)
+        BUFFER.seek(0)
+
+        SIGNED_CONTENT = self._callFUT(BUFFER)
+        self.assertEqual(SIGNED_CONTENT, b'kBiQqOnIz21aGlQrIp/r/w==')
+
+    def test_it_with_stubs(self):
+        from gcloud._testing import _Monkey
+        from gcloud.storage import _helpers as MUT
+
+        class _Buffer(object):
+
+            def __init__(self, return_vals):
+                self.return_vals = return_vals
+                self._block_sizes = []
+
+            def read(self, block_size):
+                self._block_sizes.append(block_size)
+                return self.return_vals.pop()
+
+        BASE64 = _Base64()
+        DIGEST_VAL = object()
+        BYTES_TO_SIGN = b'BYTES_TO_SIGN'
+        BUFFER = _Buffer([b'', BYTES_TO_SIGN])
+        MD5 = _MD5(DIGEST_VAL)
+
+        with _Monkey(MUT, base64=BASE64, MD5=MD5):
+            SIGNED_CONTENT = self._callFUT(BUFFER)
+
+        self.assertEqual(BUFFER._block_sizes, [8192, 8192])
+        self.assertTrue(SIGNED_CONTENT is DIGEST_VAL)
+        self.assertEqual(BASE64._called_b64encode, [DIGEST_VAL])
+        self.assertEqual(MD5._new_called, [None])
+        self.assertEqual(MD5.hash_obj.num_digest_calls, 1)
+        self.assertEqual(MD5.hash_obj._blocks, [BYTES_TO_SIGN])
+
+
 class _Connection(object):
 
     def __init__(self, *responses):
@@ -247,3 +294,39 @@ def api_request(self, **kw):
         self._requested.append(kw)
         response, self._responses = self._responses[0], self._responses[1:]
         return response
+
+
+class _MD5Hash(object):
+
+    def __init__(self, digest_val):
+        self.digest_val = digest_val
+        self.num_digest_calls = 0
+        self._blocks = []
+
+    def update(self, block):
+        self._blocks.append(block)
+
+    def digest(self):
+        self.num_digest_calls += 1
+        return self.digest_val
+
+
+class _MD5(object):
+
+    def __init__(self, digest_val):
+        self.hash_obj = _MD5Hash(digest_val)
+        self._new_called = []
+
+    def new(self, data=None):
+        self._new_called.append(data)
+        return self.hash_obj
+
+
+class _Base64(object):
+
+    def __init__(self):
+        self._called_b64encode = []
+
+    def b64encode(self, value):
+        self._called_b64encode.append(value)
+        return value
diff --git a/regression/storage.py b/regression/storage.py
index f7a657452e9c..342f240761cb 100644
--- a/regression/storage.py
+++ b/regression/storage.py
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from Crypto.Hash import MD5
-import base64
 import httplib2
 import tempfile
 import time
@@ -21,6 +19,7 @@
 
 from gcloud import exceptions
 from gcloud import storage
+from gcloud.storage._helpers import _base64_md5hash
 from gcloud.storage import _implicit_environ
 
 
@@ -96,18 +95,12 @@ class TestStorageFiles(unittest2.TestCase):
         }
     }
 
-    @staticmethod
-    def _get_base64_md5hash(filename):
-        with open(filename, 'rb') as file_obj:
-            hash = MD5.new(data=file_obj.read())
-        digest_bytes = hash.digest()
-        return base64.b64encode(digest_bytes)
-
     @classmethod
     def setUpClass(cls):
         super(TestStorageFiles, cls).setUpClass()
         for file_data in cls.FILES.values():
-            file_data['hash'] = cls._get_base64_md5hash(file_data['path'])
+            with open(file_data['path'], 'rb') as file_obj:
+                file_data['hash'] = _base64_md5hash(file_obj)
         cls.bucket = SHARED_BUCKETS['test_bucket']
 
     def setUp(self):