Skip to content

Commit 4f32699

Browse files
committed
fix trailing checksum
1 parent 01f7ee3 commit 4f32699

File tree

5 files changed

+172
-61
lines changed

5 files changed

+172
-61
lines changed
File renamed without changes.
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/**
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0.
4+
*/
5+
#pragma once
6+
#include <aws/core/utils/Outcome.h>
7+
#include <aws/core/utils/crypto/Hash.h>
8+
#include <aws/core/utils/HashingUtils.h>
9+
10+
namespace Aws {
11+
namespace Utils {
12+
namespace Stream {
13+
14+
static const size_t AWS_DATA_BUFFER_SIZE = 65536;
15+
16+
template <size_t DataBufferSize = AWS_DATA_BUFFER_SIZE>
17+
class AwsChunkedStream {
18+
public:
19+
AwsChunkedStream(Http::HttpRequest *request, const std::shared_ptr<Aws::IOStream> &stream)
20+
: m_chunkingStream{Aws::MakeShared<StringStream>("AwsChunkedStream")}, m_request(request), m_stream(stream) {}
21+
22+
size_t BufferedRead(char *dst, size_t amountToRead) {
23+
// the chunk has ended and cannot be read from
24+
if (m_chunkEnd) {
25+
return 0;
26+
}
27+
28+
// If we've read all of the underlying stream write the checksum trailing header
29+
// the set that the chunked stream is over.
30+
if (m_stream->eof() && (m_chunkingStream->eof() || m_chunkingStream->peek() == EOF)) {
31+
Aws::StringStream chunkedTrailer;
32+
chunkedTrailer << "0\r\n";
33+
if (m_request->GetRequestHash().second != nullptr) {
34+
chunkedTrailer << "x-amz-checksum-" << m_request->GetRequestHash().first << ":"
35+
<< HashingUtils::Base64Encode(m_request->GetRequestHash().second->GetHash().GetResult()) << "\r\n";
36+
}
37+
chunkedTrailer << "\r\n";
38+
auto trailerSize = chunkedTrailer.str().size();
39+
memcpy(dst, chunkedTrailer.str().c_str(), trailerSize);
40+
m_chunkEnd = true;
41+
return trailerSize;
42+
}
43+
44+
// Try to read in a 64K chunk, if we cant we know the stream is over
45+
size_t bytesRead = 0;
46+
while (m_stream->good() && bytesRead < DataBufferSize) {
47+
m_stream->read(&m_data[bytesRead], DataBufferSize - bytesRead);
48+
bytesRead += m_stream->gcount();
49+
}
50+
51+
// update the trailing checksum to be sent only if we read data and buffered.
52+
if (bytesRead > 0 && m_request->GetRequestHash().second != nullptr) {
53+
m_request->GetRequestHash().second->Update(reinterpret_cast<unsigned char *>(m_data.data()), bytesRead);
54+
}
55+
56+
// Buffer chunked encoding from data if there was data read to the buffer, otherwise leave it alone/
57+
if (bytesRead > 0 && m_chunkingStream != nullptr) {
58+
*m_chunkingStream << Aws::Utils::StringUtils::ToHexString(bytesRead) << "\r\n";
59+
std::copy(m_data.begin(), m_data.begin() + bytesRead, std::ostream_iterator<char>(*m_chunkingStream));
60+
*m_chunkingStream << "\r\n";
61+
auto curr = m_chunkingStream->tellg();
62+
const auto rn = m_chunkingStream->rdbuf();
63+
AWS_UNREFERENCED_PARAM(rn);
64+
m_chunkingStream->seekg(curr);
65+
}
66+
67+
// Read to destination buffer, return how much was read
68+
m_chunkingStream->read(dst, amountToRead);
69+
return m_chunkingStream->gcount();
70+
}
71+
72+
private:
73+
std::array<char, DataBufferSize> m_data;
74+
std::shared_ptr<Aws::IOStream> m_chunkingStream;
75+
bool m_chunkEnd{false};
76+
Http::HttpRequest *m_request;
77+
std::shared_ptr<Aws::IOStream> m_stream;
78+
};
79+
} // namespace Stream
80+
} // namespace Utils
81+
} // namespace Aws

src/aws-cpp-sdk-core/source/http/curl/CurlHttpClient.cpp

Lines changed: 22 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <aws/core/utils/HashingUtils.h>
1111
#include <aws/core/utils/logging/LogMacros.h>
1212
#include <aws/core/utils/ratelimiter/RateLimiterInterface.h>
13+
#include <aws/core/utils/stream/AwsChunkedStream.h>
1314
#include <aws/core/utils/DateTime.h>
1415
#include <aws/core/utils/crypto/Hash.h>
1516
#include <aws/core/utils/Outcome.h>
@@ -24,6 +25,7 @@ using namespace Aws::Http;
2425
using namespace Aws::Http::Standard;
2526
using namespace Aws::Utils;
2627
using namespace Aws::Utils::Logging;
28+
using namespace Aws::Utils::Stream;
2729
using namespace Aws::Monitoring;
2830

2931
#ifdef USE_AWS_MEMORY_MANAGEMENT
@@ -144,25 +146,27 @@ struct CurlWriteCallbackContext
144146
int64_t m_numBytesResponseReceived;
145147
};
146148

149+
static const char* CURL_HTTP_CLIENT_TAG = "CurlHttpClient";
150+
147151
struct CurlReadCallbackContext
148152
{
149153
CurlReadCallbackContext(const CurlHttpClient* client, CURL* curlHandle, HttpRequest* request, Aws::Utils::RateLimits::RateLimiterInterface* limiter) :
150154
m_client(client),
151155
m_curlHandle(curlHandle),
152156
m_rateLimiter(limiter),
153157
m_request(request),
154-
m_chunkEnd(false)
158+
m_chunkEnd(false),
159+
m_chunkedStream{Aws::MakeShared<AwsChunkedStream<>>(CURL_HTTP_CLIENT_TAG, request, request->GetContentBody())}
155160
{}
156161

157162
const CurlHttpClient* m_client;
158163
CURL* m_curlHandle;
159164
Aws::Utils::RateLimits::RateLimiterInterface* m_rateLimiter;
160165
HttpRequest* m_request;
161166
bool m_chunkEnd;
167+
std::shared_ptr<Stream::AwsChunkedStream<>> m_chunkedStream;
162168
};
163169

164-
static const char* CURL_HTTP_CLIENT_TAG = "CurlHttpClient";
165-
166170
static int64_t GetContentLengthFromHeader(CURL* connectionHandle,
167171
bool& hasContentLength) {
168172
#if LIBCURL_VERSION_NUM >= 0x073700 // 7.55.0
@@ -293,68 +297,25 @@ static size_t ReadBody(char* ptr, size_t size, size_t nmemb, void* userdata, boo
293297
size_t amountToRead = size * nmemb;
294298
bool isAwsChunked = request->HasHeader(Aws::Http::CONTENT_ENCODING_HEADER) &&
295299
request->GetHeaderValue(Aws::Http::CONTENT_ENCODING_HEADER) == Aws::Http::AWS_CHUNKED_VALUE;
296-
// aws-chunk = hex(chunk-size) + CRLF + chunk-data + CRLF
297-
// Needs to reserve bytes of sizeof(hex(chunk-size)) + sizeof(CRLF) + sizeof(CRLF)
298-
if (isAwsChunked)
299-
{
300-
Aws::String amountToReadHexString = Aws::Utils::StringUtils::ToHexString(amountToRead);
301-
amountToRead -= (amountToReadHexString.size() + 4);
302-
}
303300

304301
if (ioStream != nullptr && amountToRead > 0)
305302
{
306303
size_t amountRead = 0;
307-
if (isStreaming)
308-
{
309-
if (!ioStream->eof() && ioStream->peek() != EOF)
310-
{
311-
amountRead = (size_t) ioStream->readsome(ptr, amountToRead);
312-
}
313-
if (amountRead == 0 && !ioStream->eof())
314-
{
315-
return CURL_READFUNC_PAUSE;
316-
}
317-
}
318-
else
319-
{
320-
ioStream->read(ptr, amountToRead);
321-
amountRead = static_cast<size_t>(ioStream->gcount());
322-
}
323-
324-
if (isAwsChunked)
325-
{
326-
if (amountRead > 0)
327-
{
328-
if (request->GetRequestHash().second != nullptr)
329-
{
330-
request->GetRequestHash().second->Update(reinterpret_cast<unsigned char*>(ptr), amountRead);
331-
}
332-
333-
Aws::String hex = Aws::Utils::StringUtils::ToHexString(amountRead);
334-
memmove(ptr + hex.size() + 2, ptr, amountRead);
335-
memmove(ptr + hex.size() + 2 + amountRead, "\r\n", 2);
336-
memmove(ptr, hex.c_str(), hex.size());
337-
memmove(ptr + hex.size(), "\r\n", 2);
338-
amountRead += hex.size() + 4;
339-
}
340-
else if (!context->m_chunkEnd)
341-
{
342-
Aws::StringStream chunkedTrailer;
343-
chunkedTrailer << "0\r\n";
344-
if (request->GetRequestHash().second != nullptr)
345-
{
346-
chunkedTrailer << "x-amz-checksum-"
347-
<< request->GetRequestHash().first
348-
<< ":"
349-
<< HashingUtils::Base64Encode(request->GetRequestHash().second->GetHash().GetResult())
350-
<< "\r\n";
351-
}
352-
chunkedTrailer << "\r\n";
353-
amountRead = chunkedTrailer.str().size();
354-
memcpy(ptr, chunkedTrailer.str().c_str(), amountRead);
355-
context->m_chunkEnd = true;
356-
}
357-
}
304+
if (isStreaming) {
305+
if (!ioStream->eof() && ioStream->peek() != EOF) {
306+
amountRead = (size_t)ioStream->readsome(ptr, amountToRead);
307+
}
308+
if (amountRead == 0 && !ioStream->eof()) {
309+
return CURL_READFUNC_PAUSE;
310+
}
311+
} else if (isAwsChunked) {
312+
AWS_LOGSTREAM_ERROR(CURL_HTTP_CLIENT_TAG, "Called with size: " << amountToRead);
313+
amountRead = context->m_chunkedStream->BufferedRead(ptr, amountToRead);
314+
AWS_LOGSTREAM_ERROR(CURL_HTTP_CLIENT_TAG, "read: " << amountRead);
315+
} else {
316+
ioStream->read(ptr, amountToRead);
317+
amountRead = static_cast<size_t>(ioStream->gcount());
318+
}
358319

359320
auto& sentHandler = request->GetDataSentEventHandler();
360321
if (sentHandler)
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/**
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0.
4+
*/
5+
#include <aws/core/http/standard/StandardHttpRequest.h>
6+
#include <aws/core/utils/crypto/CRC32.h>
7+
#include <aws/core/utils/stream/AwsChunkedStream.h>
8+
#include <aws/testing/AwsCppSdkGTestSuite.h>
9+
10+
using namespace Aws;
11+
using namespace Aws::Http::Standard;
12+
using namespace Aws::Utils::Stream;
13+
using namespace Aws::Utils::Crypto;
14+
15+
class AwsChunkedStreamTest : public Aws::Testing::AwsCppSdkGTestSuite {};
16+
17+
const char* TEST_LOG_TAG = "AWS_CHUNKED_STREAM_TEST";
18+
19+
// Encodes a 25-byte payload through a chunked stream with a 10-byte data buffer and checks the
// complete aws-chunked output: two 10-byte chunks, one 5-byte chunk and the CRC32 trailer.
TEST_F(AwsChunkedStreamTest, ChunkedStreamShouldWork) {
  std::shared_ptr<IOStream> payload = Aws::MakeShared<StringStream>(TEST_LOG_TAG, "1234567890123456789012345");

  StandardHttpRequest httpRequest{"www.elda.com/will", Http::HttpMethod::HTTP_GET};
  auto crc32Hash = Aws::MakeShared<CRC32>(TEST_LOG_TAG);
  httpRequest.SetRequestHash("crc32", crc32Hash);

  AwsChunkedStream<10> encoder{&httpRequest, payload};

  // The scratch buffer is deliberately larger than the 10 bytes requested per call.
  std::array<char, 100> scratch{};
  Aws::StringStream collected;
  for (size_t produced = encoder.BufferedRead(scratch.data(), 10); produced > 0;
       produced = encoder.BufferedRead(scratch.data(), 10)) {
    collected.write(scratch.data(), static_cast<std::streamsize>(produced));
  }

  const char *expectedStreamWithChecksum =
      "A\r\n1234567890\r\nA\r\n1234567890\r\n5\r\n12345\r\n0\r\nx-amz-checksum-crc32:78DeVw==\r\n\r\n";
  const auto actual = collected.str();
  EXPECT_EQ(expectedStreamWithChecksum, actual);
}

tests/aws-cpp-sdk-s3-integration-tests/BucketAndObjectOperationTest.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2518,4 +2518,38 @@ namespace
25182518
}
25192519
}
25202520
}
2521+
2522+
// Uploads a 1 MiB object once per supported checksum algorithm and expects every PutObject to
// succeed. All uploads go to the same key in a freshly created private bucket.
TEST_F(BucketAndObjectOperationTest, PutObjectChecksumWithGuarunteedChunkedObject) {
  // One upload per entry: the checksum algorithm to request and the byte the body is filled with.
  struct ChecksumUpload {
    ChecksumAlgorithm algorithm;
    char fillByte;
  };

  const String fullBucketName = CalculateBucketName(BASE_CHECKSUMS_BUCKET_NAME.c_str());
  SCOPED_TRACE(Aws::String("FullBucketName ") + fullBucketName);

  CreateBucketRequest bucketRequest;
  bucketRequest.SetBucket(fullBucketName);
  bucketRequest.SetACL(BucketCannedACL::private_);
  CreateBucketOutcome bucketOutcome = CreateBucket(bucketRequest);
  AWS_ASSERT_SUCCESS(bucketOutcome);

  const ChecksumUpload uploads[] = {
      {ChecksumAlgorithm::CRC32, 'e'},
      {ChecksumAlgorithm::CRC32C, 'l'},
      {ChecksumAlgorithm::SHA1, 'd'},
      {ChecksumAlgorithm::SHA256, 'a'},
  };

  for (const auto& upload : uploads) {
    const String content(1024 * 1024, upload.fillByte);
    std::shared_ptr<IOStream> contentStream =
        Aws::MakeShared<StringStream>(ALLOCATION_TAG, content, std::ios_base::in | std::ios_base::binary);

    PutObjectRequest putRequest;
    putRequest.WithBucket(fullBucketName).WithKey("Metaphor").WithChecksumAlgorithm(upload.algorithm);
    putRequest.SetBody(contentStream);

    const auto putOutcome = Client->PutObject(putRequest);
    EXPECT_TRUE(putOutcome.IsSuccess());
  }
}
25212555
}

0 commit comments

Comments
 (0)