Skip to content

Commit c0f0337

Browse files
committed
fix trailing checksum
1 parent 01f7ee3 commit c0f0337

File tree

6 files changed

+216
-79
lines changed

6 files changed

+216
-79
lines changed
File renamed without changes.

.github/workflows/clang-format.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ jobs:
5656
run: |
5757
clang-format --version
5858
if [ -s diff_output.patch ]; then
59-
python3 clang-format-diff.py -p1 -style=file:.github/workflows/.clang-format < diff_output.patch > formatted_differences.patch 2> error.log || true
59+
python3 clang-format-diff.py -p1 -style=file:.clang-format < diff_output.patch > formatted_differences.patch 2> error.log || true
6060
if [ -s error.log ]; then
6161
echo "Errors from clang-format-diff.py:"
6262
cat error.log
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/**
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0.
4+
*/
5+
#pragma once
6+
#include <aws/core/utils/HashingUtils.h>
7+
#include <aws/core/utils/Outcome.h>
8+
#include <aws/core/utils/crypto/Hash.h>
9+
10+
namespace Aws {
11+
namespace Utils {
12+
namespace Stream {
13+
14+
static const size_t AWS_DATA_BUFFER_SIZE = 65536;
15+
16+
template <size_t DataBufferSize = AWS_DATA_BUFFER_SIZE>
17+
class AwsChunkedStream {
18+
public:
19+
AwsChunkedStream(Http::HttpRequest *request, const std::shared_ptr<Aws::IOStream> &stream)
20+
: m_chunkingStream{Aws::MakeShared<StringStream>("AwsChunkedStream")}, m_request(request), m_stream(stream) {
21+
assert(m_stream != nullptr);
22+
if (m_stream == nullptr) {
23+
AWS_LOGSTREAM_ERROR("AwsChunkedStream", "stream is null");
24+
}
25+
assert(request != nullptr);
26+
if (request == nullptr) {
27+
AWS_LOGSTREAM_ERROR("AwsChunkedStream", "request is null");
28+
}
29+
}
30+
31+
size_t BufferedRead(char *dst, size_t amountToRead) {
32+
assert(dst != nullptr);
33+
if (dst == nullptr) {
34+
AWS_LOGSTREAM_ERROR("AwsChunkedStream", "dst is null");
35+
}
36+
37+
// the chunk has ended and cannot be read from
38+
if (m_chunkEnd) {
39+
return 0;
40+
}
41+
42+
// If we've read all of the underlying stream write the checksum trailing header
43+
// the set that the chunked stream is over.
44+
if (m_stream->eof() && !m_stream->bad() && (m_chunkingStream->eof() || m_chunkingStream->peek() == EOF)) {
45+
Aws::StringStream chunkedTrailerStream;
46+
chunkedTrailerStream << "0\r\n";
47+
if (m_request->GetRequestHash().second != nullptr) {
48+
chunkedTrailerStream << "x-amz-checksum-" << m_request->GetRequestHash().first << ":"
49+
<< HashingUtils::Base64Encode(m_request->GetRequestHash().second->GetHash().GetResult()) << "\r\n";
50+
}
51+
chunkedTrailerStream << "\r\n";
52+
const auto chunkedTrailer = chunkedTrailerStream.str();
53+
auto trailerSize = chunkedTrailer.size();
54+
assert(amountToRead >= trailerSize);
55+
memcpy(dst, chunkedTrailer.c_str(), trailerSize);
56+
m_chunkEnd = true;
57+
return trailerSize;
58+
}
59+
60+
// Try to read in a 64K chunk, if we cant we know the stream is over
61+
size_t bytesRead = 0;
62+
while (m_stream->good() && bytesRead < DataBufferSize) {
63+
m_stream->read(&m_data[bytesRead], DataBufferSize - bytesRead);
64+
bytesRead += m_stream->gcount();
65+
}
66+
67+
// update the trailing checksum to be sent only if we read data and buffered.
68+
if (bytesRead > 0 && m_request->GetRequestHash().second != nullptr) {
69+
m_request->GetRequestHash().second->Update(reinterpret_cast<unsigned char *>(m_data.GetUnderlyingData()), bytesRead);
70+
}
71+
72+
// Buffer chunked encoding from data if there was data read to the buffer, otherwise leave it alone/
73+
if (bytesRead > 0 && m_chunkingStream != nullptr && !m_chunkingStream->bad()) {
74+
*m_chunkingStream << Aws::Utils::StringUtils::ToHexString(bytesRead) << "\r\n";
75+
std::copy(m_data.GetUnderlyingData(), m_data.GetUnderlyingData() + bytesRead, std::ostream_iterator<char>(*m_chunkingStream));
76+
*m_chunkingStream << "\r\n";
77+
}
78+
79+
// Read to destination buffer, return how much was read
80+
m_chunkingStream->read(dst, amountToRead);
81+
return m_chunkingStream->gcount();
82+
}
83+
84+
private:
85+
Aws::Utils::Array<char> m_data{DataBufferSize};
86+
std::shared_ptr<Aws::IOStream> m_chunkingStream;
87+
bool m_chunkEnd{false};
88+
Http::HttpRequest *m_request{nullptr};
89+
std::shared_ptr<Aws::IOStream> m_stream;
90+
};
91+
} // namespace Stream
92+
} // namespace Utils
93+
} // namespace Aws

src/aws-cpp-sdk-core/source/http/curl/CurlHttpClient.cpp

Lines changed: 47 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,29 @@
33
* SPDX-License-Identifier: Apache-2.0.
44
*/
55

6-
#include <aws/core/http/curl/CurlHttpClient.h>
76
#include <aws/core/http/HttpRequest.h>
7+
#include <aws/core/http/curl/CurlHttpClient.h>
88
#include <aws/core/http/standard/StandardHttpResponse.h>
9-
#include <aws/core/utils/StringUtils.h>
9+
#include <aws/core/monitoring/HttpClientMetrics.h>
10+
#include <aws/core/utils/DateTime.h>
1011
#include <aws/core/utils/HashingUtils.h>
12+
#include <aws/core/utils/Outcome.h>
13+
#include <aws/core/utils/StringUtils.h>
14+
#include <aws/core/utils/crypto/Hash.h>
1115
#include <aws/core/utils/logging/LogMacros.h>
1216
#include <aws/core/utils/ratelimiter/RateLimiterInterface.h>
13-
#include <aws/core/utils/DateTime.h>
14-
#include <aws/core/utils/crypto/Hash.h>
15-
#include <aws/core/utils/Outcome.h>
16-
#include <aws/core/monitoring/HttpClientMetrics.h>
17-
#include <cassert>
17+
#include <aws/core/utils/stream/AwsChunkedStream.h>
18+
1819
#include <algorithm>
20+
#include <cassert>
1921
#include <thread>
2022

21-
2223
using namespace Aws::Client;
2324
using namespace Aws::Http;
2425
using namespace Aws::Http::Standard;
2526
using namespace Aws::Utils;
2627
using namespace Aws::Utils::Logging;
28+
using namespace Aws::Utils::Stream;
2729
using namespace Aws::Monitoring;
2830

2931
#ifdef USE_AWS_MEMORY_MANAGEMENT
@@ -144,25 +146,28 @@ struct CurlWriteCallbackContext
144146
int64_t m_numBytesResponseReceived;
145147
};
146148

149+
static const char* CURL_HTTP_CLIENT_TAG = "CurlHttpClient";
150+
147151
struct CurlReadCallbackContext
148152
{
149-
CurlReadCallbackContext(const CurlHttpClient* client, CURL* curlHandle, HttpRequest* request, Aws::Utils::RateLimits::RateLimiterInterface* limiter) :
150-
m_client(client),
153+
CurlReadCallbackContext(const CurlHttpClient* client, CURL* curlHandle, HttpRequest* request,
154+
Aws::Utils::RateLimits::RateLimiterInterface* limiter,
155+
std::shared_ptr<AwsChunkedStream<>> chunkedStream = nullptr)
156+
: m_client(client),
151157
m_curlHandle(curlHandle),
152158
m_rateLimiter(limiter),
153159
m_request(request),
154-
m_chunkEnd(false)
155-
{}
156-
157-
const CurlHttpClient* m_client;
158-
CURL* m_curlHandle;
159-
Aws::Utils::RateLimits::RateLimiterInterface* m_rateLimiter;
160-
HttpRequest* m_request;
161-
bool m_chunkEnd;
160+
m_chunkEnd(false),
161+
m_chunkedStream{std::move(chunkedStream)} {}
162+
163+
const CurlHttpClient* m_client;
164+
CURL* m_curlHandle;
165+
Aws::Utils::RateLimits::RateLimiterInterface* m_rateLimiter;
166+
HttpRequest* m_request;
167+
bool m_chunkEnd;
168+
std::shared_ptr<Stream::AwsChunkedStream<>> m_chunkedStream;
162169
};
163170

164-
static const char* CURL_HTTP_CLIENT_TAG = "CurlHttpClient";
165-
166171
static int64_t GetContentLengthFromHeader(CURL* connectionHandle,
167172
bool& hasContentLength) {
168173
#if LIBCURL_VERSION_NUM >= 0x073700 // 7.55.0
@@ -293,67 +298,24 @@ static size_t ReadBody(char* ptr, size_t size, size_t nmemb, void* userdata, boo
293298
size_t amountToRead = size * nmemb;
294299
bool isAwsChunked = request->HasHeader(Aws::Http::CONTENT_ENCODING_HEADER) &&
295300
request->GetHeaderValue(Aws::Http::CONTENT_ENCODING_HEADER) == Aws::Http::AWS_CHUNKED_VALUE;
296-
// aws-chunk = hex(chunk-size) + CRLF + chunk-data + CRLF
297-
// Needs to reserve bytes of sizeof(hex(chunk-size)) + sizeof(CRLF) + sizeof(CRLF)
298-
if (isAwsChunked)
299-
{
300-
Aws::String amountToReadHexString = Aws::Utils::StringUtils::ToHexString(amountToRead);
301-
amountToRead -= (amountToReadHexString.size() + 4);
302-
}
303301

304302
if (ioStream != nullptr && amountToRead > 0)
305303
{
306304
size_t amountRead = 0;
307-
if (isStreaming)
308-
{
309-
if (!ioStream->eof() && ioStream->peek() != EOF)
310-
{
311-
amountRead = (size_t) ioStream->readsome(ptr, amountToRead);
312-
}
313-
if (amountRead == 0 && !ioStream->eof())
314-
{
315-
return CURL_READFUNC_PAUSE;
316-
}
317-
}
318-
else
319-
{
320-
ioStream->read(ptr, amountToRead);
321-
amountRead = static_cast<size_t>(ioStream->gcount());
322-
}
323-
324-
if (isAwsChunked)
325-
{
326-
if (amountRead > 0)
327-
{
328-
if (request->GetRequestHash().second != nullptr)
329-
{
330-
request->GetRequestHash().second->Update(reinterpret_cast<unsigned char*>(ptr), amountRead);
331-
}
332-
333-
Aws::String hex = Aws::Utils::StringUtils::ToHexString(amountRead);
334-
memmove(ptr + hex.size() + 2, ptr, amountRead);
335-
memmove(ptr + hex.size() + 2 + amountRead, "\r\n", 2);
336-
memmove(ptr, hex.c_str(), hex.size());
337-
memmove(ptr + hex.size(), "\r\n", 2);
338-
amountRead += hex.size() + 4;
339-
}
340-
else if (!context->m_chunkEnd)
341-
{
342-
Aws::StringStream chunkedTrailer;
343-
chunkedTrailer << "0\r\n";
344-
if (request->GetRequestHash().second != nullptr)
345-
{
346-
chunkedTrailer << "x-amz-checksum-"
347-
<< request->GetRequestHash().first
348-
<< ":"
349-
<< HashingUtils::Base64Encode(request->GetRequestHash().second->GetHash().GetResult())
350-
<< "\r\n";
351-
}
352-
chunkedTrailer << "\r\n";
353-
amountRead = chunkedTrailer.str().size();
354-
memcpy(ptr, chunkedTrailer.str().c_str(), amountRead);
355-
context->m_chunkEnd = true;
356-
}
305+
if (isStreaming) {
306+
if (!ioStream->eof() && ioStream->peek() != EOF) {
307+
amountRead = (size_t)ioStream->readsome(ptr, amountToRead);
308+
}
309+
if (amountRead == 0 && !ioStream->eof()) {
310+
return CURL_READFUNC_PAUSE;
311+
}
312+
} else if (isAwsChunked && context->m_chunkedStream != nullptr) {
313+
AWS_LOGSTREAM_ERROR(CURL_HTTP_CLIENT_TAG, "Called with size: " << amountToRead);
314+
amountRead = context->m_chunkedStream->BufferedRead(ptr, amountToRead);
315+
AWS_LOGSTREAM_ERROR(CURL_HTTP_CLIENT_TAG, "read: " << amountRead);
316+
} else {
317+
ioStream->read(ptr, amountToRead);
318+
amountRead = static_cast<size_t>(ioStream->gcount());
357319
}
358320

359321
auto& sentHandler = request->GetDataSentEventHandler();
@@ -724,7 +686,14 @@ std::shared_ptr<HttpResponse> CurlHttpClient::MakeRequest(const std::shared_ptr<
724686
}
725687

726688
CurlWriteCallbackContext writeContext(this, request.get(), response.get(), readLimiter);
727-
CurlReadCallbackContext readContext(this, connectionHandle, request.get(), writeLimiter);
689+
690+
const auto readContext = [this, &connectionHandle, &request, &writeLimiter]() -> CurlReadCallbackContext {
691+
if (request->GetContentBody() != nullptr) {
692+
auto chunkedBodyPtr = Aws::MakeShared<AwsChunkedStream<>>(CURL_HTTP_CLIENT_TAG, request.get(), request->GetContentBody());
693+
return {this, connectionHandle, request.get(), writeLimiter, std::move(chunkedBodyPtr)};
694+
}
695+
return {this, connectionHandle, request.get(), writeLimiter};
696+
}();
728697

729698
SetOptCodeForHttpMethod(connectionHandle, request);
730699

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/**
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0.
4+
*/
5+
#include <aws/core/http/standard/StandardHttpRequest.h>
6+
#include <aws/core/utils/crypto/CRC32.h>
7+
#include <aws/core/utils/stream/AwsChunkedStream.h>
8+
#include <aws/testing/AwsCppSdkGTestSuite.h>
9+
10+
using namespace Aws;
11+
using namespace Aws::Http::Standard;
12+
using namespace Aws::Utils::Stream;
13+
using namespace Aws::Utils::Crypto;
14+
15+
class AwsChunkedStreamTest : public Aws::Testing::AwsCppSdkGTestSuite {};
16+
17+
const char* TEST_LOG_TAG = "AWS_CHUNKED_STREAM_TEST";
18+
19+
TEST_F(AwsChunkedStreamTest, ChunkedStreamShouldWork) {
20+
StandardHttpRequest request{"www.elda.com/will", Http::HttpMethod::HTTP_GET};
21+
auto requestHash = Aws::MakeShared<CRC32>(TEST_LOG_TAG);
22+
request.SetRequestHash("crc32", requestHash);
23+
std::shared_ptr<IOStream> inputStream = Aws::MakeShared<StringStream>(TEST_LOG_TAG, "1234567890123456789012345");
24+
AwsChunkedStream<10> chunkedStream{&request, inputStream};
25+
std::array<char, 100> outputBuffer{};
26+
Aws::StringStream output;
27+
size_t readIterations{4};
28+
size_t bufferOffset{0};
29+
while (readIterations > 0) {
30+
bufferOffset = chunkedStream.BufferedRead(outputBuffer.data(), 10);
31+
std::copy(outputBuffer.begin(), outputBuffer.begin() + bufferOffset, std::ostream_iterator<char>(output));
32+
readIterations--;
33+
}
34+
// Read trailing checksum that is greater than 10 chars
35+
bufferOffset = chunkedStream.BufferedRead(outputBuffer.data(), 40);
36+
EXPECT_EQ(36ul, bufferOffset);
37+
std::copy(outputBuffer.begin(), outputBuffer.begin() + bufferOffset, std::ostream_iterator<char>(output));
38+
const auto encodedStr = output.str();
39+
auto expectedStreamWithChecksum = "A\r\n1234567890\r\nA\r\n1234567890\r\n5\r\n12345\r\n0\r\nx-amz-checksum-crc32:78DeVw==\r\n\r\n";
40+
EXPECT_EQ(expectedStreamWithChecksum, encodedStr);
41+
}

tests/aws-cpp-sdk-s3-integration-tests/BucketAndObjectOperationTest.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2518,4 +2518,38 @@ namespace
25182518
}
25192519
}
25202520
}
2521+
2522+
TEST_F(BucketAndObjectOperationTest, PutObjectChecksumWithGuarunteedChunkedObject) {
2523+
struct ChecksumTestCase {
2524+
std::function<PutObjectRequest(PutObjectRequest)> chucksumRequestMutator;
2525+
String body;
2526+
};
2527+
2528+
const String fullBucketName = CalculateBucketName(BASE_CHECKSUMS_BUCKET_NAME.c_str());
2529+
SCOPED_TRACE(Aws::String("FullBucketName ") + fullBucketName);
2530+
CreateBucketRequest createBucketRequest;
2531+
createBucketRequest.SetBucket(fullBucketName);
2532+
createBucketRequest.SetACL(BucketCannedACL::private_);
2533+
CreateBucketOutcome createBucketOutcome = CreateBucket(createBucketRequest);
2534+
AWS_ASSERT_SUCCESS(createBucketOutcome);
2535+
2536+
Vector<ChecksumTestCase> testCases{
2537+
{[](PutObjectRequest request) -> PutObjectRequest { return request.WithChecksumAlgorithm(ChecksumAlgorithm::CRC32); },
2538+
Aws::String(1024 * 1024, 'e')},
2539+
{[](PutObjectRequest request) -> PutObjectRequest { return request.WithChecksumAlgorithm(ChecksumAlgorithm::CRC32C); },
2540+
Aws::String(1024 * 1024, 'l')},
2541+
{[](PutObjectRequest request) -> PutObjectRequest { return request.WithChecksumAlgorithm(ChecksumAlgorithm::SHA1); },
2542+
Aws::String(1024 * 1024, 'd')},
2543+
{[](PutObjectRequest request) -> PutObjectRequest { return request.WithChecksumAlgorithm(ChecksumAlgorithm::SHA256); },
2544+
Aws::String(1024 * 1024, 'a')}};
2545+
2546+
for (const auto& testCase : testCases) {
2547+
auto request = testCase.chucksumRequestMutator(PutObjectRequest().WithBucket(fullBucketName).WithKey("Metaphor"));
2548+
std::shared_ptr<IOStream> body =
2549+
Aws::MakeShared<StringStream>(ALLOCATION_TAG, testCase.body, std::ios_base::in | std::ios_base::binary);
2550+
request.SetBody(body);
2551+
const auto response = Client->PutObject(request);
2552+
EXPECT_TRUE(response.IsSuccess());
2553+
}
2554+
}
25212555
}

0 commit comments

Comments
 (0)