@@ -78,7 +78,6 @@ namespace NActors {
7878 Metrics->UpdateIcQueueTimeHistogram (duration.MicroSeconds ());
7979 }
8080 event.Span && event.Span .Event (" FeedBuf:INITIAL" );
81- SendViaRdma.reset ();
8281 if (event.Buffer ) {
8382 State = EState::BODY;
8483 Iter = event.Buffer ->GetBeginIter ();
@@ -106,21 +105,14 @@ namespace NActors {
106105 State = EState::SECTIONS;
107106 SectionIndex = 0 ;
108107
109- size_t totalSize = 0 ;
110- // It is possible to have event without payload. Such events has only one section.
111- // We do not send such events via rdma.
112- bool sendViaRdma = Params.UseRdma && RdmaMemPool && SerializationInfo->Sections .size () > 2 ;
113- // Check each section can be send via rdma
108+ bool sendViaRdma = false ;
109+ // Check if any section can be sent via rdma
114110 for (const auto & section : SerializationInfo->Sections ) {
115- sendViaRdma &= section.IsRdmaCapable ;
116- totalSize += section.Size ;
111+ sendViaRdma |= section.IsRdmaCapable ;
117112 }
118- if (sendViaRdma) {
119- Y_ABORT_UNLESS (totalSize, " got empty sz, sections: %d type: %d " , SerializationInfo->Sections .size (), event.Event ->Type ());
113+ if (sendViaRdma && Params.UseRdma && RdmaMemPool) {
120114 NActorsInterconnect::TRdmaCreds rdmaCreds;
121- ui32 checkSum = 0 ;
122- if (SerializeEventRdma (event, rdmaCreds, task.Params .ChecksumRdmaEvent ? &checkSum : nullptr , rdmaDeviceIndex)) {
123- SendViaRdma.emplace (TRdmaSerializationArtifacts{std::move (rdmaCreds), checkSum});
115+ if (SerializeEventRdma (event)) {
124116 Chunker.DiscardEvent ();
125117 }
126118 }
@@ -168,8 +160,7 @@ namespace NActors {
168160 p += NInterconnect::NDetail::SerializeNumber (section.Alignment , p);
169161 if (section.IsInline && Params.UseXdcShuffle ) {
170162 type = static_cast <ui8>(EXdcCommand::DECLARE_SECTION_INLINE);
171- }
172- if (SendViaRdma) {
163+ } else if (section.IsRdmaCapable ) {
173164 type = static_cast <ui8>(EXdcCommand::DECLARE_SECTION_RDMA);
174165 }
175166 Y_ABORT_UNLESS (p <= std::end (sectionInfo));
@@ -268,16 +259,18 @@ namespace NActors {
268259 if (!Params.UseExternalDataChannel || sections.empty ()) {
269260 // all data goes inline
270261 IsPartInline = true ;
262+ IsPartRdma = false ;
271263 PartLenRemain = Max<size_t >();
272- } else if (!Params.UseXdcShuffle || SendViaRdma ) {
264+ } else if (!Params.UseXdcShuffle ) {
273265 // when UseXdcShuffle feature is not supported by the remote side, we transfer the whole event over XDC
274- // also when we use RDMA, we transfer whole over RDMA
275266 IsPartInline = false ;
267+ IsPartRdma = false ;
276268 PartLenRemain = Max<size_t >();
277269 } else {
278270 Y_ABORT_UNLESS (SectionIndex < sections.size ());
279271 IsPartInline = sections[SectionIndex].IsInline ;
280- while (SectionIndex < sections.size () && IsPartInline == sections[SectionIndex].IsInline ) {
272+ IsPartRdma = sections[SectionIndex].IsRdmaCapable ;
273+ while (SectionIndex < sections.size () && IsPartInline == sections[SectionIndex].IsInline && IsPartRdma == sections[SectionIndex].IsRdmaCapable ) {
281274 PartLenRemain += sections[SectionIndex].Size ;
282275 ++SectionIndex;
283276 }
@@ -288,8 +281,8 @@ namespace NActors {
288281 std::optional<bool > complete = false ;
289282 if (IsPartInline) {
290283 complete = FeedInlinePayload (task, event);
291- } else if (SendViaRdma ) {
292- complete = FeedRdmaPayload (task, event, rdmaDeviceIndex);
284+ } else if (IsPartRdma ) {
285+ complete = FeedRdmaPayload (task, event, rdmaDeviceIndex, task. Params . ChecksumRdmaEvent );
293286 } else {
294287 complete = FeedExternalPayload (task, event);
295288 }
@@ -325,56 +318,55 @@ namespace NActors {
325318 return complete;
326319 }
327320
328- bool TEventOutputChannel::SerializeEventRdma (TEventHolder& event, NActorsInterconnect::TRdmaCreds& rdmaCreds,
329- ui32* checksum, ssize_t rdmaDeviceIndex)
330- {
321+ bool TEventOutputChannel::SerializeEventRdma (TEventHolder& event) {
331322 if (!event.Buffer && event.Event ) {
332- std::optional<TRope> rope = event.Event ->SerializeToRope (RdmaMemPool.get ());
323+ // std::optional<TRope> rope = event.Event->SerializeToRope(RdmaMemPool.get());
324+ std::optional<TRope> rope = event.Event ->SerializeToRope (GetDefaultRcBufAllocator ());
333325 if (!rope) {
334326 return false ; // serialization failed
335327 }
336328 event.Buffer = MakeIntrusive<TEventSerializedData>(
337329 std::move (*rope), event.Event ->CreateSerializationInfo ()
338330 );
331+ event.Event = nullptr ;
339332 Iter = event.Buffer ->GetBeginIter ();
340333 }
341334
335+ return true ;
336+ }
337+
338+ std::optional<bool > TEventOutputChannel::FeedRdmaPayload (TTcpPacketOutTask& task, TEventHolder& event, ssize_t rdmaDeviceIndex, bool checksumming) {
339+ Y_ABORT_UNLESS (rdmaDeviceIndex >= 0 );
340+
342341 XXH3_state_t state;
343- if (checksum ) {
342+ if (checksumming ) {
344343 XXH3_64bits_reset (&state);
345344 }
346345
347- if (event.Buffer ) {
348- for (; Iter.Valid (); ++Iter) {
346+ Y_ABORT_UNLESS (event.Buffer );
347+ if (RdmaCredsBuffer.CredsSize () == 0 ) {
348+ for (; Iter.Valid () && PartLenRemain; ++Iter) {
349349 TRcBuf buf = Iter.GetChunk ();
350350 auto memReg = NInterconnect::NRdma::TryExtractFromRcBuf (buf);
351351 if (memReg.Empty ()) {
352352 // TODO: maybe copy the chunk to an RDMA buffer instead of failing?
353353 Iter = event.Buffer ->GetBeginIter ();
354354 return false ;
355355 }
356- if (checksum ) {
356+ if (checksumming ) {
357357 XXH3_64bits_update (&state, buf.GetData (), buf.GetSize ());
358358 }
359- auto cred = rdmaCreds .AddCreds ();
359+ auto cred = RdmaCredsBuffer .AddCreds ();
360360 cred->SetAddress (reinterpret_cast <ui64>(memReg.GetAddr ()));
361361 cred->SetSize (memReg.GetSize ());
362362 cred->SetRkey (memReg.GetRKey (rdmaDeviceIndex));
363+
364+ event.EventActuallySerialized += buf.GetSize ();
365+ PartLenRemain -= buf.GetSize ();
363366 }
364367 }
365368
366- if (checksum) {
367- *checksum = XXH3_64bits_digest (&state);
368- }
369- return true ;
370- }
371-
372- std::optional<bool > TEventOutputChannel::FeedRdmaPayload (TTcpPacketOutTask& task, TEventHolder& event, ssize_t rdmaDeviceIndex) {
373- Y_ABORT_UNLESS (rdmaDeviceIndex >= 0 );
374- const NActorsInterconnect::TRdmaCreds& rdmaCreds = SendViaRdma->RdmaCreds ;
375- ui32 checkSum = SendViaRdma->CheckSum ;
376-
377- ui16 credsSerializedSize = rdmaCreds.ByteSizeLong ();
369+ ui16 credsSerializedSize = RdmaCredsBuffer.ByteSizeLong ();
378370 // Part = | TChannelPart | EXdcCommand::RDMA_READ | rdmaCreds.Size | rdmaCreds | checkSum |
379371 size_t partSize = sizeof (TChannelPart) + sizeof (ui8) + sizeof (ui16) + credsSerializedSize + sizeof (ui32);
380372 Y_ABORT_UNLESS (partSize < 4096 );
@@ -396,20 +388,22 @@ namespace NActors {
396388 ptr += sizeof (ui16);
397389
398390 ui32 payloadSz = 0 ;
399- for (const auto & rdmaCred : rdmaCreds .GetCreds ()) {
391+ for (const auto & rdmaCred : RdmaCredsBuffer .GetCreds ()) {
400392 payloadSz += rdmaCred.GetSize ();
401393 }
402394
403- Y_ABORT_UNLESS (rdmaCreds .SerializePartialToArray (ptr, credsSerializedSize));
395+ Y_ABORT_UNLESS (RdmaCredsBuffer .SerializePartialToArray (ptr, credsSerializedSize));
404396 ptr += credsSerializedSize;
405- WriteUnaligned<ui32>(ptr, checkSum );
406- OutputQueueSize -= event. EventSerializedSize ;
397+ WriteUnaligned<ui32>(ptr, checksumming ? XXH3_64bits_digest (&state) : 0 );
398+ OutputQueueSize -= payloadSz ;
407399
408400 task.Write <false >(buffer, partSize);
409401
410402 task.AttachRdmaPayloadSize (payloadSz);
411403
412- return true ;
404+ RdmaCredsBuffer.Clear ();
405+
406+ return !Iter.Valid ();
413407 }
414408
415409 std::optional<bool > TEventOutputChannel::FeedExternalPayload (TTcpPacketOutTask& task, TEventHolder& event) {
0 commit comments