Skip to content

Commit cf25e81

Browse files
authored
Improve iOS/macOS H264 encoder (#70)
* progress * enable low-latency video encoding * fix compile * prioritize speed * fix required os versions * maxQP for screensharing * cbr * Update RTCVideoEncoderH264.mm * Update RTCVideoEncoderH264.mm * format * clean up * resolution alignment * revert resolution alignment
1 parent e67b315 commit cf25e81

File tree

2 files changed

+160
-68
lines changed

2 files changed

+160
-68
lines changed

sdk/objc/components/video_codec/RTCVideoEncoderH264.mm

Lines changed: 159 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,42 @@ - (void)frameWasEncoded : (OSStatus)status flags : (VTEncodeInfoFlags)infoFlags
5454
// The ratio between kVTCompressionPropertyKey_DataRateLimits and
5555
// kVTCompressionPropertyKey_AverageBitRate. The data rate limit is set higher
5656
// than the average bit rate to avoid undershooting the target.
57-
const float kLimitToAverageBitRateFactor = 1.5f;
57+
const float kLimitToAverageBitRateFactor = 10.0f;
5858
// These thresholds deviate from the default h264 QP thresholds, as they
5959
// have been found to work better on devices that support VideoToolbox
6060
const int kLowH264QpThreshold = 28;
6161
const int kHighH264QpThreshold = 39;
62+
const int kBitsPerByte = 8;
6263

6364
const OSType kNV12PixelFormat = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
6465

66+
// Rate-control strategy used when building the VideoToolbox data rate limits.
typedef NS_ENUM(NSInteger, RTCVideoEncodeMode) {
  // Variable bitrate: a per-second peak cap plus a longer-window average cap.
  Variable = 0,
  // Constant bitrate: a single cap enforced over a one-second window.
  Constant = 1,
};

// Builds the array handed to kVTCompressionPropertyKey_DataRateLimits: a flat
// list of (byte count, window length in seconds) pairs.
//
// computedBitrateBps: target bitrate in bits per second.
// mode: selects which limits are produced.
//
// Returns two pairs (peak first, then average) for Variable and a single pair
// for Constant. A mode value outside the enum is handled like Constant so the
// function always returns a well-formed array instead of running off its end.
NSArray *CreateRateLimitArray(uint32_t computedBitrateBps, RTCVideoEncodeMode mode) {
  const int64_t targetBytesPerSecond = computedBitrateBps / kBitsPerByte;

  if (mode == Variable) {
    // 5 seconds should be an okay interval for VBR to enforce the long-term
    // limit.
    const float avgInterval = 5.0;
    // Byte budget for the whole averaging window (not a per-second figure).
    const int64_t avgWindowBytes =
        static_cast<int64_t>(targetBytesPerSecond * static_cast<double>(avgInterval));

    // The peak bitrate is measured per-second in a way similar to CBR.
    const float peakInterval = 1.0;
    // Computed in double and kept 64-bit wide: single-precision math rounds
    // bitrates above ~16.7 Mbps, and a 32-bit result could overflow once the
    // limit factor is applied.
    const int64_t peakBytesPerSecond = static_cast<int64_t>(
        static_cast<double>(computedBitrateBps) * kLimitToAverageBitRateFactor / kBitsPerByte);

    return @[ @(peakBytesPerSecond), @(peakInterval), @(avgWindowBytes), @(avgInterval) ];
  }

  // Constant (and any unexpected mode): CBR is enforced with a granularity of
  // one second.
  const float targetInterval = 1.0;
  return @[ @(targetBytesPerSecond), @(targetInterval) ];
}
92+
6593
// Struct that we pass to the encoder per frame to encode. We receive it again
6694
// in the encoder callback.
6795
struct RTCFrameEncodeParams {
@@ -177,9 +205,9 @@ CFStringRef ExtractProfile(const webrtc::H264ProfileLevelId &profile_level_id, b
177205
switch (profile_level_id.profile) {
178206
case webrtc::H264Profile::kProfileConstrainedBaseline:
179207
case webrtc::H264Profile::kProfileBaseline:
180-
if(screenSharing) {
181-
return kVTProfileLevel_H264_Baseline_AutoLevel;
182-
}
208+
if (screenSharing) {
209+
return kVTProfileLevel_H264_Baseline_AutoLevel;
210+
}
183211
switch (profile_level_id.level) {
184212
case webrtc::H264Level::kLevel3:
185213
return kVTProfileLevel_H264_Baseline_3_0;
@@ -315,8 +343,8 @@ NSUInteger GetMaxSampleRate(const webrtc::H264ProfileLevelId &profile_level_id)
315343

316344
@implementation RTC_OBJC_TYPE (RTCVideoEncoderH264) {
317345
RTC_OBJC_TYPE(RTCVideoCodecInfo) * _codecInfo;
318-
std::unique_ptr<webrtc::BitrateAdjuster> _bitrateAdjuster;
319346
uint32_t _targetBitrateBps;
347+
uint32_t _targetFrameRate;
320348
uint32_t _encoderBitrateBps;
321349
uint32_t _encoderFrameRate;
322350
uint32_t _maxAllowedFrameRate;
@@ -327,10 +355,16 @@ @implementation RTC_OBJC_TYPE (RTCVideoEncoderH264) {
327355
int32_t _height;
328356
VTCompressionSessionRef _compressionSession;
329357
CVPixelBufferPoolRef _pixelBufferPool;
330-
RTCVideoCodecMode _mode;
358+
RTCVideoCodecMode _codecMode;
359+
unsigned int _maxQP;
360+
unsigned int _minBitrate;
361+
unsigned int _maxBitrate;
362+
RTCVideoEncodeMode _encodeMode;
331363

332364
webrtc::H264BitstreamParser _h264BitstreamParser;
333365
std::vector<uint8_t> _frameScaleBuffer;
366+
367+
CMTime _previousPresentationTimeStamp;
334368
}
335369

336370
// .5 is set as a minimum to prevent overcompensating for large temporary
@@ -343,12 +377,14 @@ @implementation RTC_OBJC_TYPE (RTCVideoEncoderH264) {
343377
- (instancetype)initWithCodecInfo:(RTC_OBJC_TYPE(RTCVideoCodecInfo) *)codecInfo {
344378
if (self = [super init]) {
345379
_codecInfo = codecInfo;
346-
_bitrateAdjuster.reset(new webrtc::BitrateAdjuster(.5, .95));
347380
_packetizationMode = RTCH264PacketizationModeNonInterleaved;
348381
_profile_level_id =
349382
webrtc::ParseSdpForH264ProfileLevelId([codecInfo nativeSdpVideoFormat].parameters);
383+
_previousPresentationTimeStamp = kCMTimeZero;
350384
RTC_DCHECK(_profile_level_id);
351-
RTC_LOG(LS_INFO) << "Using profile " << CFStringToString(ExtractProfile(*_profile_level_id, _mode == RTCVideoCodecModeScreensharing));
385+
RTC_LOG(LS_INFO) << "Using profile "
386+
<< CFStringToString(ExtractProfile(
387+
*_profile_level_id, _codecMode == RTCVideoCodecModeScreensharing));
352388
RTC_CHECK([codecInfo.name isEqualToString:kRTCVideoCodecH264Name]);
353389
}
354390
return self;
@@ -365,17 +401,28 @@ - (NSInteger)startEncodeWithSettings:(RTC_OBJC_TYPE(RTCVideoEncoderSettings) *)s
365401

366402
_width = settings.width;
367403
_height = settings.height;
368-
_mode = settings.mode;
404+
_codecMode = settings.mode;
405+
_maxQP = settings.qpMax;
406+
407+
_encodeMode = Variable; // Always variable mode for now
408+
_minBitrate = settings.minBitrate * 1000; // minBitrate is in kbps.
409+
_maxBitrate = settings.maxBitrate * 1000; // maxBitrate is in kbps.
369410

370411
uint32_t aligned_width = (((_width + 15) >> 4) << 4);
371412
uint32_t aligned_height = (((_height + 15) >> 4) << 4);
372413
_maxAllowedFrameRate = static_cast<uint32_t>(GetMaxSampleRate(*_profile_level_id) /
373414
(aligned_width * aligned_height));
374415

375416
// We can only set average bitrate on the HW encoder.
376-
_targetBitrateBps = settings.startBitrate * 1000; // startBitrate is in kbps.
377-
_bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps);
378-
_encoderFrameRate = MIN(settings.maxFramerate, _maxAllowedFrameRate);
417+
if (_encodeMode == Constant) {
418+
_targetBitrateBps = _maxBitrate;
419+
} else {
420+
_targetBitrateBps = settings.startBitrate * 1000; // startBitrate is in kbps.
421+
}
422+
423+
_targetFrameRate = MIN(settings.maxFramerate, _maxAllowedFrameRate);
424+
_encoderBitrateBps = 0;
425+
_encoderFrameRate = 0;
379426
if (settings.maxFramerate > _maxAllowedFrameRate && _maxAllowedFrameRate > 0) {
380427
RTC_LOG(LS_WARNING) << "Initial encoder frame rate setting " << settings.maxFramerate
381428
<< " is larger than the "
@@ -396,8 +443,15 @@ - (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame
396443
if (!_callback || !_compressionSession) {
397444
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
398445
}
399-
BOOL isKeyframeRequired = NO;
400446

447+
CMTime presentationTimeStamp = CMTimeMake(frame.timeStampNs / rtc::kNumNanosecsPerMillisec, 1000);
448+
if (CMTimeCompare(presentationTimeStamp, _previousPresentationTimeStamp) == 0) {
449+
// Same PTS as the previous frame: skip it instead of submitting it to the encoder.
450+
return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
451+
}
452+
_previousPresentationTimeStamp = presentationTimeStamp;
453+
454+
BOOL isKeyframeRequired = NO;
401455
// Get a pixel buffer from the pool and copy frame data over.
402456
if ([self resetCompressionSessionIfNeededWithFrame:frame]) {
403457
isKeyframeRequired = YES;
@@ -424,8 +478,8 @@ - (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame
424478
int dstWidth = CVPixelBufferGetWidth(pixelBuffer);
425479
int dstHeight = CVPixelBufferGetHeight(pixelBuffer);
426480
if ([rtcPixelBuffer requiresScalingToWidth:dstWidth height:dstHeight]) {
427-
int size =
428-
[rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth height:dstHeight];
481+
int size = [rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth
482+
height:dstHeight];
429483
_frameScaleBuffer.resize(size);
430484
} else {
431485
_frameScaleBuffer.clear();
@@ -462,7 +516,6 @@ - (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame
462516
}
463517
}
464518

465-
CMTime presentationTimeStamp = CMTimeMake(frame.timeStampNs / rtc::kNumNanosecsPerMillisec, 1000);
466519
CFDictionaryRef frameProperties = nullptr;
467520
if (isKeyframeRequired) {
468521
CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame};
@@ -480,8 +533,8 @@ - (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame
480533
frame.rotation));
481534
encodeParams->codecSpecificInfo.packetizationMode = _packetizationMode;
482535

483-
// Update the bitrate if needed.
484-
[self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps() frameRate:_encoderFrameRate];
536+
// Update encoder bitrate or frameRate if needed.
537+
[self updateEncoderBitrateAndFrameRate];
485538

486539
OSStatus status = VTCompressionSessionEncodeFrame(_compressionSession,
487540
pixelBuffer,
@@ -522,14 +575,19 @@ - (void)setCallback:(RTCVideoEncoderCallback)callback {
522575
}
523576

524577
- (int)setBitrate:(uint32_t)bitrateKbit framerate:(uint32_t)framerate {
525-
_targetBitrateBps = 1000 * bitrateKbit;
526-
_bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps);
578+
// set target bitrate bps
579+
_targetBitrateBps = bitrateKbit * 1000;
580+
581+
RTC_LOG(LS_INFO) << "setBitrateKBit: " << bitrateKbit << " targetBps: " << _targetBitrateBps
582+
<< " frameRate: " << framerate;
583+
527584
if (framerate > _maxAllowedFrameRate && _maxAllowedFrameRate > 0) {
528585
RTC_LOG(LS_WARNING) << "Encoder frame rate setting " << framerate << " is larger than the "
529586
<< "maximal allowed frame rate " << _maxAllowedFrameRate << ".";
530587
}
531-
framerate = MIN(framerate, _maxAllowedFrameRate);
532-
[self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps() frameRate:framerate];
588+
589+
_targetFrameRate = MIN(framerate, _maxAllowedFrameRate);
590+
533591
return WEBRTC_VIDEO_CODEC_OK;
534592
}
535593

@@ -621,14 +679,19 @@ - (int)resetCompressionSessionWithPixelFormat:(OSType)framePixelFormat {
621679
(NSString *)kCVPixelBufferPixelFormatTypeKey : @(framePixelFormat),
622680
};
623681

624-
NSDictionary *encoder_specs;
682+
NSMutableDictionary *encoder_specs;
625683
#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
626684
// Currently hw accl is supported above 360p on mac, below 360p
627685
// the compression session will be created with hw accl disabled.
628-
encoder_specs = @{
686+
encoder_specs = [@{
629687
(NSString *)kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder : @(YES),
630-
};
631-
688+
} mutableCopy];
689+
// Enable low-latency video encoding
690+
if (@available(iOS 14.5, macOS 11.3, *)) {
691+
[encoder_specs addEntriesFromDictionary:@{
692+
(NSString *)kVTVideoEncoderSpecification_EnableLowLatencyRateControl : @(YES),
693+
}];
694+
}
632695
#endif
633696
OSStatus status = VTCompressionSessionCreate(
634697
nullptr, // use default allocator
@@ -669,11 +732,30 @@ - (int)resetCompressionSessionWithPixelFormat:(OSType)framePixelFormat {
669732
- (void)configureCompressionSession {
670733
RTC_DCHECK(_compressionSession);
671734
SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_RealTime, true);
672-
SetVTSessionProperty(_compressionSession,
673-
kVTCompressionPropertyKey_ProfileLevel,
674-
ExtractProfile(*_profile_level_id, _mode == RTCVideoCodecModeScreensharing));
735+
// Prioritize encoding speed over quality when necessary
736+
if (@available(iOS 14.0, macOS 11.0, *)) {
737+
SetVTSessionProperty(
738+
_compressionSession, kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality, true);
739+
}
740+
// Set maximum QP for screen sharing mode, range must be within 1 to 51
741+
// https://developer.apple.com/documentation/videotoolbox/kvtcompressionpropertykey_maxallowedframeqp
742+
if (@available(iOS 15.0, macOS 12.0, *)) {
743+
// Only enable for screen sharing and let VideoToolbox do the optimizing as much as possible.
744+
if (_codecMode == RTCVideoCodecModeScreensharing) {
745+
RTC_LOG(LS_INFO) << "Configuring VideoToolbox to use maxQP: " << kHighH264QpThreshold
746+
<< " mode: " << _codecMode;
747+
SetVTSessionProperty(
748+
_compressionSession, kVTCompressionPropertyKey_MaxAllowedFrameQP, kHighH264QpThreshold);
749+
}
750+
}
751+
SetVTSessionProperty(
752+
_compressionSession,
753+
kVTCompressionPropertyKey_ProfileLevel,
754+
ExtractProfile(*_profile_level_id, _codecMode == RTCVideoCodecModeScreensharing));
675755
SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_AllowFrameReordering, false);
676-
[self setEncoderBitrateBps:_targetBitrateBps frameRate:_encoderFrameRate];
756+
757+
// [self updateEncoderBitrateAndFrameRate];
758+
677759
// TODO(tkchin): Look at entropy mode and colorspace matrices.
678760
// TODO(tkchin): Investigate to see if there's any way to make this work.
679761
// May need it to interop with Android. Currently this call just fails.
@@ -701,49 +783,59 @@ - (NSString *)implementationName {
701783
return @"VideoToolbox";
702784
}
703785

704-
- (void)setBitrateBps:(uint32_t)bitrateBps frameRate:(uint32_t)frameRate {
705-
if (_encoderBitrateBps != bitrateBps || _encoderFrameRate != frameRate) {
706-
[self setEncoderBitrateBps:bitrateBps frameRate:frameRate];
786+
- (void)updateEncoderBitrateAndFrameRate {
787+
// If no compression session simply return
788+
if (!_compressionSession) {
789+
return;
707790
}
708-
}
791+
// Initial status
792+
OSStatus status = noErr;
709793

710-
- (void)setEncoderBitrateBps:(uint32_t)bitrateBps frameRate:(uint32_t)frameRate {
711-
if (_compressionSession) {
712-
SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_AverageBitRate, bitrateBps);
794+
uint32_t computedBitrateBps = _targetBitrateBps;
713795

714-
// With zero `_maxAllowedFrameRate`, we fall back to automatic frame rate detection.
715-
if (_maxAllowedFrameRate > 0) {
716-
SetVTSessionProperty(
717-
_compressionSession, kVTCompressionPropertyKey_ExpectedFrameRate, frameRate);
718-
}
796+
// With zero `_maxAllowedFrameRate`, we fall back to automatic frame rate detection.
797+
uint32_t computedFrameRate = _maxAllowedFrameRate > 0 ? _targetFrameRate : 0;
719798

720-
// TODO(tkchin): Add a helper method to set array value.
721-
int64_t dataLimitBytesPerSecondValue =
722-
static_cast<int64_t>(bitrateBps * kLimitToAverageBitRateFactor / 8);
723-
CFNumberRef bytesPerSecond =
724-
CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &dataLimitBytesPerSecondValue);
725-
int64_t oneSecondValue = 1;
726-
CFNumberRef oneSecond =
727-
CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &oneSecondValue);
728-
const void *nums[2] = {bytesPerSecond, oneSecond};
729-
CFArrayRef dataRateLimits = CFArrayCreate(nullptr, nums, 2, &kCFTypeArrayCallBacks);
730-
OSStatus status = VTSessionSetProperty(
731-
_compressionSession, kVTCompressionPropertyKey_DataRateLimits, dataRateLimits);
732-
if (bytesPerSecond) {
733-
CFRelease(bytesPerSecond);
734-
}
735-
if (oneSecond) {
736-
CFRelease(oneSecond);
799+
// Set frame rate
800+
if (computedFrameRate != _encoderFrameRate) {
801+
status = VTSessionSetProperty(_compressionSession,
802+
kVTCompressionPropertyKey_ExpectedFrameRate,
803+
(__bridge CFTypeRef) @(computedFrameRate));
804+
// Ensure the frame rate was set successfully
805+
if (status != noErr) {
806+
RTC_LOG(LS_ERROR) << "Failed to set frame rate: " << computedFrameRate
807+
<< " error: " << status;
808+
} else {
809+
RTC_LOG(LS_INFO) << "Did update encoder frame rate: " << computedFrameRate;
737810
}
738-
if (dataRateLimits) {
739-
CFRelease(dataRateLimits);
811+
_encoderFrameRate = computedFrameRate;
812+
}
813+
814+
// Set bitrate
815+
if (computedBitrateBps != _encoderBitrateBps) {
816+
status = VTSessionSetProperty(_compressionSession,
817+
kVTCompressionPropertyKey_AverageBitRate,
818+
(__bridge CFTypeRef) @(computedBitrateBps));
819+
820+
// Ensure the bitrate was set successfully
821+
if (status != noErr) {
822+
RTC_LOG(LS_ERROR) << "Failed to update encoder bitrate: " << computedBitrateBps
823+
<< "error: " << status;
824+
} else {
825+
RTC_LOG(LS_INFO) << "Did update encoder bitrate: " << computedBitrateBps;
740826
}
827+
828+
status = VTSessionSetProperty(
829+
_compressionSession,
830+
kVTCompressionPropertyKey_DataRateLimits,
831+
(__bridge CFArrayRef)CreateRateLimitArray(computedBitrateBps, _encodeMode));
741832
if (status != noErr) {
742-
RTC_LOG(LS_ERROR) << "Failed to set data rate limit with code: " << status;
833+
RTC_LOG(LS_ERROR) << "Failed to update encoder data rate limits";
834+
} else {
835+
RTC_LOG(LS_INFO) << "Did update encoder data rate limits";
743836
}
744837

745-
_encoderBitrateBps = bitrateBps;
746-
_encoderFrameRate = frameRate;
838+
_encoderBitrateBps = computedBitrateBps;
747839
}
748840
}
749841

@@ -799,8 +891,9 @@ - (void)frameWasEncoded:(OSStatus)status
799891
frame.captureTimeMs = renderTimeMs;
800892
frame.timeStamp = timestamp;
801893
frame.rotation = rotation;
802-
frame.contentType = (_mode == RTCVideoCodecModeScreensharing) ? RTCVideoContentTypeScreenshare :
803-
RTCVideoContentTypeUnspecified;
894+
frame.contentType = (_codecMode == RTCVideoCodecModeScreensharing) ?
895+
RTCVideoContentTypeScreenshare :
896+
RTCVideoContentTypeUnspecified;
804897
frame.flags = webrtc::VideoSendTiming::kInvalid;
805898

806899
_h264BitstreamParser.ParseBitstream(*buffer);
@@ -811,7 +904,6 @@ - (void)frameWasEncoded:(OSStatus)status
811904
RTC_LOG(LS_ERROR) << "Encode callback failed";
812905
return;
813906
}
814-
_bitrateAdjuster->Update(frame.buffer.length);
815907
}
816908

817909
- (nullable RTC_OBJC_TYPE(RTCVideoEncoderQpThresholds) *)scalingSettings {

sdk/objc/native/src/objc_video_track_source.mm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ - (void)capturer:(RTC_OBJC_TYPE(RTCVideoCapturer) *)capturer
3737
ObjCVideoTrackSource::ObjCVideoTrackSource() : ObjCVideoTrackSource(false) {}
3838

3939
ObjCVideoTrackSource::ObjCVideoTrackSource(bool is_screencast)
40-
: AdaptedVideoTrackSource(/* required resolution alignment */ is_screencast? 16 : 2),
40+
: AdaptedVideoTrackSource(/* required resolution alignment */ 2),
4141
is_screencast_(is_screencast) {}
4242

4343
ObjCVideoTrackSource::ObjCVideoTrackSource(RTCObjCVideoSourceAdapter *adapter) : adapter_(adapter) {

0 commit comments

Comments
 (0)