-
Notifications
You must be signed in to change notification settings - Fork 90
Description
I am using your WebRTC library in a VoIP communication app. I need to record the remote audio, and found that you've recently (GetStream/webrtc#42) implemented AudioTrackSink which is great, as it is a way to intercept and record the remote audiotrack.
In my app, I am able to record the local audiotrack without distortions. I'm using JavaAudioDeviceModule.setSamplesReadyCallback() to intercept the local audiotrack. Here's the working code:
public void onWebRtcAudioRecordSamplesReady(JavaAudioDeviceModule.AudioSamples audioSamples) {
if (!isRunning)
return;
audioThreadHandler.post(() -> {
if (audioEncoder == null) try {
audioEncoder = MediaCodec.createEncoderByType("audio/mp4a-latm");
MediaFormat format = new MediaFormat();
format.setString(MediaFormat.KEY_MIME, "audio/mp4a-latm");
format.setInteger(MediaFormat.KEY_CHANNEL_COUNT, audioSamples.getChannelCount());
format.setInteger(MediaFormat.KEY_SAMPLE_RATE, audioSamples.getSampleRate());
format.setInteger(MediaFormat.KEY_BIT_RATE, 64 * 1024);
format.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC);
audioEncoder.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
audioEncoder.start();
} catch (IOException exception) {
Log.wtf(TAG, exception);
}
int bufferIndex = audioEncoder.dequeueInputBuffer(0);
if (bufferIndex >= 0) {
ByteBuffer buffer = audioEncoder.getInputBuffer(bufferIndex);
buffer.clear();
byte[] data = audioSamples.getData();
buffer.put(data);
audioEncoder.queueInputBuffer(bufferIndex, 0, data.length, presTime, 0);
presTime += 1000000l * data.length / audioSamples.getSampleRate() / 2;
}
drainAudio();
});
}
private void drainAudio() {
if (audioBufferInfo == null)
audioBufferInfo = new MediaCodec.BufferInfo();
while (true) {
int encoderStatus = audioEncoder.dequeueOutputBuffer(audioBufferInfo, 0);
if (encoderStatus == MediaCodec.INFO_TRY_AGAIN_LATER) {
break;
} else if (encoderStatus == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
// not expected for an encoder
Log.w(TAG, "encoder output buffers changed");
} else if (encoderStatus == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
// not expected for an encoder
MediaFormat newFormat = audioEncoder.getOutputFormat();
Log.w(TAG, "encoder output format changed: " + newFormat);
audioTrackIndex = mediaMuxer.addTrack(newFormat);
if (trackIndex != -1 && !muxerStarted) {
mediaMuxer.start();
muxerStarted = true;
}
if (!muxerStarted)
break;
} else if (encoderStatus < 0) {
Log.e(TAG, "unexpected result fr om encoder.dequeueOutputBuffer: " + encoderStatus);
} else { // encoderStatus >= 0
try {
ByteBuffer encodedData = audioEncoder.getOutputBuffer(encoderStatus);
if (encodedData == null) {
Log.e(TAG, "encoderOutputBuffer " + encoderStatus + " was null");
break;
}
// It's usually necessary to adjust the ByteBuffer values to match BufferInfo.
encodedData.position(audioBufferInfo.offset);
encodedData.limit(audioBufferInfo.offset + audioBufferInfo.size);
if (muxerStarted)
mediaMuxer.writeSampleData(audioTrackIndex, encodedData, audioBufferInfo);
isRunning = isRunning && (audioBufferInfo.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) == 0;
audioEncoder.releaseOutputBuffer(encoderStatus, false);
if ((audioBufferInfo.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
break;
}
} catch (Exception e) {
Log.wtf(TAG, e);
break;
}
}
}
}
To intercept the remote audiotrack, I'm using the method AudioTrack.addSink(), and I'm using eventually the same handler to process the samples. Here's the code:
@Override
public void onData(ByteBuffer audioData,
int bitsPerSample,
int sampleRate,
int numberOfChannels,
int numberOfFrames,
long absoluteCaptureTimestampMs) {
if (!isRunning)
return;
audioThreadHandler.post(() -> {
if (audioEncoder == null) try {
audioEncoder = MediaCodec.createEncoderByType("audio/mp4a-latm");
MediaFormat format = new MediaFormat();
format.setString(MediaFormat.KEY_MIME, "audio/mp4a-latm");
format.setInteger(MediaFormat.KEY_CHANNEL_COUNT, numberOfChannels);
format.setInteger(MediaFormat.KEY_SAMPLE_RATE, sampleRate);
format.setInteger(MediaFormat.KEY_BIT_RATE, 64 * 1024);
format.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC);
audioEncoder.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
audioEncoder.start();
} catch (IOException exception) {
Log.wtf(TAG, exception);
}
int bufferIndex = audioEncoder.dequeueInputBuffer(0);
if (bufferIndex >= 0) {
ByteBuffer buffer = audioEncoder.getInputBuffer(bufferIndex);
buffer.clear();
int length = audioData.remaining();
buffer.put(audioData)
audioEncoder.queueInputBuffer(bufferIndex, 0, length, presTime, 0);
presTime += 1000000l * numberOfFrames / sampleRate;
}
drainAudio();
});
}
Unfortunately this code corrupts the audiotrack. I hear the voice, the timing is correct, but it just sounds wrongly. The same remote audiotrack sounds good when played on a device speaker by WebRTC.
What am I doing wrong? Since the local audiotrack is encoded and recorded perfectly by using the same code, and the remote audio interception is a very recent new feature released just a few month ago, I think this could be a bug, so I'm reporting it here.
By the way, the absoluteCaptureTimestampMs in this handler is always 0. This doesn't seem to be critical, but I believe this is a bug as well.