AudioTrackSink corrupts the remote audiotrack

I am using your WebRTC library in a VoIP communication app. I need to record the remote audio, and found that you've recently (https://github.com/GetStream/webrtc/pull/42) implemented AudioTrackSink which is great, as it is a way to intercept and record the remote audiotrack.

In my app, I am able to record the local audiotrack without distortions. I'm using JavaAudioDeviceModule.setSamplesReadyCallback() to intercept the local audiotrack. Here's the working code:

```
    /**
     * Receives a chunk of locally captured PCM audio and queues it to the AAC encoder
     * on the audio handler thread.
     *
     * <p>The encoder is created lazily from the channel count and sample rate of the first
     * chunk. After queueing, {@code drainAudio()} is called to pull any encoded output.
     *
     * @param audioSamples captured PCM data together with its sample rate and channel count
     */
    public void onWebRtcAudioRecordSamplesReady(JavaAudioDeviceModule.AudioSamples audioSamples) {
        if (!isRunning) {
            return;
        }
        audioThreadHandler.post(() -> {
            if (audioEncoder == null) {
                try {
                    MediaCodec encoder = MediaCodec.createEncoderByType("audio/mp4a-latm");
                    MediaFormat format = new MediaFormat();
                    format.setString(MediaFormat.KEY_MIME, "audio/mp4a-latm");
                    format.setInteger(MediaFormat.KEY_CHANNEL_COUNT, audioSamples.getChannelCount());
                    format.setInteger(MediaFormat.KEY_SAMPLE_RATE, audioSamples.getSampleRate());
                    format.setInteger(MediaFormat.KEY_BIT_RATE, 64 * 1024);
                    format.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC);
                    encoder.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
                    encoder.start();
                    // Publish the encoder only once it is fully configured and started.
                    audioEncoder = encoder;
                } catch (IOException exception) {
                    Log.wtf(TAG, exception);
                    // Without an encoder there is nothing to feed; bail out instead of
                    // dereferencing a null audioEncoder below.
                    return;
                }
            }
            int bufferIndex = audioEncoder.dequeueInputBuffer(0);
            if (bufferIndex >= 0) {
                ByteBuffer buffer = audioEncoder.getInputBuffer(bufferIndex);
                buffer.clear();
                byte[] data = audioSamples.getData();
                buffer.put(data);
                audioEncoder.queueInputBuffer(bufferIndex, 0, data.length, presTime, 0);
                // Advance the presentation time by the duration of this chunk in microseconds.
                // Assumes 16-bit PCM (2 bytes per sample per channel) - TODO confirm against
                // the audio format reported by audioSamples.
                long bytesPerFrame = 2L * Math.max(1, audioSamples.getChannelCount());
                presTime += 1_000_000L * data.length / audioSamples.getSampleRate() / bytesPerFrame;
            }
            drainAudio();
        });
    }
```

```
    /**
     * Pulls every output buffer that is currently available from {@code audioEncoder} and
     * writes the encoded samples to {@code mediaMuxer} once the muxer has been started.
     *
     * <p>Uses a zero timeout, so it returns as soon as the encoder has nothing more to give.
     * Clears {@code isRunning} when the encoder signals end of stream.
     */
    private void drainAudio() {
        if (audioEncoder == null) {
            // Encoder creation failed or has not happened yet; nothing to drain.
            return;
        }
        if (audioBufferInfo == null) {
            audioBufferInfo = new MediaCodec.BufferInfo();
        }
        while (true) {
            int encoderStatus = audioEncoder.dequeueOutputBuffer(audioBufferInfo, 0);
            if (encoderStatus == MediaCodec.INFO_TRY_AGAIN_LATER) {
                // No output available right now.
                break;
            } else if (encoderStatus == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
                // not expected for an encoder
                Log.w(TAG, "encoder output buffers changed");
            } else if (encoderStatus == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
                // Reported before the first encoded buffer; the output format carries the
                // codec-specific data the muxer needs, so this is where the track is added.
                MediaFormat newFormat = audioEncoder.getOutputFormat();

                Log.w(TAG, "encoder output format changed: " + newFormat);
                audioTrackIndex = mediaMuxer.addTrack(newFormat);
                // trackIndex is declared outside this snippet - presumably the index of the
                // other (video) track; the muxer is started only once it is known.
                if (trackIndex != -1 && !muxerStarted) {
                    mediaMuxer.start();
                    muxerStarted = true;
                }
                if (!muxerStarted) {
                    break;
                }
            } else if (encoderStatus < 0) {
                Log.e(TAG, "unexpected result from encoder.dequeueOutputBuffer: " + encoderStatus);
            } else { // encoderStatus >= 0
                try {
                    ByteBuffer encodedData = audioEncoder.getOutputBuffer(encoderStatus);
                    if (encodedData == null) {
                        Log.e(TAG, "encoderOutputBuffer " + encoderStatus + " was null");
                        break;
                    }
                    if ((audioBufferInfo.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
                        // Codec config was already handed to the muxer through the output
                        // format; it must not be written again as an audio sample.
                        audioBufferInfo.size = 0;
                    }
                    if (muxerStarted && audioBufferInfo.size != 0) {
                        // It's usually necessary to adjust the ByteBuffer values to match BufferInfo.
                        encodedData.position(audioBufferInfo.offset);
                        encodedData.limit(audioBufferInfo.offset + audioBufferInfo.size);
                        mediaMuxer.writeSampleData(audioTrackIndex, encodedData, audioBufferInfo);
                    }
                    boolean endOfStream =
                            (audioBufferInfo.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0;
                    isRunning = isRunning && !endOfStream;
                    audioEncoder.releaseOutputBuffer(encoderStatus, false);
                    if (endOfStream) {
                        break;
                    }
                } catch (Exception e) {
                    Log.wtf(TAG, e);
                    break;
                }
            }
        }
    }
```

To intercept the remote audiotrack, I'm using the method AudioTrack.addSink(), and I'm using essentially the same handler code to process the samples. Here's the code:

```
    @Override
    public void onData(ByteBuffer audioData,
            int bitsPerSample,
            int sampleRate,
            int numberOfChannels,
            int numberOfFrames,
            long absoluteCaptureTimestampMs) {
        if (!isRunning)
            return;
        audioThreadHandler.post(() -> {
            if (audioEncoder == null) try {
                audioEncoder = MediaCodec.createEncoderByType("audio/mp4a-latm");
                MediaFormat format = new MediaFormat();
                format.setString(MediaFormat.KEY_MIME, "audio/mp4a-latm");
                format.setInteger(MediaFormat.KEY_CHANNEL_COUNT, numberOfChannels);
                format.setInteger(MediaFormat.KEY_SAMPLE_RATE, sampleRate);
                format.setInteger(MediaFormat.KEY_BIT_RATE, 64 * 1024);
                format.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC);
                audioEncoder.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
                audioEncoder.start();
            } catch (IOException exception) {
                Log.wtf(TAG, exception);
            }
            int bufferIndex = audioEncoder.dequeueInputBuffer(0);
            if (bufferIndex >= 0) {
                ByteBuffer buffer = audioEncoder.getInputBuffer(bufferIndex);
                buffer.clear();
                int length = audioData.remaining();
                buffer.put(audioData)
                audioEncoder.queueInputBuffer(bufferIndex, 0, length, presTime, 0);
                presTime += 1000000l * numberOfFrames / sampleRate;
            }
            drainAudio();
        });
    }
```

Unfortunately this code corrupts the audiotrack. I hear the voice, the timing is correct, but it just sounds wrong. The same remote audiotrack sounds good when played on a device speaker by WebRTC.

What am I doing wrong? Since the local audiotrack is encoded and recorded perfectly by using the same code, and the remote audio interception is a very recent feature released just a few months ago, I think this could be a bug, so I'm reporting it here.

By the way, the absoluteCaptureTimestampMs in this handler is always 0. This doesn't seem to be critical, but I believe this is a bug as well.


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

AudioTrackSink corrupts the remote audiotrack #262

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

AudioTrackSink corrupts the remote audiotrack #262

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions