Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions tools/setup_helpers/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def _get_build(var, default=False):
_BUILD_RIR = _get_build("BUILD_RIR", True)
_BUILD_RNNT = _get_build("BUILD_RNNT", True)
_USE_FFMPEG = _get_build("USE_FFMPEG", False)
_DLOPEN_FFMPEG = _get_build("DLOPEN_FFMPEG", False)
_USE_ROCM = _get_build("USE_ROCM", torch.backends.cuda.is_built() and torch.version.hip is not None)
_USE_CUDA = _get_build("USE_CUDA", torch.backends.cuda.is_built() and torch.version.hip is None)
_BUILD_ALIGN = _get_build("BUILD_ALIGN", True)
Expand Down Expand Up @@ -125,7 +124,6 @@ def build_extension(self, ext):
f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}",
f"-DUSE_OPENMP:BOOL={'ON' if _USE_OPENMP else 'OFF'}",
f"-DUSE_FFMPEG:BOOL={'ON' if _USE_FFMPEG else 'OFF'}",
f"-DDLOPEN_FFMPEG:BOOL={'ON' if _DLOPEN_FFMPEG else 'OFF'}",
]
build_args = ["--target", "install"]
# Pass CUDA architecture to cmake
Expand Down
18 changes: 8 additions & 10 deletions torchaudio/csrc/ffmpeg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@ message(STATUS "FFMPEG_ROOT=$ENV{FFMPEG_ROOT}")
find_package(FFMPEG 4.1 REQUIRED COMPONENTS avdevice avfilter avformat avcodec avutil)
add_library(ffmpeg INTERFACE)
target_include_directories(ffmpeg INTERFACE "${FFMPEG_INCLUDE_DIRS}")
if (NOT DLOPEN_FFMPEG)
target_link_libraries(ffmpeg INTERFACE "${FFMPEG_LIBRARIES}")
endif()


set(
sources
stub.cpp
ffmpeg.cpp
filter_graph.cpp
hw_context.cpp
Expand All @@ -33,24 +31,24 @@ if (USE_CUDA)
cuda_deps)
endif()

if (DLOPEN_FFMPEG)
set(compile_definitions DLOPEN_FFMPEG)
endif()

torchaudio_library(
libtorchaudio_ffmpeg
"${sources}"
""
"torch;ffmpeg;${additional_lib}"
"${compile_definitions}"
""
)

if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
set(
ext_sources
pybind/pybind.cpp
)
torchaudio_extension(
_torchaudio_ffmpeg
pybind/pybind.cpp
"${ext_sources}"
""
"libtorchaudio_ffmpeg"
"${compile_definitions}"
""
)
endif ()
41 changes: 17 additions & 24 deletions torchaudio/csrc/ffmpeg/ffmpeg.cpp
Original file line number Diff line number Diff line change
@@ -1,27 +1,20 @@
#include <c10/util/Exception.h>
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

namespace torchaudio::io {

std::string av_err2string(int errnum) {
char str[AV_ERROR_MAX_STRING_SIZE];
FFMPEG av_strerror(errnum, str, AV_ERROR_MAX_STRING_SIZE);
return str;
}

////////////////////////////////////////////////////////////////////////////////
// AVDictionary
////////////////////////////////////////////////////////////////////////////////
AVDictionary* get_option_dict(const c10::optional<OptionDict>& option) {
AVDictionary* opt = nullptr;
if (option) {
for (auto const& [key, value] : option.value()) {
FFMPEG av_dict_set(&opt, key.c_str(), value.c_str(), 0);
av_dict_set(&opt, key.c_str(), value.c_str(), 0);
}
}
return opt;
Expand All @@ -32,10 +25,10 @@ void clean_up_dict(AVDictionary* p) {
std::vector<std::string> unused_keys;
// Check and copy unused keys, clean up the original dictionary
AVDictionaryEntry* t = nullptr;
while ((t = FFMPEG av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) {
while ((t = av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) {
unused_keys.emplace_back(t->key);
}
FFMPEG av_dict_free(&p);
av_dict_free(&p);
TORCH_CHECK(
unused_keys.empty(),
"Unexpected options: ",
Expand All @@ -47,14 +40,14 @@ void clean_up_dict(AVDictionary* p) {
// AVFormatContext
////////////////////////////////////////////////////////////////////////////////
void AVFormatInputContextDeleter::operator()(AVFormatContext* p) {
FFMPEG avformat_close_input(&p);
avformat_close_input(&p);
};

AVFormatInputContextPtr::AVFormatInputContextPtr(AVFormatContext* p)
: Wrapper<AVFormatContext, AVFormatInputContextDeleter>(p) {}

void AVFormatOutputContextDeleter::operator()(AVFormatContext* p) {
FFMPEG avformat_free_context(p);
avformat_free_context(p);
};

AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p)
Expand All @@ -64,9 +57,9 @@ AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p)
// AVIO
////////////////////////////////////////////////////////////////////////////////
void AVIOContextDeleter::operator()(AVIOContext* p) {
FFMPEG avio_flush(p);
FFMPEG av_freep(&p->buffer);
FFMPEG av_freep(&p);
avio_flush(p);
av_freep(&p->buffer);
av_freep(&p);
};

AVIOContextPtr::AVIOContextPtr(AVIOContext* p)
Expand All @@ -76,13 +69,13 @@ AVIOContextPtr::AVIOContextPtr(AVIOContext* p)
// AVPacket
////////////////////////////////////////////////////////////////////////////////
void AVPacketDeleter::operator()(AVPacket* p) {
FFMPEG av_packet_free(&p);
av_packet_free(&p);
};

AVPacketPtr::AVPacketPtr(AVPacket* p) : Wrapper<AVPacket, AVPacketDeleter>(p) {}

AVPacketPtr alloc_avpacket() {
AVPacket* p = FFMPEG av_packet_alloc();
AVPacket* p = av_packet_alloc();
TORCH_CHECK(p, "Failed to allocate AVPacket object.");
return AVPacketPtr{p};
}
Expand All @@ -92,7 +85,7 @@ AVPacketPtr alloc_avpacket() {
////////////////////////////////////////////////////////////////////////////////
AutoPacketUnref::AutoPacketUnref(AVPacketPtr& p) : p_(p){};
AutoPacketUnref::~AutoPacketUnref() {
FFMPEG av_packet_unref(p_);
av_packet_unref(p_);
}
AutoPacketUnref::operator AVPacket*() const {
return p_;
Expand All @@ -102,13 +95,13 @@ AutoPacketUnref::operator AVPacket*() const {
// AVFrame
////////////////////////////////////////////////////////////////////////////////
void AVFrameDeleter::operator()(AVFrame* p) {
FFMPEG av_frame_free(&p);
av_frame_free(&p);
};

AVFramePtr::AVFramePtr(AVFrame* p) : Wrapper<AVFrame, AVFrameDeleter>(p) {}

AVFramePtr alloc_avframe() {
AVFrame* p = FFMPEG av_frame_alloc();
AVFrame* p = av_frame_alloc();
TORCH_CHECK(p, "Failed to allocate AVFrame object.");
return AVFramePtr{p};
};
Expand All @@ -117,7 +110,7 @@ AVFramePtr alloc_avframe() {
// AVCodecContext
////////////////////////////////////////////////////////////////////////////////
void AVCodecContextDeleter::operator()(AVCodecContext* p) {
FFMPEG avcodec_free_context(&p);
avcodec_free_context(&p);
};

AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
Expand All @@ -127,7 +120,7 @@ AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
// AVBufferRefPtr
////////////////////////////////////////////////////////////////////////////////
void AutoBufferUnref::operator()(AVBufferRef* p) {
FFMPEG av_buffer_unref(&p);
av_buffer_unref(&p);
}

AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p)
Expand All @@ -137,7 +130,7 @@ AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p)
// AVFilterGraph
////////////////////////////////////////////////////////////////////////////////
void AVFilterGraphDeleter::operator()(AVFilterGraph* p) {
FFMPEG avfilter_graph_free(&p);
avfilter_graph_free(&p);
};

AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p)
Expand All @@ -147,7 +140,7 @@ AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p)
// AVCodecParameters
////////////////////////////////////////////////////////////////////////////////
void AVCodecParametersDeleter::operator()(AVCodecParameters* codecpar) {
FFMPEG avcodec_parameters_free(&codecpar);
avcodec_parameters_free(&codecpar);
}

AVCodecParametersPtr::AVCodecParametersPtr(AVCodecParameters* p)
Expand Down
5 changes: 4 additions & 1 deletion torchaudio/csrc/ffmpeg/ffmpeg.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ using OptionDict = std::map<std::string, std::string>;
// Replacement of av_err2str, which causes
// `error: taking address of temporary array`
// https://github.com/joncampbell123/composite-video-simulator/issues/5
std::string av_err2string(int errnum);
av_always_inline std::string av_err2string(int errnum) {
char str[AV_ERROR_MAX_STRING_SIZE];
return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, errnum);
}

// Base structure that handles memory management.
// Resource is freed by the destructor of unique_ptr,
Expand Down
40 changes: 19 additions & 21 deletions torchaudio/csrc/ffmpeg/filter_graph.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
#include <torchaudio/csrc/ffmpeg/filter_graph.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
#include <stdexcept>

namespace torchaudio::io {

namespace {
AVFilterGraph* get_filter_graph() {
AVFilterGraph* ptr = FFMPEG avfilter_graph_alloc();
AVFilterGraph* ptr = avfilter_graph_alloc();
TORCH_CHECK(ptr, "Failed to allocate resouce.");
ptr->nb_threads = 1;
return ptr;
Expand All @@ -32,7 +31,7 @@ std::string get_audio_src_args(
time_base.num,
time_base.den,
sample_rate,
FFMPEG av_get_sample_fmt_name(format),
av_get_sample_fmt_name(format),
channel_layout);
return std::string(args);
}
Expand All @@ -51,7 +50,7 @@ std::string get_video_src_args(
"video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:frame_rate=%d/%d:pixel_aspect=%d/%d",
width,
height,
FFMPEG av_get_pix_fmt_name(format),
av_get_pix_fmt_name(format),
time_base.num,
time_base.den,
frame_rate.num,
Expand All @@ -69,7 +68,7 @@ void FilterGraph::add_audio_src(
int sample_rate,
uint64_t channel_layout) {
add_src(
FFMPEG avfilter_get_by_name("abuffer"),
avfilter_get_by_name("abuffer"),
get_audio_src_args(format, time_base, sample_rate, channel_layout));
}

Expand All @@ -81,13 +80,13 @@ void FilterGraph::add_video_src(
int height,
AVRational sample_aspect_ratio) {
add_src(
FFMPEG avfilter_get_by_name("buffer"),
avfilter_get_by_name("buffer"),
get_video_src_args(
format, time_base, frame_rate, width, height, sample_aspect_ratio));
}

void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
int ret = FFMPEG avfilter_graph_create_filter(
int ret = avfilter_graph_create_filter(
&buffersrc_ctx, buffersrc, "in", args.c_str(), nullptr, graph);
TORCH_CHECK(
ret >= 0,
Expand All @@ -96,11 +95,11 @@ void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
}

void FilterGraph::add_audio_sink() {
add_sink(FFMPEG avfilter_get_by_name("abuffersink"));
add_sink(avfilter_get_by_name("abuffersink"));
}

void FilterGraph::add_video_sink() {
add_sink(FFMPEG avfilter_get_by_name("buffersink"));
add_sink(avfilter_get_by_name("buffersink"));
}

void FilterGraph::add_sink(const AVFilter* buffersink) {
Expand All @@ -114,7 +113,7 @@ void FilterGraph::add_sink(const AVFilter* buffersink) {
// According to the other example
// https://ffmpeg.org/doxygen/4.1/filter_audio_8c-example.html
// `abuffersink` should not take options, and this resolved issue.
int ret = FFMPEG avfilter_graph_create_filter(
int ret = avfilter_graph_create_filter(
&buffersink_ctx, buffersink, "out", nullptr, nullptr, graph);
TORCH_CHECK(ret >= 0, "Failed to create output filter.");
}
Expand All @@ -131,15 +130,15 @@ class InOuts {

public:
InOuts(const char* name, AVFilterContext* pCtx) {
p = FFMPEG avfilter_inout_alloc();
p = avfilter_inout_alloc();
TORCH_CHECK(p, "Failed to allocate AVFilterInOut.");
p->name = FFMPEG av_strdup(name);
p->name = av_strdup(name);
p->filter_ctx = pCtx;
p->pad_idx = 0;
p->next = nullptr;
}
~InOuts() {
FFMPEG avfilter_inout_free(&p);
avfilter_inout_free(&p);
}
operator AVFilterInOut**() {
return &p;
Expand All @@ -156,7 +155,7 @@ void FilterGraph::add_process(const std::string& filter_description) {
// If you are debugging this part of the code, you might get confused.
InOuts in{"in", buffersrc_ctx}, out{"out", buffersink_ctx};

int ret = FFMPEG avfilter_graph_parse_ptr(
int ret = avfilter_graph_parse_ptr(
graph, filter_description.c_str(), out, in, nullptr);

TORCH_CHECK(
Expand All @@ -167,11 +166,11 @@ void FilterGraph::add_process(const std::string& filter_description) {

void FilterGraph::create_filter(AVBufferRef* hw_frames_ctx) {
buffersrc_ctx->outputs[0]->hw_frames_ctx = hw_frames_ctx;
int ret = FFMPEG avfilter_graph_config(graph, nullptr);
int ret = avfilter_graph_config(graph, nullptr);
TORCH_CHECK(ret >= 0, "Failed to configure the graph: " + av_err2string(ret));
// char* desc = FFMPEG avfilter_graph_dump(graph, NULL);
// char* desc = avfilter_graph_dump(graph, NULL);
// std::cerr << "Filter created:\n" << desc << std::endl;
// FFMPEG av_free(static_cast<void*>(desc));
// av_free(static_cast<void*>(desc));
}

//////////////////////////////////////////////////////////////////////////////
Expand All @@ -191,8 +190,7 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const {
ret.num_channels = l->ch_layout.nb_channels;
#else
// Before FFmpeg 5.1
ret.num_channels =
FFMPEG av_get_channel_layout_nb_channels(l->channel_layout);
ret.num_channels = av_get_channel_layout_nb_channels(l->channel_layout);
#endif
break;
}
Expand All @@ -215,12 +213,12 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const {
// Streaming process
//////////////////////////////////////////////////////////////////////////////
int FilterGraph::add_frame(AVFrame* pInputFrame) {
return FFMPEG av_buffersrc_add_frame_flags(
return av_buffersrc_add_frame_flags(
buffersrc_ctx, pInputFrame, AV_BUFFERSRC_FLAG_KEEP_REF);
}

int FilterGraph::get_frame(AVFrame* pOutputFrame) {
return FFMPEG av_buffersink_get_frame(buffersink_ctx, pOutputFrame);
return av_buffersink_get_frame(buffersink_ctx, pOutputFrame);
}

} // namespace torchaudio::io
3 changes: 1 addition & 2 deletions torchaudio/csrc/ffmpeg/hw_context.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include <torchaudio/csrc/ffmpeg/hw_context.h>
#include <torchaudio/csrc/ffmpeg/stub.h>

namespace torchaudio::io {
namespace {
Expand All @@ -16,7 +15,7 @@ AVBufferRef* get_cuda_context(int index) {
}
if (CUDA_CONTEXT_CACHE.count(index) == 0) {
AVBufferRef* p = nullptr;
int ret = FFMPEG av_hwdevice_ctx_create(
int ret = av_hwdevice_ctx_create(
&p, AV_HWDEVICE_TYPE_CUDA, std::to_string(index).c_str(), nullptr, 0);
TORCH_CHECK(
ret >= 0,
Expand Down
Loading