pytorch · mthrok · Jul 5, 2023
@@ -37,7 +37,6 @@ def _get_build(var, default=False):
 _BUILD_RIR = _get_build("BUILD_RIR", True)
 _BUILD_RNNT = _get_build("BUILD_RNNT", True)
 _USE_FFMPEG = _get_build("USE_FFMPEG", False)
-_DLOPEN_FFMPEG = _get_build("DLOPEN_FFMPEG", False)
 _USE_ROCM = _get_build("USE_ROCM", torch.backends.cuda.is_built() and torch.version.hip is not None)
 _USE_CUDA = _get_build("USE_CUDA", torch.backends.cuda.is_built() and torch.version.hip is None)
 _BUILD_ALIGN = _get_build("BUILD_ALIGN", True)
@@ -125,7 +124,6 @@ def build_extension(self, ext):
             f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}",
             f"-DUSE_OPENMP:BOOL={'ON' if _USE_OPENMP else 'OFF'}",
             f"-DUSE_FFMPEG:BOOL={'ON' if _USE_FFMPEG else 'OFF'}",
-            f"-DDLOPEN_FFMPEG:BOOL={'ON' if _DLOPEN_FFMPEG else 'OFF'}",
         ]
         build_args = ["--target", "install"]
         # Pass CUDA architecture to cmake

diff --git a/torchaudio/csrc/ffmpeg/CMakeLists.txt b/torchaudio/csrc/ffmpeg/CMakeLists.txt
@@ -2,13 +2,11 @@ message(STATUS "FFMPEG_ROOT=$ENV{FFMPEG_ROOT}")
 find_package(FFMPEG 4.1 REQUIRED COMPONENTS avdevice avfilter avformat avcodec avutil)
 add_library(ffmpeg INTERFACE)
 target_include_directories(ffmpeg INTERFACE "${FFMPEG_INCLUDE_DIRS}")
-if (NOT DLOPEN_FFMPEG)
 target_link_libraries(ffmpeg INTERFACE "${FFMPEG_LIBRARIES}")
-endif()
+
 
 set(
   sources
-  stub.cpp
   ffmpeg.cpp
   filter_graph.cpp
   hw_context.cpp
@@ -33,24 +31,24 @@ if (USE_CUDA)
     cuda_deps)
 endif()
 
-if (DLOPEN_FFMPEG)
-  set(compile_definitions DLOPEN_FFMPEG)
-endif()
-
 torchaudio_library(
   libtorchaudio_ffmpeg
   "${sources}"
   ""
   "torch;ffmpeg;${additional_lib}"
-  "${compile_definitions}"
+  ""
   )
 
 if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
+  set(
+    ext_sources
+    pybind/pybind.cpp
+    )
   torchaudio_extension(
     _torchaudio_ffmpeg
-    pybind/pybind.cpp
+    "${ext_sources}"
     ""
     "libtorchaudio_ffmpeg"
-    "${compile_definitions}"
+    ""
     )
 endif ()
diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.cpp b/torchaudio/csrc/ffmpeg/ffmpeg.cpp
@@ -1,27 +1,20 @@
 #include <c10/util/Exception.h>
 #include <torchaudio/csrc/ffmpeg/ffmpeg.h>
-#include <torchaudio/csrc/ffmpeg/stub.h>
 #include <sstream>
 #include <stdexcept>
 #include <string>
 #include <vector>
 
 namespace torchaudio::io {
 
-std::string av_err2string(int errnum) {
-  char str[AV_ERROR_MAX_STRING_SIZE];
-  FFMPEG av_strerror(errnum, str, AV_ERROR_MAX_STRING_SIZE);
-  return str;
-}
-
 ////////////////////////////////////////////////////////////////////////////////
 // AVDictionary
 ////////////////////////////////////////////////////////////////////////////////
 AVDictionary* get_option_dict(const c10::optional<OptionDict>& option) {
   AVDictionary* opt = nullptr;
   if (option) {
     for (auto const& [key, value] : option.value()) {
-      FFMPEG av_dict_set(&opt, key.c_str(), value.c_str(), 0);
+      av_dict_set(&opt, key.c_str(), value.c_str(), 0);
     }
   }
   return opt;
@@ -32,10 +25,10 @@ void clean_up_dict(AVDictionary* p) {
     std::vector<std::string> unused_keys;
     // Check and copy unused keys, clean up the original dictionary
     AVDictionaryEntry* t = nullptr;
-    while ((t = FFMPEG av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) {
+    while ((t = av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) {
       unused_keys.emplace_back(t->key);
     }
-    FFMPEG av_dict_free(&p);
+    av_dict_free(&p);
     TORCH_CHECK(
         unused_keys.empty(),
         "Unexpected options: ",
@@ -47,14 +40,14 @@ void clean_up_dict(AVDictionary* p) {
 // AVFormatContext
 ////////////////////////////////////////////////////////////////////////////////
 void AVFormatInputContextDeleter::operator()(AVFormatContext* p) {
-  FFMPEG avformat_close_input(&p);
+  avformat_close_input(&p);
 };
 
 AVFormatInputContextPtr::AVFormatInputContextPtr(AVFormatContext* p)
     : Wrapper<AVFormatContext, AVFormatInputContextDeleter>(p) {}
 
 void AVFormatOutputContextDeleter::operator()(AVFormatContext* p) {
-  FFMPEG avformat_free_context(p);
+  avformat_free_context(p);
 };
 
 AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p)
@@ -64,9 +57,9 @@ AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p)
 // AVIO
 ////////////////////////////////////////////////////////////////////////////////
 void AVIOContextDeleter::operator()(AVIOContext* p) {
-  FFMPEG avio_flush(p);
-  FFMPEG av_freep(&p->buffer);
-  FFMPEG av_freep(&p);
+  avio_flush(p);
+  av_freep(&p->buffer);
+  av_freep(&p);
 };
 
 AVIOContextPtr::AVIOContextPtr(AVIOContext* p)
@@ -76,13 +69,13 @@ AVIOContextPtr::AVIOContextPtr(AVIOContext* p)
 // AVPacket
 ////////////////////////////////////////////////////////////////////////////////
 void AVPacketDeleter::operator()(AVPacket* p) {
-  FFMPEG av_packet_free(&p);
+  av_packet_free(&p);
 };
 
 AVPacketPtr::AVPacketPtr(AVPacket* p) : Wrapper<AVPacket, AVPacketDeleter>(p) {}
 
 AVPacketPtr alloc_avpacket() {
-  AVPacket* p = FFMPEG av_packet_alloc();
+  AVPacket* p = av_packet_alloc();
   TORCH_CHECK(p, "Failed to allocate AVPacket object.");
   return AVPacketPtr{p};
 }
@@ -92,7 +85,7 @@ AVPacketPtr alloc_avpacket() {
 ////////////////////////////////////////////////////////////////////////////////
 AutoPacketUnref::AutoPacketUnref(AVPacketPtr& p) : p_(p){};
 AutoPacketUnref::~AutoPacketUnref() {
-  FFMPEG av_packet_unref(p_);
+  av_packet_unref(p_);
 }
 AutoPacketUnref::operator AVPacket*() const {
   return p_;
@@ -102,13 +95,13 @@ AutoPacketUnref::operator AVPacket*() const {
 // AVFrame
 ////////////////////////////////////////////////////////////////////////////////
 void AVFrameDeleter::operator()(AVFrame* p) {
-  FFMPEG av_frame_free(&p);
+  av_frame_free(&p);
 };
 
 AVFramePtr::AVFramePtr(AVFrame* p) : Wrapper<AVFrame, AVFrameDeleter>(p) {}
 
 AVFramePtr alloc_avframe() {
-  AVFrame* p = FFMPEG av_frame_alloc();
+  AVFrame* p = av_frame_alloc();
   TORCH_CHECK(p, "Failed to allocate AVFrame object.");
   return AVFramePtr{p};
 };
@@ -117,7 +110,7 @@ AVFramePtr alloc_avframe() {
 // AVCodecContext
 ////////////////////////////////////////////////////////////////////////////////
 void AVCodecContextDeleter::operator()(AVCodecContext* p) {
-  FFMPEG avcodec_free_context(&p);
+  avcodec_free_context(&p);
 };
 
 AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
@@ -127,7 +120,7 @@ AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
 // AVBufferRefPtr
 ////////////////////////////////////////////////////////////////////////////////
 void AutoBufferUnref::operator()(AVBufferRef* p) {
-  FFMPEG av_buffer_unref(&p);
+  av_buffer_unref(&p);
 }
 
 AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p)
@@ -137,7 +130,7 @@ AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p)
 // AVFilterGraph
 ////////////////////////////////////////////////////////////////////////////////
 void AVFilterGraphDeleter::operator()(AVFilterGraph* p) {
-  FFMPEG avfilter_graph_free(&p);
+  avfilter_graph_free(&p);
 };
 
 AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p)
@@ -147,7 +140,7 @@ AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p)
 // AVCodecParameters
 ////////////////////////////////////////////////////////////////////////////////
 void AVCodecParametersDeleter::operator()(AVCodecParameters* codecpar) {
-  FFMPEG avcodec_parameters_free(&codecpar);
+  avcodec_parameters_free(&codecpar);
 }
 
 AVCodecParametersPtr::AVCodecParametersPtr(AVCodecParameters* p)

diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.h b/torchaudio/csrc/ffmpeg/ffmpeg.h
@@ -41,7 +41,10 @@ using OptionDict = std::map<std::string, std::string>;
 // Replacement of av_err2str, which causes
 // `error: taking address of temporary array`
 // https://github.com/joncampbell123/composite-video-simulator/issues/5
-std::string av_err2string(int errnum);
+av_always_inline std::string av_err2string(int errnum) {
+  char str[AV_ERROR_MAX_STRING_SIZE];
+  return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, errnum);
+}
 
 // Base structure that handles memory management.
 // Resource is freed by the destructor of unique_ptr,

diff --git a/torchaudio/csrc/ffmpeg/filter_graph.cpp b/torchaudio/csrc/ffmpeg/filter_graph.cpp
@@ -1,12 +1,11 @@
 #include <torchaudio/csrc/ffmpeg/filter_graph.h>
-#include <torchaudio/csrc/ffmpeg/stub.h>
 #include <stdexcept>
 
 namespace torchaudio::io {
 
 namespace {
 AVFilterGraph* get_filter_graph() {
-  AVFilterGraph* ptr = FFMPEG avfilter_graph_alloc();
+  AVFilterGraph* ptr = avfilter_graph_alloc();
   TORCH_CHECK(ptr, "Failed to allocate resouce.");
   ptr->nb_threads = 1;
   return ptr;
@@ -32,7 +31,7 @@ std::string get_audio_src_args(
       time_base.num,
       time_base.den,
       sample_rate,
-      FFMPEG av_get_sample_fmt_name(format),
+      av_get_sample_fmt_name(format),
       channel_layout);
   return std::string(args);
 }
@@ -51,7 +50,7 @@ std::string get_video_src_args(
       "video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:frame_rate=%d/%d:pixel_aspect=%d/%d",
       width,
       height,
-      FFMPEG av_get_pix_fmt_name(format),
+      av_get_pix_fmt_name(format),
       time_base.num,
       time_base.den,
       frame_rate.num,
@@ -69,7 +68,7 @@ void FilterGraph::add_audio_src(
     int sample_rate,
     uint64_t channel_layout) {
   add_src(
-      FFMPEG avfilter_get_by_name("abuffer"),
+      avfilter_get_by_name("abuffer"),
       get_audio_src_args(format, time_base, sample_rate, channel_layout));
 }
 
@@ -81,13 +80,13 @@ void FilterGraph::add_video_src(
     int height,
     AVRational sample_aspect_ratio) {
   add_src(
-      FFMPEG avfilter_get_by_name("buffer"),
+      avfilter_get_by_name("buffer"),
       get_video_src_args(
           format, time_base, frame_rate, width, height, sample_aspect_ratio));
 }
 
 void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
-  int ret = FFMPEG avfilter_graph_create_filter(
+  int ret = avfilter_graph_create_filter(
       &buffersrc_ctx, buffersrc, "in", args.c_str(), nullptr, graph);
   TORCH_CHECK(
       ret >= 0,
@@ -96,11 +95,11 @@ void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
 }
 
 void FilterGraph::add_audio_sink() {
-  add_sink(FFMPEG avfilter_get_by_name("abuffersink"));
+  add_sink(avfilter_get_by_name("abuffersink"));
 }
 
 void FilterGraph::add_video_sink() {
-  add_sink(FFMPEG avfilter_get_by_name("buffersink"));
+  add_sink(avfilter_get_by_name("buffersink"));
 }
 
 void FilterGraph::add_sink(const AVFilter* buffersink) {
@@ -114,7 +113,7 @@ void FilterGraph::add_sink(const AVFilter* buffersink) {
   // According to the other example
   // https://ffmpeg.org/doxygen/4.1/filter_audio_8c-example.html
   // `abuffersink` should not take options, and this resolved issue.
-  int ret = FFMPEG avfilter_graph_create_filter(
+  int ret = avfilter_graph_create_filter(
       &buffersink_ctx, buffersink, "out", nullptr, nullptr, graph);
   TORCH_CHECK(ret >= 0, "Failed to create output filter.");
 }
@@ -131,15 +130,15 @@ class InOuts {
 
  public:
   InOuts(const char* name, AVFilterContext* pCtx) {
-    p = FFMPEG avfilter_inout_alloc();
+    p = avfilter_inout_alloc();
     TORCH_CHECK(p, "Failed to allocate AVFilterInOut.");
-    p->name = FFMPEG av_strdup(name);
+    p->name = av_strdup(name);
     p->filter_ctx = pCtx;
     p->pad_idx = 0;
     p->next = nullptr;
   }
   ~InOuts() {
-    FFMPEG avfilter_inout_free(&p);
+    avfilter_inout_free(&p);
   }
   operator AVFilterInOut**() {
     return &p;
@@ -156,7 +155,7 @@ void FilterGraph::add_process(const std::string& filter_description) {
   // If you are debugging this part of the code, you might get confused.
   InOuts in{"in", buffersrc_ctx}, out{"out", buffersink_ctx};
 
-  int ret = FFMPEG avfilter_graph_parse_ptr(
+  int ret = avfilter_graph_parse_ptr(
       graph, filter_description.c_str(), out, in, nullptr);
 
   TORCH_CHECK(
@@ -167,11 +166,11 @@ void FilterGraph::add_process(const std::string& filter_description) {
 
 void FilterGraph::create_filter(AVBufferRef* hw_frames_ctx) {
   buffersrc_ctx->outputs[0]->hw_frames_ctx = hw_frames_ctx;
-  int ret = FFMPEG avfilter_graph_config(graph, nullptr);
+  int ret = avfilter_graph_config(graph, nullptr);
   TORCH_CHECK(ret >= 0, "Failed to configure the graph: " + av_err2string(ret));
-  // char* desc = FFMPEG avfilter_graph_dump(graph, NULL);
+  // char* desc = avfilter_graph_dump(graph, NULL);
   // std::cerr << "Filter created:\n" << desc << std::endl;
-  // FFMPEG av_free(static_cast<void*>(desc));
+  // av_free(static_cast<void*>(desc));
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -191,8 +190,7 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const {
       ret.num_channels = l->ch_layout.nb_channels;
 #else
       // Before FFmpeg 5.1
-      ret.num_channels =
-          FFMPEG av_get_channel_layout_nb_channels(l->channel_layout);
+      ret.num_channels = av_get_channel_layout_nb_channels(l->channel_layout);
 #endif
       break;
     }
@@ -215,12 +213,12 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const {
 // Streaming process
 //////////////////////////////////////////////////////////////////////////////
 int FilterGraph::add_frame(AVFrame* pInputFrame) {
-  return FFMPEG av_buffersrc_add_frame_flags(
+  return av_buffersrc_add_frame_flags(
       buffersrc_ctx, pInputFrame, AV_BUFFERSRC_FLAG_KEEP_REF);
 }
 
 int FilterGraph::get_frame(AVFrame* pOutputFrame) {
-  return FFMPEG av_buffersink_get_frame(buffersink_ctx, pOutputFrame);
+  return av_buffersink_get_frame(buffersink_ctx, pOutputFrame);
 }
 
 } // namespace torchaudio::io
diff --git a/torchaudio/csrc/ffmpeg/hw_context.cpp b/torchaudio/csrc/ffmpeg/hw_context.cpp
@@ -1,5 +1,4 @@
 #include <torchaudio/csrc/ffmpeg/hw_context.h>
-#include <torchaudio/csrc/ffmpeg/stub.h>
 
 namespace torchaudio::io {
 namespace {
@@ -16,7 +15,7 @@ AVBufferRef* get_cuda_context(int index) {
   }
   if (CUDA_CONTEXT_CACHE.count(index) == 0) {
     AVBufferRef* p = nullptr;
-    int ret = FFMPEG av_hwdevice_ctx_create(
+    int ret = av_hwdevice_ctx_create(
         &p, AV_HWDEVICE_TYPE_CUDA, std::to_string(index).c_str(), nullptr, 0);
     TORCH_CHECK(
         ret >= 0,