diff --git a/tools/setup_helpers/extension.py b/tools/setup_helpers/extension.py index 95b59c207c..2e5610bc79 100644 --- a/tools/setup_helpers/extension.py +++ b/tools/setup_helpers/extension.py @@ -37,7 +37,6 @@ def _get_build(var, default=False): _BUILD_RIR = _get_build("BUILD_RIR", True) _BUILD_RNNT = _get_build("BUILD_RNNT", True) _USE_FFMPEG = _get_build("USE_FFMPEG", False) -_DLOPEN_FFMPEG = _get_build("DLOPEN_FFMPEG", False) _USE_ROCM = _get_build("USE_ROCM", torch.backends.cuda.is_built() and torch.version.hip is not None) _USE_CUDA = _get_build("USE_CUDA", torch.backends.cuda.is_built() and torch.version.hip is None) _BUILD_ALIGN = _get_build("BUILD_ALIGN", True) @@ -125,7 +124,6 @@ def build_extension(self, ext): f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}", f"-DUSE_OPENMP:BOOL={'ON' if _USE_OPENMP else 'OFF'}", f"-DUSE_FFMPEG:BOOL={'ON' if _USE_FFMPEG else 'OFF'}", - f"-DDLOPEN_FFMPEG:BOOL={'ON' if _DLOPEN_FFMPEG else 'OFF'}", ] build_args = ["--target", "install"] # Pass CUDA architecture to cmake diff --git a/torchaudio/csrc/ffmpeg/CMakeLists.txt b/torchaudio/csrc/ffmpeg/CMakeLists.txt index 849d83d62f..e3445265b5 100644 --- a/torchaudio/csrc/ffmpeg/CMakeLists.txt +++ b/torchaudio/csrc/ffmpeg/CMakeLists.txt @@ -2,13 +2,11 @@ message(STATUS "FFMPEG_ROOT=$ENV{FFMPEG_ROOT}") find_package(FFMPEG 4.1 REQUIRED COMPONENTS avdevice avfilter avformat avcodec avutil) add_library(ffmpeg INTERFACE) target_include_directories(ffmpeg INTERFACE "${FFMPEG_INCLUDE_DIRS}") -if (NOT DLOPEN_FFMPEG) target_link_libraries(ffmpeg INTERFACE "${FFMPEG_LIBRARIES}") -endif() + set( sources - stub.cpp ffmpeg.cpp filter_graph.cpp hw_context.cpp @@ -33,24 +31,24 @@ if (USE_CUDA) cuda_deps) endif() -if (DLOPEN_FFMPEG) - set(compile_definitions DLOPEN_FFMPEG) -endif() - torchaudio_library( libtorchaudio_ffmpeg "${sources}" "" "torch;ffmpeg;${additional_lib}" - "${compile_definitions}" + "" ) if (BUILD_TORCHAUDIO_PYTHON_EXTENSION) + set( + ext_sources + pybind/pybind.cpp + ) torchaudio_extension( _torchaudio_ffmpeg - pybind/pybind.cpp + "${ext_sources}" "" "libtorchaudio_ffmpeg" - "${compile_definitions}" + "" ) endif () diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.cpp b/torchaudio/csrc/ffmpeg/ffmpeg.cpp index 55e6c142b9..7822b30392 100644 --- a/torchaudio/csrc/ffmpeg/ffmpeg.cpp +++ b/torchaudio/csrc/ffmpeg/ffmpeg.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -8,12 +7,6 @@ namespace torchaudio::io { -std::string av_err2string(int errnum) { - char str[AV_ERROR_MAX_STRING_SIZE]; - FFMPEG av_strerror(errnum, str, AV_ERROR_MAX_STRING_SIZE); - return str; -} - //////////////////////////////////////////////////////////////////////////////// // AVDictionary //////////////////////////////////////////////////////////////////////////////// @@ -21,7 +14,7 @@ AVDictionary* get_option_dict(const c10::optional& option) { AVDictionary* opt = nullptr; if (option) { for (auto const& [key, value] : option.value()) { - FFMPEG av_dict_set(&opt, key.c_str(), value.c_str(), 0); + av_dict_set(&opt, key.c_str(), value.c_str(), 0); } } return opt; @@ -32,10 +25,10 @@ void clean_up_dict(AVDictionary* p) { std::vector unused_keys; // Check and copy unused keys, clean up the original dictionary AVDictionaryEntry* t = nullptr; - while ((t = FFMPEG av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) { + while ((t = av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) { unused_keys.emplace_back(t->key); } - FFMPEG av_dict_free(&p); + av_dict_free(&p); TORCH_CHECK( unused_keys.empty(), "Unexpected options: ", @@ -47,14 +40,14 @@ void clean_up_dict(AVDictionary* p) { // AVFormatContext //////////////////////////////////////////////////////////////////////////////// void AVFormatInputContextDeleter::operator()(AVFormatContext* p) { - FFMPEG avformat_close_input(&p); + avformat_close_input(&p); }; AVFormatInputContextPtr::AVFormatInputContextPtr(AVFormatContext* p) : Wrapper(p) {} void AVFormatOutputContextDeleter::operator()(AVFormatContext* p) { - FFMPEG avformat_free_context(p); + avformat_free_context(p); }; AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p) @@ -64,9 +57,9 @@ AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p) // AVIO //////////////////////////////////////////////////////////////////////////////// void AVIOContextDeleter::operator()(AVIOContext* p) { - FFMPEG avio_flush(p); - FFMPEG av_freep(&p->buffer); - FFMPEG av_freep(&p); + avio_flush(p); + av_freep(&p->buffer); + av_freep(&p); }; AVIOContextPtr::AVIOContextPtr(AVIOContext* p) @@ -76,13 +69,13 @@ AVIOContextPtr::AVIOContextPtr(AVIOContext* p) // AVPacket //////////////////////////////////////////////////////////////////////////////// void AVPacketDeleter::operator()(AVPacket* p) { - FFMPEG av_packet_free(&p); + av_packet_free(&p); }; AVPacketPtr::AVPacketPtr(AVPacket* p) : Wrapper(p) {} AVPacketPtr alloc_avpacket() { - AVPacket* p = FFMPEG av_packet_alloc(); + AVPacket* p = av_packet_alloc(); TORCH_CHECK(p, "Failed to allocate AVPacket object."); return AVPacketPtr{p}; } @@ -92,7 +85,7 @@ AVPacketPtr alloc_avpacket() { //////////////////////////////////////////////////////////////////////////////// AutoPacketUnref::AutoPacketUnref(AVPacketPtr& p) : p_(p){}; AutoPacketUnref::~AutoPacketUnref() { - FFMPEG av_packet_unref(p_); + av_packet_unref(p_); } AutoPacketUnref::operator AVPacket*() const { return p_; @@ -102,13 +95,13 @@ AutoPacketUnref::operator AVPacket*() const { // AVFrame //////////////////////////////////////////////////////////////////////////////// void AVFrameDeleter::operator()(AVFrame* p) { - FFMPEG av_frame_free(&p); + av_frame_free(&p); }; AVFramePtr::AVFramePtr(AVFrame* p) : Wrapper(p) {} AVFramePtr alloc_avframe() { - AVFrame* p = FFMPEG av_frame_alloc(); + AVFrame* p = av_frame_alloc(); TORCH_CHECK(p, "Failed to allocate AVFrame object."); return AVFramePtr{p}; }; @@ -117,7 +110,7 @@ AVFramePtr alloc_avframe() { // AVCodecContext //////////////////////////////////////////////////////////////////////////////// void AVCodecContextDeleter::operator()(AVCodecContext* p) { - FFMPEG avcodec_free_context(&p); + avcodec_free_context(&p); }; AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p) @@ -127,7 +120,7 @@ AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p) // AVBufferRefPtr //////////////////////////////////////////////////////////////////////////////// void AutoBufferUnref::operator()(AVBufferRef* p) { - FFMPEG av_buffer_unref(&p); + av_buffer_unref(&p); } AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p) @@ -137,7 +130,7 @@ AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p) // AVFilterGraph //////////////////////////////////////////////////////////////////////////////// void AVFilterGraphDeleter::operator()(AVFilterGraph* p) { - FFMPEG avfilter_graph_free(&p); + avfilter_graph_free(&p); }; AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p) @@ -147,7 +140,7 @@ AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p) // AVCodecParameters //////////////////////////////////////////////////////////////////////////////// void AVCodecParametersDeleter::operator()(AVCodecParameters* codecpar) { - FFMPEG avcodec_parameters_free(&codecpar); + avcodec_parameters_free(&codecpar); } AVCodecParametersPtr::AVCodecParametersPtr(AVCodecParameters* p) diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.h b/torchaudio/csrc/ffmpeg/ffmpeg.h index 83d18464fa..0bae00c12d 100644 --- a/torchaudio/csrc/ffmpeg/ffmpeg.h +++ b/torchaudio/csrc/ffmpeg/ffmpeg.h @@ -41,7 +41,10 @@ using OptionDict = std::map; // Replacement of av_err2str, which causes // `error: taking address of temporary array` // https://github.com/joncampbell123/composite-video-simulator/issues/5 -std::string av_err2string(int errnum); +av_always_inline std::string av_err2string(int errnum) { + char str[AV_ERROR_MAX_STRING_SIZE]; + return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, errnum); +} // Base structure that handles memory management. // Resource is freed by the destructor of unique_ptr, diff --git a/torchaudio/csrc/ffmpeg/filter_graph.cpp b/torchaudio/csrc/ffmpeg/filter_graph.cpp index faa3606e08..1a1e40b011 100644 --- a/torchaudio/csrc/ffmpeg/filter_graph.cpp +++ b/torchaudio/csrc/ffmpeg/filter_graph.cpp @@ -1,12 +1,11 @@ #include -#include #include namespace torchaudio::io { namespace { AVFilterGraph* get_filter_graph() { - AVFilterGraph* ptr = FFMPEG avfilter_graph_alloc(); + AVFilterGraph* ptr = avfilter_graph_alloc(); TORCH_CHECK(ptr, "Failed to allocate resouce."); ptr->nb_threads = 1; return ptr; @@ -32,7 +31,7 @@ std::string get_audio_src_args( time_base.num, time_base.den, sample_rate, - FFMPEG av_get_sample_fmt_name(format), + av_get_sample_fmt_name(format), channel_layout); return std::string(args); } @@ -51,7 +50,7 @@ std::string get_video_src_args( "video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:frame_rate=%d/%d:pixel_aspect=%d/%d", width, height, - FFMPEG av_get_pix_fmt_name(format), + av_get_pix_fmt_name(format), time_base.num, time_base.den, frame_rate.num, @@ -69,7 +68,7 @@ void FilterGraph::add_audio_src( int sample_rate, uint64_t channel_layout) { add_src( - FFMPEG avfilter_get_by_name("abuffer"), + avfilter_get_by_name("abuffer"), get_audio_src_args(format, time_base, sample_rate, channel_layout)); } @@ -81,13 +80,13 @@ void FilterGraph::add_video_src( int height, AVRational sample_aspect_ratio) { add_src( - FFMPEG avfilter_get_by_name("buffer"), + avfilter_get_by_name("buffer"), get_video_src_args( format, time_base, frame_rate, width, height, sample_aspect_ratio)); } void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) { - int ret = FFMPEG avfilter_graph_create_filter( + int ret = avfilter_graph_create_filter( &buffersrc_ctx, buffersrc, "in", args.c_str(), nullptr, graph); TORCH_CHECK( ret >= 0, @@ -96,11 +95,11 @@ void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) { } void FilterGraph::add_audio_sink() { - add_sink(FFMPEG avfilter_get_by_name("abuffersink")); + add_sink(avfilter_get_by_name("abuffersink")); } void FilterGraph::add_video_sink() { - add_sink(FFMPEG avfilter_get_by_name("buffersink")); + add_sink(avfilter_get_by_name("buffersink")); } void FilterGraph::add_sink(const AVFilter* buffersink) { @@ -114,7 +113,7 @@ void FilterGraph::add_sink(const AVFilter* buffersink) { // According to the other example // https://ffmpeg.org/doxygen/4.1/filter_audio_8c-example.html // `abuffersink` should not take options, and this resolved issue. - int ret = FFMPEG avfilter_graph_create_filter( + int ret = avfilter_graph_create_filter( &buffersink_ctx, buffersink, "out", nullptr, nullptr, graph); TORCH_CHECK(ret >= 0, "Failed to create output filter."); } @@ -131,15 +130,15 @@ class InOuts { public: InOuts(const char* name, AVFilterContext* pCtx) { - p = FFMPEG avfilter_inout_alloc(); + p = avfilter_inout_alloc(); TORCH_CHECK(p, "Failed to allocate AVFilterInOut."); - p->name = FFMPEG av_strdup(name); + p->name = av_strdup(name); p->filter_ctx = pCtx; p->pad_idx = 0; p->next = nullptr; } ~InOuts() { - FFMPEG avfilter_inout_free(&p); + avfilter_inout_free(&p); } operator AVFilterInOut**() { return &p; @@ -156,7 +155,7 @@ void FilterGraph::add_process(const std::string& filter_description) { // If you are debugging this part of the code, you might get confused. InOuts in{"in", buffersrc_ctx}, out{"out", buffersink_ctx}; - int ret = FFMPEG avfilter_graph_parse_ptr( + int ret = avfilter_graph_parse_ptr( graph, filter_description.c_str(), out, in, nullptr); TORCH_CHECK( @@ -167,11 +166,11 @@ void FilterGraph::add_process(const std::string& filter_description) { void FilterGraph::create_filter(AVBufferRef* hw_frames_ctx) { buffersrc_ctx->outputs[0]->hw_frames_ctx = hw_frames_ctx; - int ret = FFMPEG avfilter_graph_config(graph, nullptr); + int ret = avfilter_graph_config(graph, nullptr); TORCH_CHECK(ret >= 0, "Failed to configure the graph: " + av_err2string(ret)); - // char* desc = FFMPEG avfilter_graph_dump(graph, NULL); + // char* desc = avfilter_graph_dump(graph, NULL); // std::cerr << "Filter created:\n" << desc << std::endl; - // FFMPEG av_free(static_cast(desc)); + // av_free(static_cast(desc)); } ////////////////////////////////////////////////////////////////////////////// @@ -191,8 +190,7 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const { ret.num_channels = l->ch_layout.nb_channels; #else // Before FFmpeg 5.1 - ret.num_channels = - FFMPEG av_get_channel_layout_nb_channels(l->channel_layout); + ret.num_channels = av_get_channel_layout_nb_channels(l->channel_layout); #endif break; } @@ -215,12 +213,12 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const { // Streaming process ////////////////////////////////////////////////////////////////////////////// int FilterGraph::add_frame(AVFrame* pInputFrame) { - return FFMPEG av_buffersrc_add_frame_flags( + return av_buffersrc_add_frame_flags( buffersrc_ctx, pInputFrame, AV_BUFFERSRC_FLAG_KEEP_REF); } int FilterGraph::get_frame(AVFrame* pOutputFrame) { - return FFMPEG av_buffersink_get_frame(buffersink_ctx, pOutputFrame); + return av_buffersink_get_frame(buffersink_ctx, pOutputFrame); } } // namespace torchaudio::io diff --git a/torchaudio/csrc/ffmpeg/hw_context.cpp b/torchaudio/csrc/ffmpeg/hw_context.cpp index 5c84f3dd09..a1d7f3c7a0 100644 --- a/torchaudio/csrc/ffmpeg/hw_context.cpp +++ b/torchaudio/csrc/ffmpeg/hw_context.cpp @@ -1,5 +1,4 @@ #include -#include namespace torchaudio::io { namespace { @@ -16,7 +15,7 @@ AVBufferRef* get_cuda_context(int index) { } if (CUDA_CONTEXT_CACHE.count(index) == 0) { AVBufferRef* p = nullptr; - int ret = FFMPEG av_hwdevice_ctx_create( + int ret = av_hwdevice_ctx_create( &p, AV_HWDEVICE_TYPE_CUDA, std::to_string(index).c_str(), nullptr, 0); TORCH_CHECK( ret >= 0, diff --git a/torchaudio/csrc/ffmpeg/pybind/pybind.cpp b/torchaudio/csrc/ffmpeg/pybind/pybind.cpp index 5fcb9f6df8..95db01fcec 100644 --- a/torchaudio/csrc/ffmpeg/pybind/pybind.cpp +++ b/torchaudio/csrc/ffmpeg/pybind/pybind.cpp @@ -2,7 +2,6 @@ #include #include #include -#include namespace torchaudio::io { namespace { @@ -10,15 +9,15 @@ namespace { std::map> get_versions() { std::map> ret; -#define add_version(NAME) \ - { \ - int ver = FFMPEG NAME##_version(); \ - ret.emplace( \ - "lib" #NAME, \ - std::make_tuple<>( \ - AV_VERSION_MAJOR(ver), \ - AV_VERSION_MINOR(ver), \ - AV_VERSION_MICRO(ver))); \ +#define add_version(NAME) \ + { \ + int ver = NAME##_version(); \ + ret.emplace( \ + "lib" #NAME, \ + std::make_tuple<>( \ + AV_VERSION_MAJOR(ver), \ + AV_VERSION_MINOR(ver), \ + AV_VERSION_MICRO(ver))); \ } add_version(avutil); @@ -35,7 +34,7 @@ std::map get_demuxers(bool req_device) { std::map ret; const AVInputFormat* fmt = nullptr; void* i = nullptr; - while ((fmt = FFMPEG av_demuxer_iterate(&i))) { + while ((fmt = av_demuxer_iterate(&i))) { assert(fmt); bool is_device = [&]() { const AVClass* avclass = fmt->priv_class; @@ -52,7 +51,7 @@ std::map get_muxers(bool req_device) { std::map ret; const AVOutputFormat* fmt = nullptr; void* i = nullptr; - while ((fmt = FFMPEG av_muxer_iterate(&i))) { + while ((fmt = av_muxer_iterate(&i))) { assert(fmt); bool is_device = [&]() { const AVClass* avclass = fmt->priv_class; @@ -71,10 +70,10 @@ std::map get_codecs( const AVCodec* c = nullptr; void* i = nullptr; std::map ret; - while ((c = FFMPEG av_codec_iterate(&i))) { + while ((c = av_codec_iterate(&i))) { assert(c); - if ((req_encoder && FFMPEG av_codec_is_encoder(c)) || - (!req_encoder && FFMPEG av_codec_is_decoder(c))) { + if ((req_encoder && av_codec_is_encoder(c)) || + (!req_encoder && av_codec_is_decoder(c))) { if (c->type == type && c->name) { ret.emplace(c->name, c->long_name ? c->long_name : ""); } @@ -87,7 +86,7 @@ std::vector get_protocols(bool output) { void* opaque = nullptr; const char* name = nullptr; std::vector ret; - while ((name = FFMPEG avio_enum_protocols(&opaque, output))) { + while ((name = avio_enum_protocols(&opaque, output))) { assert(name); ret.emplace_back(name); } @@ -95,7 +94,7 @@ std::vector get_protocols(bool output) { } std::string get_build_config() { - return FFMPEG avcodec_configuration(); + return avcodec_configuration(); } ////////////////////////////////////////////////////////////////////////////// @@ -188,9 +187,9 @@ struct StreamWriterFileObj : private FileObj, public StreamWriterCustomIO { }; PYBIND11_MODULE(_torchaudio_ffmpeg, m) { - m.def("init", []() { FFMPEG avdevice_register_all(); }); - m.def("get_log_level", []() { return FFMPEG av_log_get_level(); }); - m.def("set_log_level", [](int level) { FFMPEG av_log_set_level(level); }); + m.def("init", []() { avdevice_register_all(); }); + m.def("get_log_level", []() { return av_log_get_level(); }); + m.def("set_log_level", [](int level) { av_log_set_level(level); }); m.def("get_versions", &get_versions); m.def("get_muxers", []() { return get_muxers(false); }); m.def("get_demuxers", []() { return get_demuxers(false); }); @@ -246,22 +245,21 @@ PYBIND11_MODULE(_torchaudio_ffmpeg, m) { .def_property_readonly( "media_type", [](const OutputStreamInfo& o) -> std::string { - return FFMPEG av_get_media_type_string(o.media_type); + return av_get_media_type_string(o.media_type); }) .def_property_readonly( "format", [](const OutputStreamInfo& o) -> std::string { switch (o.media_type) { case AVMEDIA_TYPE_AUDIO: - return FFMPEG av_get_sample_fmt_name( - (AVSampleFormat)(o.format)); + return av_get_sample_fmt_name((AVSampleFormat)(o.format)); case AVMEDIA_TYPE_VIDEO: - return FFMPEG av_get_pix_fmt_name((AVPixelFormat)(o.format)); + return av_get_pix_fmt_name((AVPixelFormat)(o.format)); default: TORCH_INTERNAL_ASSERT( false, "FilterGraph is returning unexpected media type: ", - FFMPEG av_get_media_type_string(o.media_type)); + av_get_media_type_string(o.media_type)); } }) .def_readonly("sample_rate", &OutputStreamInfo::sample_rate) @@ -285,7 +283,7 @@ PYBIND11_MODULE(_torchaudio_ffmpeg, m) { .def_property_readonly( "media_type", [](const SrcStreamInfo& s) { - return FFMPEG av_get_media_type_string(s.media_type); + return av_get_media_type_string(s.media_type); }) .def_readonly("codec_name", &SrcStreamInfo::codec_name) .def_readonly("codec_long_name", &SrcStreamInfo::codec_long_name) diff --git a/torchaudio/csrc/ffmpeg/stream_reader/conversion.cpp b/torchaudio/csrc/ffmpeg/stream_reader/conversion.cpp index cf126d16a2..406f4e91bf 100644 --- a/torchaudio/csrc/ffmpeg/stream_reader/conversion.cpp +++ b/torchaudio/csrc/ffmpeg/stream_reader/conversion.cpp @@ -1,6 +1,5 @@ #include #include -#include #ifdef USE_CUDA #include @@ -429,11 +428,11 @@ void NV12CudaConverter::convert(const AVFrame* src, torch::Tensor& dst) { TORCH_INTERNAL_ASSERT( AV_PIX_FMT_CUDA == fmt, "Expected CUDA frame. Found: ", - FFMPEG av_get_pix_fmt_name(fmt)); + av_get_pix_fmt_name(fmt)); TORCH_INTERNAL_ASSERT( AV_PIX_FMT_NV12 == sw_fmt, "Expected NV12 format. Found: ", - FFMPEG av_get_pix_fmt_name(sw_fmt)); + av_get_pix_fmt_name(sw_fmt)); // Write Y plane directly auto status = cudaMemcpy2D( @@ -510,11 +509,11 @@ void P010CudaConverter::convert(const AVFrame* src, torch::Tensor& dst) { TORCH_INTERNAL_ASSERT( AV_PIX_FMT_CUDA == fmt, "Expected CUDA frame. Found: ", - FFMPEG av_get_pix_fmt_name(fmt)); + av_get_pix_fmt_name(fmt)); TORCH_INTERNAL_ASSERT( AV_PIX_FMT_P010 == sw_fmt, "Expected P010 format. Found: ", - FFMPEG av_get_pix_fmt_name(sw_fmt)); + av_get_pix_fmt_name(sw_fmt)); // Write Y plane directly auto status = cudaMemcpy2D( @@ -591,11 +590,11 @@ void YUV444PCudaConverter::convert(const AVFrame* src, torch::Tensor& dst) { TORCH_INTERNAL_ASSERT( AV_PIX_FMT_CUDA == fmt, "Expected CUDA frame. Found: ", - FFMPEG av_get_pix_fmt_name(fmt)); + av_get_pix_fmt_name(fmt)); TORCH_INTERNAL_ASSERT( AV_PIX_FMT_YUV444P == sw_fmt, "Expected YUV444P format. Found: ", - FFMPEG av_get_pix_fmt_name(sw_fmt)); + av_get_pix_fmt_name(sw_fmt)); // Write Y plane directly for (int i = 0; i < 3; ++i) { diff --git a/torchaudio/csrc/ffmpeg/stream_reader/packet_buffer.cpp b/torchaudio/csrc/ffmpeg/stream_reader/packet_buffer.cpp index 883999fa41..bcff81dc3b 100644 --- a/torchaudio/csrc/ffmpeg/stream_reader/packet_buffer.cpp +++ b/torchaudio/csrc/ffmpeg/stream_reader/packet_buffer.cpp @@ -1,11 +1,9 @@ #include -#include namespace torchaudio::io { - void PacketBuffer::push_packet(AVPacket* packet) { TORCH_INTERNAL_ASSERT_DEBUG_ONLY(packet, "Packet is null."); - AVPacket* p = FFMPEG av_packet_clone(packet); + AVPacket* p = av_packet_clone(packet); TORCH_INTERNAL_ASSERT(p, "Failed to clone packet."); packets.emplace_back(p); } diff --git a/torchaudio/csrc/ffmpeg/stream_reader/post_process.cpp b/torchaudio/csrc/ffmpeg/stream_reader/post_process.cpp index 4f397d8b49..38440e3e33 100644 --- a/torchaudio/csrc/ffmpeg/stream_reader/post_process.cpp +++ b/torchaudio/csrc/ffmpeg/stream_reader/post_process.cpp @@ -2,7 +2,6 @@ #include #include #include -#include namespace torchaudio::io { namespace detail { @@ -49,7 +48,7 @@ FilterGraphFactory get_video_factory( f.add_video_sink(); f.add_process(filter_desc); if (hw_frames_ctx) { - f.create_filter(FFMPEG av_buffer_ref(hw_frames_ctx)); + f.create_filter(av_buffer_ref(hw_frames_ctx)); } else { f.create_filter(); } @@ -140,7 +139,7 @@ struct ProcessImpl : public IPostDecodeProcess { if (ret >= 0) { buffer.push_frame(converter.convert(frame), frame->pts); } - FFMPEG av_frame_unref(frame); + av_frame_unref(frame); } return ret; } @@ -160,7 +159,7 @@ std::unique_ptr get_unchunked_audio_process( TORCH_INTERNAL_ASSERT( i.type == AVMEDIA_TYPE_AUDIO, "Unsupported media type found: ", - FFMPEG av_get_media_type_string(i.type)); + av_get_media_type_string(i.type)); using B = UnchunkedBuffer; @@ -227,7 +226,7 @@ std::unique_ptr get_unchunked_audio_process( } default: TORCH_INTERNAL_ASSERT( - false, "Unexpected audio type:", FFMPEG av_get_sample_fmt_name(fmt)); + false, "Unexpected audio type:", av_get_sample_fmt_name(fmt)); } } @@ -240,7 +239,7 @@ std::unique_ptr get_chunked_audio_process( TORCH_INTERNAL_ASSERT_DEBUG_ONLY( i.type == AVMEDIA_TYPE_AUDIO, "Unsupported media type found: ", - FFMPEG av_get_media_type_string(i.type)); + av_get_media_type_string(i.type)); using B = ChunkedBuffer; B buffer{i.time_base, frames_per_chunk, num_chunks}; @@ -308,7 +307,7 @@ std::unique_ptr get_chunked_audio_process( } default: TORCH_INTERNAL_ASSERT( - false, "Unexpected audio type:", FFMPEG av_get_sample_fmt_name(fmt)); + false, "Unexpected audio type:", av_get_sample_fmt_name(fmt)); } } @@ -322,7 +321,7 @@ std::unique_ptr get_unchunked_video_process( TORCH_INTERNAL_ASSERT_DEBUG_ONLY( i.type == AVMEDIA_TYPE_VIDEO, "Unsupported media type found: ", - FFMPEG av_get_media_type_string(i.type)); + av_get_media_type_string(i.type)); auto h = i.height; auto w = i.width; @@ -376,9 +375,7 @@ std::unique_ptr get_unchunked_video_process( } default: { TORCH_INTERNAL_ASSERT( - false, - "Unexpected video format found: ", - FFMPEG av_get_pix_fmt_name(fmt)); + false, "Unexpected video format found: ", av_get_pix_fmt_name(fmt)); } } } @@ -396,7 +393,7 @@ std::unique_ptr get_unchunked_cuda_video_process( TORCH_INTERNAL_ASSERT_DEBUG_ONLY( i.type == AVMEDIA_TYPE_VIDEO, "Unsupported media type found: ", - FFMPEG av_get_media_type_string(i.type)); + av_get_media_type_string(i.type)); using B = UnchunkedBuffer; switch (auto fmt = (AVPixelFormat)i.format; fmt) { @@ -419,13 +416,13 @@ std::unique_ptr get_unchunked_cuda_video_process( TORCH_CHECK( false, "Unsupported video format found in CUDA HW: ", - FFMPEG av_get_pix_fmt_name(fmt)); + av_get_pix_fmt_name(fmt)); } default: { TORCH_CHECK( false, "Unexpected video format found in CUDA HW: ", - FFMPEG av_get_pix_fmt_name(fmt)); + av_get_pix_fmt_name(fmt)); } } #endif @@ -440,7 +437,7 @@ std::unique_ptr get_chunked_video_process( TORCH_INTERNAL_ASSERT_DEBUG_ONLY( i.type == AVMEDIA_TYPE_VIDEO, "Unsupported media type found: ", - FFMPEG av_get_media_type_string(i.type)); + av_get_media_type_string(i.type)); auto h = i.height; auto w = i.width; @@ -494,9 +491,7 @@ std::unique_ptr get_chunked_video_process( } default: { TORCH_INTERNAL_ASSERT( - false, - "Unexpected video format found: ", - FFMPEG av_get_pix_fmt_name(fmt)); + false, "Unexpected video format found: ", av_get_pix_fmt_name(fmt)); } } } @@ -516,7 +511,7 @@ std::unique_ptr get_chunked_cuda_video_process( TORCH_INTERNAL_ASSERT_DEBUG_ONLY( i.type == AVMEDIA_TYPE_VIDEO, "Unsupported media type found: ", - FFMPEG av_get_media_type_string(i.type)); + av_get_media_type_string(i.type)); using B = ChunkedBuffer; switch (auto fmt = (AVPixelFormat)i.format; fmt) { @@ -545,13 +540,13 @@ std::unique_ptr get_chunked_cuda_video_process( TORCH_CHECK( false, "Unsupported video format found in CUDA HW: ", - FFMPEG av_get_pix_fmt_name(fmt)); + av_get_pix_fmt_name(fmt)); } default: { TORCH_CHECK( false, "Unexpected video format found in CUDA HW: ", - FFMPEG av_get_pix_fmt_name(fmt)); + av_get_pix_fmt_name(fmt)); } } #endif diff --git a/torchaudio/csrc/ffmpeg/stream_reader/stream_processor.cpp b/torchaudio/csrc/ffmpeg/stream_reader/stream_processor.cpp index ffd1ddea38..2213a4018a 100644 --- a/torchaudio/csrc/ffmpeg/stream_reader/stream_processor.cpp +++ b/torchaudio/csrc/ffmpeg/stream_reader/stream_processor.cpp @@ -1,10 +1,10 @@ #include #include -#include #include #include namespace torchaudio::io { + namespace { AVCodecContextPtr alloc_codec_context( enum AVCodecID codec_id, @@ -12,24 +12,24 @@ AVCodecContextPtr alloc_codec_context( const AVCodec* codec = [&]() { if (decoder_name) { const AVCodec* c = - FFMPEG avcodec_find_decoder_by_name(decoder_name.value().c_str()); + avcodec_find_decoder_by_name(decoder_name.value().c_str()); TORCH_CHECK(c, "Unsupported codec: ", decoder_name.value()); return c; } else { - const AVCodec* c = FFMPEG avcodec_find_decoder(codec_id); - TORCH_CHECK(c, "Unsupported codec: ", FFMPEG avcodec_get_name(codec_id)); + const AVCodec* c = avcodec_find_decoder(codec_id); + TORCH_CHECK(c, "Unsupported codec: ", avcodec_get_name(codec_id)); return c; } }(); - AVCodecContext* codec_ctx = FFMPEG avcodec_alloc_context3(codec); + AVCodecContext* codec_ctx = avcodec_alloc_context3(codec); TORCH_CHECK(codec_ctx, "Failed to allocate CodecContext."); return AVCodecContextPtr(codec_ctx); } const AVCodecHWConfig* get_cuda_config(const AVCodec* codec) { for (int i = 0;; ++i) { - const AVCodecHWConfig* config = FFMPEG avcodec_get_hw_config(codec, i); + const AVCodecHWConfig* config = avcodec_get_hw_config(codec, i); if (!config) { break; } @@ -82,7 +82,7 @@ enum AVPixelFormat get_hw_format( } AVBufferRef* get_hw_frames_ctx(AVCodecContext* codec_ctx) { - AVBufferRef* p = FFMPEG av_hwframe_ctx_alloc(codec_ctx->hw_device_ctx); + AVBufferRef* p = av_hwframe_ctx_alloc(codec_ctx->hw_device_ctx); TORCH_CHECK( p, "Failed to allocate CUDA frame context from device context at ", @@ -93,11 +93,11 @@ AVBufferRef* get_hw_frames_ctx(AVCodecContext* codec_ctx) { frames_ctx->width = codec_ctx->width; frames_ctx->height = codec_ctx->height; frames_ctx->initial_pool_size = 5; - int ret = FFMPEG av_hwframe_ctx_init(p); + int ret = av_hwframe_ctx_init(p); if (ret >= 0) { return p; } - FFMPEG av_buffer_unref(&p); + av_buffer_unref(&p); TORCH_CHECK( false, "Failed to initialize CUDA frame context: ", av_err2string(ret)); } @@ -106,7 +106,7 @@ void configure_codec_context( AVCodecContext* codec_ctx, const AVCodecParameters* params, const torch::Device& device) { - int ret = FFMPEG avcodec_parameters_to_context(codec_ctx, params); + int ret = avcodec_parameters_to_context(codec_ctx, params); TORCH_CHECK( ret >= 0, "Failed to set CodecContext parameter: ", av_err2string(ret)); @@ -121,8 +121,7 @@ void configure_codec_context( // 2. Set pCodecContext->get_format call back function which // will retrieve the HW pixel format from opaque pointer. codec_ctx->get_format = get_hw_format; - codec_ctx->hw_device_ctx = - FFMPEG av_buffer_ref(get_cuda_context(device.index())); + codec_ctx->hw_device_ctx = av_buffer_ref(get_cuda_context(device.index())); TORCH_INTERNAL_ASSERT( codec_ctx->hw_device_ctx, "Failed to reference HW device context."); #endif @@ -135,16 +134,16 @@ void open_codec( AVDictionary* opts = get_option_dict(decoder_option); // Default to single thread execution. - if (!FFMPEG av_dict_get(opts, "threads", nullptr, 0)) { - FFMPEG av_dict_set(&opts, "threads", "1", 0); + if (!av_dict_get(opts, "threads", nullptr, 0)) { + av_dict_set(&opts, "threads", "1", 0); } if (!codec_ctx->channel_layout) { codec_ctx->channel_layout = - FFMPEG av_get_default_channel_layout(codec_ctx->channels); + av_get_default_channel_layout(codec_ctx->channels); } - int ret = FFMPEG avcodec_open2(codec_ctx, codec_ctx->codec, &opts); + int ret = avcodec_open2(codec_ctx, codec_ctx->codec, &opts); clean_up_dict(opts); TORCH_CHECK( ret >= 0, "Failed to initialize CodecContext: ", av_err2string(ret)); @@ -259,8 +258,8 @@ void StreamProcessor::remove_stream(KeyType key) { void StreamProcessor::set_discard_timestamp(int64_t timestamp) { TORCH_CHECK(timestamp >= 0, "timestamp must be non-negative."); - discard_before_pts = FFMPEG av_rescale_q( - timestamp, FFMPEG av_get_time_base_q(), stream_time_base); + discard_before_pts = + av_rescale_q(timestamp, av_get_time_base_q(), stream_time_base); } void StreamProcessor::set_decoder( @@ -306,9 +305,9 @@ int StreamProcessor::process_packet(AVPacket* packet) { TORCH_INTERNAL_ASSERT_DEBUG_ONLY( is_decoder_set(), "Decoder must have been set prior to calling this function."); - int ret = FFMPEG avcodec_send_packet(codec_ctx, packet); + int ret = avcodec_send_packet(codec_ctx, packet); while (ret >= 0) { - ret = FFMPEG avcodec_receive_frame(codec_ctx, frame); + ret = avcodec_receive_frame(codec_ctx, frame); // AVERROR(EAGAIN) means that new input data is required to return new // output. if (ret == AVERROR(EAGAIN)) @@ -355,7 +354,7 @@ int StreamProcessor::process_packet(AVPacket* packet) { } // else we can just unref the frame and continue - FFMPEG av_frame_unref(frame); + av_frame_unref(frame); } return ret; } @@ -364,7 +363,7 @@ void StreamProcessor::flush() { TORCH_INTERNAL_ASSERT_DEBUG_ONLY( is_decoder_set(), "Decoder must have been set prior to calling this function."); - FFMPEG avcodec_flush_buffers(codec_ctx); + avcodec_flush_buffers(codec_ctx); for (auto& ite : post_processes) { ite.second->flush(); } diff --git a/torchaudio/csrc/ffmpeg/stream_reader/stream_reader.cpp b/torchaudio/csrc/ffmpeg/stream_reader/stream_reader.cpp index 518bc02131..b8e9d7a9bf 100644 --- a/torchaudio/csrc/ffmpeg/stream_reader/stream_reader.cpp +++ b/torchaudio/csrc/ffmpeg/stream_reader/stream_reader.cpp @@ -1,15 +1,10 @@ #include #include -#include #include #include #include #include -extern "C" { -#include -} - namespace torchaudio::io { using KeyType = StreamProcessor::KeyType; @@ -23,7 +18,7 @@ AVFormatContext* get_input_format_context( const c10::optional& format, const c10::optional& option, AVIOContext* io_ctx) { - AVFormatContext* p = FFMPEG avformat_alloc_context(); + AVFormatContext* p = avformat_alloc_context(); TORCH_CHECK(p, "Failed to allocate AVFormatContext."); if (io_ctx) { p->pb = io_ctx; @@ -33,7 +28,7 @@ AVFormatContext* get_input_format_context( if (format.has_value()) { std::string format_str = format.value(); AVFORMAT_CONST AVInputFormat* pInput = - FFMPEG av_find_input_format(format_str.c_str()); + av_find_input_format(format_str.c_str()); TORCH_CHECK(pInput, "Unsupported device/format: \"", format_str, "\""); return pInput; } @@ -41,7 +36,7 @@ AVFormatContext* get_input_format_context( }(); AVDictionary* opt = get_option_dict(option); - int ret = FFMPEG avformat_open_input(&p, src.c_str(), pInputFormat, &opt); + int ret = avformat_open_input(&p, src.c_str(), pInputFormat, &opt); clean_up_dict(opt); TORCH_CHECK( @@ -57,7 +52,7 @@ AVFormatContext* get_input_format_context( StreamReader::StreamReader(AVFormatContext* p) : format_ctx(p) { C10_LOG_API_USAGE_ONCE("torchaudio.io.StreamReader"); - int ret = FFMPEG avformat_find_stream_info(format_ctx, nullptr); + int ret = avformat_find_stream_info(format_ctx, nullptr); TORCH_CHECK( ret >= 0, "Failed to find stream information: ", av_err2string(ret)); @@ -114,7 +109,7 @@ void validate_src_stream_type( "Stream ", i, " is not ", - FFMPEG av_get_media_type_string(type), + av_get_media_type_string(type), " stream."); } @@ -129,7 +124,7 @@ namespace { OptionDict parse_metadata(const AVDictionary* metadata) { AVDictionaryEntry* tag = nullptr; OptionDict ret; - while ((tag = FFMPEG av_dict_get(metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { + while ((tag = av_dict_get(metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { ret.emplace(std::string(tag->key), std::string(tag->value)); } return ret; @@ -152,8 +147,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { ret.num_frames = stream->nb_frames; ret.bits_per_sample = codecpar->bits_per_raw_sample; ret.metadata = parse_metadata(stream->metadata); - const AVCodecDescriptor* desc = - FFMPEG avcodec_descriptor_get(codecpar->codec_id); + const AVCodecDescriptor* desc = avcodec_descriptor_get(codecpar->codec_id); if (desc) { ret.codec_name = desc->name; ret.codec_long_name = desc->long_name; @@ -163,7 +157,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { case AVMEDIA_TYPE_AUDIO: { AVSampleFormat smp_fmt = static_cast(codecpar->format); if (smp_fmt != AV_SAMPLE_FMT_NONE) { - ret.fmt_name = FFMPEG av_get_sample_fmt_name(smp_fmt); + ret.fmt_name = av_get_sample_fmt_name(smp_fmt); } ret.sample_rate = static_cast(codecpar->sample_rate); ret.num_channels = codecpar->channels; @@ -172,7 +166,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { case AVMEDIA_TYPE_VIDEO: { AVPixelFormat pix_fmt = static_cast(codecpar->format); if (pix_fmt != AV_PIX_FMT_NONE) { - ret.fmt_name = FFMPEG av_get_pix_fmt_name(pix_fmt); + ret.fmt_name = av_get_pix_fmt_name(pix_fmt); } ret.width = codecpar->width; ret.height = codecpar->height; @@ -186,7 +180,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { namespace { AVCodecParameters* get_codecpar() { - AVCodecParameters* ptr = FFMPEG avcodec_parameters_alloc(); + AVCodecParameters* ptr = avcodec_parameters_alloc(); TORCH_CHECK(ptr, "Failed to allocate resource."); return ptr; } @@ -197,7 +191,7 @@ StreamParams StreamReader::get_src_stream_params(int i) { AVStream* stream = format_ctx->streams[i]; AVCodecParametersPtr codec_params(get_codecpar()); - int ret = FFMPEG avcodec_parameters_copy(codec_params, stream->codecpar); + int ret = avcodec_parameters_copy(codec_params, stream->codecpar); TORCH_CHECK( ret >= 0, "Failed to copy the stream's codec parameters. (", @@ -239,12 +233,12 @@ OutputStreamInfo StreamReader::get_out_stream_info(int i) const { } int64_t StreamReader::find_best_audio_stream() const { - return FFMPEG av_find_best_stream( + return av_find_best_stream( format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0); } int64_t StreamReader::find_best_video_stream() const { - return FFMPEG av_find_best_stream( + return av_find_best_stream( format_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0); } @@ -294,7 +288,7 @@ void StreamReader::seek(double timestamp_s, int64_t mode) { TORCH_CHECK(false, "Invalid mode value: ", mode); } - int ret = FFMPEG av_seek_frame(format_ctx, -1, timestamp_av_tb, flag); + int ret = av_seek_frame(format_ctx, -1, timestamp_av_tb, flag); if (ret < 0) { seek_timestamp = 0; @@ -407,12 +401,12 @@ void StreamReader::add_stream( case AVMEDIA_TYPE_AUDIO: return AVRational{0, 1}; case AVMEDIA_TYPE_VIDEO: - return FFMPEG av_guess_frame_rate(format_ctx, stream, nullptr); + return av_guess_frame_rate(format_ctx, stream, nullptr); default: TORCH_INTERNAL_ASSERT( false, "Unexpected media type is given: ", - FFMPEG av_get_media_type_string(media_type)); + av_get_media_type_string(media_type)); } }(); int key = processors[i]->add_stream( @@ -451,7 +445,7 @@ void StreamReader::remove_stream(int64_t i) { // 1: It's done, caller should stop calling // <0: Some error happened int StreamReader::process_packet() { - int ret = FFMPEG av_read_frame(format_ctx, packet); + int ret = av_read_frame(format_ctx, packet); if (ret == AVERROR_EOF) { ret = drain(); return (ret < 0) ? ret : 1; @@ -582,13 +576,12 @@ AVIOContext* get_io_context( int buffer_size, int (*read_packet)(void* opaque, uint8_t* buf, int buf_size), int64_t (*seek)(void* opaque, int64_t offset, int whence)) { - unsigned char* buffer = - static_cast(FFMPEG av_malloc(buffer_size)); + unsigned char* buffer = static_cast(av_malloc(buffer_size)); TORCH_CHECK(buffer, "Failed to allocate buffer."); - AVIOContext* io_ctx = FFMPEG avio_alloc_context( + AVIOContext* io_ctx = avio_alloc_context( buffer, buffer_size, 0, opaque, read_packet, nullptr, seek); if (!io_ctx) { - FFMPEG av_freep(&buffer); + av_freep(&buffer); TORCH_CHECK(false, "Failed to allocate AVIOContext."); } return io_ctx; diff --git a/torchaudio/csrc/ffmpeg/stream_writer/encode_process.cpp b/torchaudio/csrc/ffmpeg/stream_writer/encode_process.cpp index 3f9a153004..c13c3cfcb9 100644 --- a/torchaudio/csrc/ffmpeg/stream_writer/encode_process.cpp +++ b/torchaudio/csrc/ffmpeg/stream_writer/encode_process.cpp @@ -1,12 +1,7 @@ #include #include -#include #include -extern "C" { -#include -} - namespace torchaudio::io { //////////////////////////////////////////////////////////////////////////////// @@ -61,7 +56,7 @@ void EncodeProcess::process_frame(AVFrame* src) { if (ret >= 0) { encoder.encode(dst_frame); } - FFMPEG av_frame_unref(dst_frame); + av_frame_unref(dst_frame); } } @@ -76,8 +71,8 @@ void EncodeProcess::flush() { namespace { enum AVSampleFormat get_src_sample_fmt(const std::string& src) { - auto fmt = FFMPEG av_get_sample_fmt(src.c_str()); - if (fmt != AV_SAMPLE_FMT_NONE && !FFMPEG av_sample_fmt_is_planar(fmt)) { + auto fmt = av_get_sample_fmt(src.c_str()); + if (fmt != AV_SAMPLE_FMT_NONE && !av_sample_fmt_is_planar(fmt)) { return fmt; } TORCH_CHECK( @@ -94,7 +89,7 @@ enum AVSampleFormat get_src_sample_fmt(const std::string& src) { AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL}) { - ret.emplace_back(FFMPEG av_get_sample_fmt_name(fmt)); + ret.emplace_back(av_get_sample_fmt_name(fmt)); } return c10::Join(", ", ret); }(), @@ -102,7 +97,7 @@ enum AVSampleFormat get_src_sample_fmt(const std::string& src) { } enum AVPixelFormat get_src_pix_fmt(const std::string& src) { - AVPixelFormat fmt = FFMPEG av_get_pix_fmt(src.c_str()); + AVPixelFormat fmt = av_get_pix_fmt(src.c_str()); switch (fmt) { case AV_PIX_FMT_GRAY8: case AV_PIX_FMT_RGB24: @@ -123,7 +118,7 @@ enum AVPixelFormat get_src_pix_fmt(const std::string& src) { AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, AV_PIX_FMT_YUV444P}) { - ret.emplace_back(FFMPEG av_get_pix_fmt_name(fmt)); + ret.emplace_back(av_get_pix_fmt_name(fmt)); } return c10::Join(", ", ret); }(), @@ -137,21 +132,18 @@ const AVCodec* get_codec( AVCodecID default_codec, const c10::optional& encoder) { if (encoder) { - const AVCodec* c = - FFMPEG avcodec_find_encoder_by_name(encoder.value().c_str()); + const AVCodec* c = avcodec_find_encoder_by_name(encoder.value().c_str()); TORCH_CHECK(c, "Unexpected codec: ", encoder.value()); return c; } - const AVCodec* c = FFMPEG avcodec_find_encoder(default_codec); + const AVCodec* c = avcodec_find_encoder(default_codec); TORCH_CHECK( - c, - "Encoder not found for codec: ", - FFMPEG avcodec_get_name(default_codec)); + c, "Encoder not found for codec: ", avcodec_get_name(default_codec)); return c; } AVCodecContextPtr get_codec_ctx(const AVCodec* codec, int flags) { - AVCodecContext* ctx = FFMPEG avcodec_alloc_context3(codec); + AVCodecContext* ctx = avcodec_alloc_context3(codec); TORCH_CHECK(ctx, "Failed to allocate CodecContext."); if (flags & AVFMT_GLOBALHEADER) { @@ -177,25 +169,25 @@ void open_codec( // while "libopus" refers to the one depends on libopusenc // https://ffmpeg.org/doxygen/4.1/libopusenc_8c.html#aa1d649e48cd2ec00cfe181cf9d0f3251 if (std::strcmp(codec_ctx->codec->name, "vorbis") == 0) { - if (!FFMPEG av_dict_get(opt, "strict", nullptr, 0)) { + if (!av_dict_get(opt, "strict", nullptr, 0)) { TORCH_WARN_ONCE( "\"vorbis\" encoder is selected. Enabling '-strict experimental'. ", "If this is not desired, please provide \"strict\" encoder option ", "with desired value."); - FFMPEG av_dict_set(&opt, "strict", "experimental", 0); + av_dict_set(&opt, "strict", "experimental", 0); } } if (std::strcmp(codec_ctx->codec->name, "opus") == 0) { - if (!FFMPEG av_dict_get(opt, "strict", nullptr, 0)) { + if (!av_dict_get(opt, "strict", nullptr, 0)) { TORCH_WARN_ONCE( "\"opus\" encoder is selected. Enabling '-strict experimental'. ", "If this is not desired, please provide \"strict\" encoder option ", "with desired value."); - FFMPEG av_dict_set(&opt, "strict", "experimental", 0); + av_dict_set(&opt, "strict", "experimental", 0); } } - int ret = FFMPEG avcodec_open2(codec_ctx, codec_ctx->codec, &opt); + int ret = avcodec_open2(codec_ctx, codec_ctx->codec, &opt); clean_up_dict(opt); TORCH_CHECK(ret >= 0, "Failed to open codec: (", av_err2string(ret), ")"); } @@ -222,7 +214,7 @@ bool supported_sample_fmt( std::string get_supported_formats(const AVSampleFormat* sample_fmts) { std::vector ret; while (*sample_fmts != AV_SAMPLE_FMT_NONE) { - ret.emplace_back(FFMPEG av_get_sample_fmt_name(*sample_fmts)); + ret.emplace_back(av_get_sample_fmt_name(*sample_fmts)); ++sample_fmts; } return c10::Join(", ", ret); @@ -234,7 +226,7 @@ AVSampleFormat get_enc_fmt( const AVCodec* codec) { if (encoder_format) { auto& enc_fmt_val = encoder_format.value(); - auto fmt = FFMPEG av_get_sample_fmt(enc_fmt_val.c_str()); + auto fmt = av_get_sample_fmt(enc_fmt_val.c_str()); TORCH_CHECK( fmt != AV_SAMPLE_FMT_NONE, "Unknown sample format: ", enc_fmt_val); TORCH_CHECK( @@ -321,8 +313,8 @@ std::string get_supported_channels(const uint64_t* channel_layouts) { std::vector names; while (*channel_layouts) { std::stringstream ss; - ss << FFMPEG av_get_channel_layout_nb_channels(*channel_layouts); - ss << " (" << FFMPEG av_get_channel_name(*channel_layouts) << ")"; + ss << av_get_channel_layout_nb_channels(*channel_layouts); + ss << " (" << av_get_channel_name(*channel_layouts) << ")"; names.emplace_back(ss.str()); ++channel_layouts; } @@ -339,10 +331,10 @@ uint64_t get_channel_layout( TORCH_CHECK( val > 0, "The number of channels must be greater than 0. Found: ", val); if (!codec->channel_layouts) { - return static_cast(FFMPEG av_get_default_channel_layout(val)); + return static_cast(av_get_default_channel_layout(val)); } for (const uint64_t* it = codec->channel_layouts; *it; ++it) { - if (FFMPEG av_get_channel_layout_nb_channels(*it) == val) { + if (av_get_channel_layout_nb_channels(*it) == val) { return *it; } } @@ -379,9 +371,8 @@ void configure_audio_codec_ctx( const c10::optional& codec_config) { codec_ctx->sample_fmt = format; codec_ctx->sample_rate = sample_rate; - codec_ctx->time_base = av_inv_q(FFMPEG av_d2q(sample_rate, 1 << 24)); - codec_ctx->channels = - FFMPEG av_get_channel_layout_nb_channels(channel_layout); + codec_ctx->time_base = av_inv_q(av_d2q(sample_rate, 1 << 24)); + codec_ctx->channels = av_get_channel_layout_nb_channels(channel_layout); codec_ctx->channel_layout = channel_layout; // Set optional stuff @@ -420,7 +411,7 @@ bool supported_pix_fmt(const AVPixelFormat fmt, const AVPixelFormat* pix_fmts) { std::string get_supported_formats(const AVPixelFormat* pix_fmts) { std::vector ret; while (*pix_fmts != AV_PIX_FMT_NONE) { - ret.emplace_back(FFMPEG av_get_pix_fmt_name(*pix_fmts)); + ret.emplace_back(av_get_pix_fmt_name(*pix_fmts)); ++pix_fmts; } return c10::Join(", ", ret); @@ -432,7 +423,7 @@ AVPixelFormat get_enc_fmt( const AVCodec* codec) { if (encoder_format) { const auto& val = encoder_format.value(); - auto fmt = FFMPEG av_get_pix_fmt(val.c_str()); + auto fmt = av_get_pix_fmt(val.c_str()); TORCH_CHECK( supported_pix_fmt(fmt, codec->pix_fmts), codec->name, @@ -470,7 +461,7 @@ AVRational get_enc_rate( std::isfinite(enc_rate) && enc_rate > 0, "Encoder sample rate must be positive and fininte. Found: ", enc_rate); - AVRational rate = FFMPEG av_d2q(enc_rate, 1 << 24); + AVRational rate = av_d2q(enc_rate, 1 << 24); TORCH_CHECK( supported_frame_rate(rate, codec->supported_framerates), codec->name, @@ -554,14 +545,14 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) { // context to AVCodecContext. But this way, it will be deallocated // automatically at the time AVCodecContext is freed, so we do that. - ctx->hw_device_ctx = FFMPEG av_buffer_ref(get_cuda_context(device.index())); + ctx->hw_device_ctx = av_buffer_ref(get_cuda_context(device.index())); TORCH_INTERNAL_ASSERT( ctx->hw_device_ctx, "Failed to reference HW device context."); ctx->sw_pix_fmt = ctx->pix_fmt; ctx->pix_fmt = AV_PIX_FMT_CUDA; - ctx->hw_frames_ctx = FFMPEG av_hwframe_ctx_alloc(ctx->hw_device_ctx); + ctx->hw_frames_ctx = av_hwframe_ctx_alloc(ctx->hw_device_ctx); TORCH_CHECK(ctx->hw_frames_ctx, "Failed to create CUDA frame context."); auto frames_ctx = (AVHWFramesContext*)(ctx->hw_frames_ctx->data); @@ -571,7 +562,7 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) { frames_ctx->height = ctx->height; frames_ctx->initial_pool_size = 5; - int ret = FFMPEG av_hwframe_ctx_init(ctx->hw_frames_ctx); + int ret = av_hwframe_ctx_init(ctx->hw_frames_ctx); TORCH_CHECK( ret >= 0, "Failed to initialize CUDA frame context: ", @@ -583,11 +574,11 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) { //////////////////////////////////////////////////////////////////////////////// AVStream* get_stream(AVFormatContext* format_ctx, AVCodecContext* codec_ctx) { - AVStream* stream = FFMPEG avformat_new_stream(format_ctx, nullptr); + AVStream* stream = avformat_new_stream(format_ctx, nullptr); TORCH_CHECK(stream, "Failed to allocate stream."); stream->time_base = codec_ctx->time_base; - int ret = FFMPEG avcodec_parameters_from_context(stream->codecpar, codec_ctx); + int ret = avcodec_parameters_from_context(stream->codecpar, codec_ctx); TORCH_CHECK( ret >= 0, "Failed to copy the stream parameter: ", av_err2string(ret)); return stream; @@ -614,7 +605,7 @@ FilterGraph get_audio_filter_graph( if (filter_desc || src_fmt != enc_fmt || src_sample_rate != enc_sample_rate || src_ch_layout != enc_ch_layout) { std::stringstream ss; - ss << "aformat=sample_fmts=" << FFMPEG av_get_sample_fmt_name(enc_fmt) + ss << "aformat=sample_fmts=" << av_get_sample_fmt_name(enc_fmt) << ":sample_rates=" << enc_sample_rate << ":channel_layouts=0x" << std::hex << enc_ch_layout; parts.push_back(ss.str()); @@ -665,7 +656,7 @@ FilterGraph get_video_filter_graph( } if (filter_desc || src_fmt != enc_fmt) { std::stringstream ss; - ss << "format=" << FFMPEG av_get_pix_fmt_name(enc_fmt); + ss << "format=" << av_get_pix_fmt_name(enc_fmt); parts.emplace_back(ss.str()); } if (filter_desc || @@ -709,7 +700,7 @@ AVFramePtr get_audio_frame( frame->channel_layout = channel_layout; frame->sample_rate = sample_rate; frame->nb_samples = nb_samples; - int ret = FFMPEG av_frame_get_buffer(frame, 0); + int ret = av_frame_get_buffer(frame, 0); TORCH_CHECK( ret >= 0, "Error allocating the source audio frame:", av_err2string(ret)); @@ -725,7 +716,7 @@ AVFramePtr get_video_frame(AVPixelFormat src_fmt, int width, int height) { frame->format = src_fmt; frame->width = width; frame->height = height; - int ret = FFMPEG av_frame_get_buffer(frame, 0); + int ret = av_frame_get_buffer(frame, 0); TORCH_CHECK( ret >= 0, "Error allocating a video buffer :", av_err2string(ret)); @@ -770,10 +761,10 @@ EncodeProcess get_audio_encode_process( // case, restrictions on the format to support tensor inputs do not apply, and // so we directly get the format via FFmpeg. const AVSampleFormat src_fmt = (disable_converter) - ? FFMPEG av_get_sample_fmt(format.c_str()) + ? av_get_sample_fmt(format.c_str()) : get_src_sample_fmt(format); - const auto src_ch_layout = static_cast( - FFMPEG av_get_default_channel_layout(src_num_channels)); + const auto src_ch_layout = + static_cast(av_get_default_channel_layout(src_num_channels)); // 2. Fetch codec from default or override TORCH_CHECK( @@ -793,7 +784,7 @@ EncodeProcess get_audio_encode_process( // https://github.com/FFmpeg/FFmpeg/blob/0684e58886881a998f1a7b510d73600ff1df2b90/libavcodec/vorbisenc.c#L1277 // This is the case for at least until FFmpeg 6.0, so it will be // like this for a while. - return static_cast(FFMPEG av_get_default_channel_layout(2)); + return static_cast(av_get_default_channel_layout(2)); } return get_channel_layout(src_ch_layout, encoder_num_channels, codec); }(); @@ -881,9 +872,9 @@ EncodeProcess get_video_encode_process( // case, restrictions on the format to support tensor inputs do not apply, and // so we directly get the format via FFmpeg. const AVPixelFormat src_fmt = (disable_converter) - ? FFMPEG av_get_pix_fmt(format.c_str()) + ? av_get_pix_fmt(format.c_str()) : get_src_pix_fmt(format); - const AVRational src_rate = FFMPEG av_d2q(frame_rate, 1 << 24); + const AVRational src_rate = av_d2q(frame_rate, 1 << 24); // 2. Fetch codec from default or override TORCH_CHECK( @@ -950,8 +941,7 @@ EncodeProcess get_video_encode_process( AVFramePtr src_frame = [&]() { if (codec_ctx->hw_frames_ctx) { AVFramePtr frame{alloc_avframe()}; - int ret = - FFMPEG av_hwframe_get_buffer(codec_ctx->hw_frames_ctx, frame, 0); + int ret = av_hwframe_get_buffer(codec_ctx->hw_frames_ctx, frame, 0); TORCH_CHECK(ret >= 0, "Failed to fetch CUDA frame: ", av_err2string(ret)); frame->nb_samples = 1; frame->pts = 0; diff --git a/torchaudio/csrc/ffmpeg/stream_writer/encoder.cpp b/torchaudio/csrc/ffmpeg/stream_writer/encoder.cpp index 7552484f2a..3d2e501535 100644 --- a/torchaudio/csrc/ffmpeg/stream_writer/encoder.cpp +++ b/torchaudio/csrc/ffmpeg/stream_writer/encoder.cpp @@ -1,5 +1,4 @@ #include -#include namespace torchaudio::io { @@ -14,10 +13,10 @@ Encoder::Encoder( /// /// @param frame Frame data to encode void Encoder::encode(AVFrame* frame) { - int ret = FFMPEG avcodec_send_frame(codec_ctx, frame); + int ret = avcodec_send_frame(codec_ctx, frame); TORCH_CHECK(ret >= 0, "Failed to encode frame (", av_err2string(ret), ")."); while (ret >= 0) { - ret = FFMPEG avcodec_receive_packet(codec_ctx, packet); + ret = avcodec_receive_packet(codec_ctx, packet); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { if (ret == AVERROR_EOF) { // Note: @@ -32,7 +31,7 @@ void Encoder::encode(AVFrame* frame) { // An alternative is to use `av_write_frame` functoin, but in that case // client code is responsible for ordering packets, which makes it // complicated to use StreamWriter - ret = FFMPEG av_interleaved_write_frame(format_ctx, nullptr); + ret = av_interleaved_write_frame(format_ctx, nullptr); TORCH_CHECK( ret >= 0, "Failed to flush packet (", av_err2string(ret), ")."); } @@ -52,11 +51,10 @@ void Encoder::encode(AVFrame* frame) { // This has to be set before av_packet_rescale_ts bellow. packet->duration = 1; } - FFMPEG av_packet_rescale_ts( - packet, codec_ctx->time_base, stream->time_base); + av_packet_rescale_ts(packet, codec_ctx->time_base, stream->time_base); packet->stream_index = stream->index; - ret = FFMPEG av_interleaved_write_frame(format_ctx, packet); + ret = av_interleaved_write_frame(format_ctx, packet); TORCH_CHECK(ret >= 0, "Failed to write packet (", av_err2string(ret), ")."); } } diff --git a/torchaudio/csrc/ffmpeg/stream_writer/packet_writer.cpp b/torchaudio/csrc/ffmpeg/stream_writer/packet_writer.cpp index 45872a6af5..0701c5a596 100644 --- a/torchaudio/csrc/ffmpeg/stream_writer/packet_writer.cpp +++ b/torchaudio/csrc/ffmpeg/stream_writer/packet_writer.cpp @@ -1,14 +1,13 @@ #include -#include namespace torchaudio::io { namespace { AVStream* add_stream( AVFormatContext* format_ctx, const StreamParams& stream_params) { - AVStream* stream = FFMPEG avformat_new_stream(format_ctx, nullptr); - int ret = FFMPEG avcodec_parameters_copy( - stream->codecpar, stream_params.codec_params); + AVStream* stream = avformat_new_stream(format_ctx, nullptr); + int ret = + avcodec_parameters_copy(stream->codecpar, stream_params.codec_params); TORCH_CHECK( ret >= 0, "Failed to copy the stream's codec parameters. (", @@ -27,12 +26,11 @@ PacketWriter::PacketWriter( void PacketWriter::write_packet(const AVPacketPtr& packet) { AVPacket dst_packet; - int ret = FFMPEG av_packet_ref(&dst_packet, packet); + int ret = av_packet_ref(&dst_packet, packet); TORCH_CHECK(ret >= 0, "Failed to copy packet."); - FFMPEG av_packet_rescale_ts( - &dst_packet, original_time_base, stream->time_base); + av_packet_rescale_ts(&dst_packet, original_time_base, stream->time_base); dst_packet.stream_index = stream->index; - ret = FFMPEG av_interleaved_write_frame(format_ctx, &dst_packet); + ret = av_interleaved_write_frame(format_ctx, &dst_packet); TORCH_CHECK(ret >= 0, "Failed to write packet to destination."); } } // namespace torchaudio::io diff --git a/torchaudio/csrc/ffmpeg/stream_writer/stream_writer.cpp b/torchaudio/csrc/ffmpeg/stream_writer/stream_writer.cpp index 4252cd7072..df51d92355 100644 --- a/torchaudio/csrc/ffmpeg/stream_writer/stream_writer.cpp +++ b/torchaudio/csrc/ffmpeg/stream_writer/stream_writer.cpp @@ -1,11 +1,11 @@ #include -#include #ifdef USE_CUDA #include #endif -namespace torchaudio::io { +namespace torchaudio { +namespace io { namespace { AVFormatContext* get_output_format_context( @@ -19,7 +19,7 @@ AVFormatContext* get_output_format_context( } AVFormatContext* p = nullptr; - int ret = FFMPEG avformat_alloc_output_context2( + int ret = avformat_alloc_output_context2( &p, nullptr, format ? format.value().c_str() : nullptr, dst.c_str()); TORCH_CHECK( ret >= 0, @@ -208,14 +208,14 @@ void StreamWriter::add_video_frame_stream( } void StreamWriter::set_metadata(const OptionDict& metadata) { - FFMPEG av_dict_free(&format_ctx->metadata); + av_dict_free(&format_ctx->metadata); for (auto const& [key, value] : metadata) { - FFMPEG av_dict_set(&format_ctx->metadata, key.c_str(), value.c_str(), 0); + av_dict_set(&format_ctx->metadata, key.c_str(), value.c_str(), 0); } } void StreamWriter::dump_format(int64_t i) { - FFMPEG av_dump_format(format_ctx, (int)i, format_ctx->url, 1); + av_dump_format(format_ctx, (int)i, format_ctx->url, 1); } void StreamWriter::open(const c10::optional& option) { @@ -231,10 +231,10 @@ void StreamWriter::open(const c10::optional& option) { AVDictionary* opt = get_option_dict(option); if (!(fmt->flags & AVFMT_NOFILE) && !(format_ctx->flags & AVFMT_FLAG_CUSTOM_IO)) { - ret = FFMPEG avio_open2( + ret = avio_open2( &format_ctx->pb, format_ctx->url, AVIO_FLAG_WRITE, nullptr, &opt); if (ret < 0) { - FFMPEG av_dict_free(&opt); + av_dict_free(&opt); TORCH_CHECK( false, "Failed to open dst: ", @@ -245,7 +245,7 @@ void StreamWriter::open(const c10::optional& option) { } } - ret = FFMPEG avformat_write_header(format_ctx, &opt); + ret = avformat_write_header(format_ctx, &opt); clean_up_dict(opt); TORCH_CHECK( ret >= 0, @@ -258,7 +258,7 @@ void StreamWriter::open(const c10::optional& option) { } void StreamWriter::close() { - int ret = FFMPEG av_write_trailer(format_ctx); + int ret = av_write_trailer(format_ctx); if (ret < 0) { LOG(WARNING) << "Failed to write trailer. (" << av_err2string(ret) << ")."; } @@ -269,7 +269,7 @@ void StreamWriter::close() { if (!(fmt->flags & AVFMT_NOFILE) && !(format_ctx->flags & AVFMT_FLAG_CUSTOM_IO)) { // avio_closep can be only applied to AVIOContext opened by avio_open - FFMPEG avio_closep(&(format_ctx->pb)); + avio_closep(&(format_ctx->pb)); } is_open = false; } @@ -355,13 +355,12 @@ AVIOContext* get_io_context( int buffer_size, int (*write_packet)(void* opaque, uint8_t* buf, int buf_size), int64_t (*seek)(void* opaque, int64_t offset, int whence)) { - unsigned char* buffer = - static_cast(FFMPEG av_malloc(buffer_size)); + unsigned char* buffer = static_cast(av_malloc(buffer_size)); TORCH_CHECK(buffer, "Failed to allocate buffer."); - AVIOContext* io_ctx = FFMPEG avio_alloc_context( + AVIOContext* io_ctx = avio_alloc_context( buffer, buffer_size, 1, opaque, nullptr, write_packet, seek); if (!io_ctx) { - FFMPEG av_freep(&buffer); + av_freep(&buffer); TORCH_CHECK(false, "Failed to allocate AVIOContext."); } return io_ctx; @@ -385,4 +384,5 @@ StreamWriterCustomIO::StreamWriterCustomIO( : CustomOutput(opaque, buffer_size, write_packet, seek), StreamWriter(io_ctx, format) {} -} // namespace torchaudio::io +} // namespace io +} // namespace torchaudio diff --git a/torchaudio/csrc/ffmpeg/stream_writer/tensor_converter.cpp b/torchaudio/csrc/ffmpeg/stream_writer/tensor_converter.cpp index 1478d38d5a..e9350f0479 100644 --- a/torchaudio/csrc/ffmpeg/stream_writer/tensor_converter.cpp +++ b/torchaudio/csrc/ffmpeg/stream_writer/tensor_converter.cpp @@ -1,11 +1,11 @@ #include -#include #ifdef USE_CUDA #include #endif namespace torchaudio::io { + namespace { using InitFunc = TensorConverter::InitFunc; @@ -41,8 +41,8 @@ void convert_func_(const torch::Tensor& chunk, AVFrame* buffer) { TORCH_INTERNAL_ASSERT_DEBUG_ONLY(chunk.size(1) == buffer->channels); // https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00334 - if (!FFMPEG av_frame_is_writable(buffer)) { - int ret = FFMPEG av_frame_make_writable(buffer); + if (!av_frame_is_writable(buffer)) { + int ret = av_frame_make_writable(buffer); TORCH_INTERNAL_ASSERT( ret >= 0, "Failed to make frame writable: ", av_err2string(ret)); } @@ -145,8 +145,8 @@ void write_interlaced_video( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(3) == num_channels); // https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00472 - if (!FFMPEG av_frame_is_writable(buffer)) { - int ret = FFMPEG av_frame_make_writable(buffer); + if (!av_frame_is_writable(buffer)) { + int ret = av_frame_make_writable(buffer); TORCH_INTERNAL_ASSERT( ret >= 0, "Failed to make frame writable: ", av_err2string(ret)); } @@ -187,7 +187,7 @@ void write_planar_video( AVFrame* buffer, int num_planes) { const auto num_colors = - FFMPEG av_pix_fmt_desc_get((AVPixelFormat)buffer->format)->nb_components; + av_pix_fmt_desc_get((AVPixelFormat)buffer->format)->nb_components; TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.dim() == 4); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(0) == 1); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(1) == num_colors); @@ -195,8 +195,8 @@ void write_planar_video( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(3), buffer->width); // https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00472 - if (!FFMPEG av_frame_is_writable(buffer)) { - int ret = FFMPEG av_frame_make_writable(buffer); + if (!av_frame_is_writable(buffer)) { + int ret = av_frame_make_writable(buffer); TORCH_INTERNAL_ASSERT( ret >= 0, "Failed to make frame writable: ", av_err2string(ret)); } @@ -308,7 +308,7 @@ std::pair get_video_func(AVFrame* buffer) { TORCH_CHECK( false, "Unexpected pixel format for CUDA: ", - FFMPEG av_get_pix_fmt_name(sw_pix_fmt)); + av_get_pix_fmt_name(sw_pix_fmt)); } } @@ -317,7 +317,7 @@ std::pair get_video_func(AVFrame* buffer) { case AV_PIX_FMT_GRAY8: case AV_PIX_FMT_RGB24: case AV_PIX_FMT_BGR24: { - int channels = FFMPEG av_pix_fmt_desc_get(pix_fmt)->nb_components; + int channels = av_pix_fmt_desc_get(pix_fmt)->nb_components; InitFunc init_func = [=](const torch::Tensor& t, AVFrame* f) { validate_video_input(t, f, channels); return init_interlaced(t); @@ -339,9 +339,7 @@ std::pair get_video_func(AVFrame* buffer) { } default: TORCH_CHECK( - false, - "Unexpected pixel format: ", - FFMPEG av_get_pix_fmt_name(pix_fmt)); + false, "Unexpected pixel format: ", av_get_pix_fmt_name(pix_fmt)); } } @@ -385,9 +383,7 @@ TensorConverter::TensorConverter(AVMediaType type, AVFrame* buf, int buf_size) break; default: TORCH_INTERNAL_ASSERT( - false, - "Unsupported media type: ", - FFMPEG av_get_media_type_string(type)); + false, "Unsupported media type: ", av_get_media_type_string(type)); } } diff --git a/torchaudio/csrc/ffmpeg/stub.cpp b/torchaudio/csrc/ffmpeg/stub.cpp deleted file mode 100644 index 4960b0050e..0000000000 --- a/torchaudio/csrc/ffmpeg/stub.cpp +++ /dev/null @@ -1,196 +0,0 @@ -#ifdef DLOPEN_FFMPEG - -#include -#include -#include - -extern "C" { -#include -#include -#include -#include -#include -} - -namespace torchaudio::io::detail { -namespace { -class StubImpl { - at::DynamicLibrary libavutil; - at::DynamicLibrary libavcodec; - at::DynamicLibrary libavformat; - at::DynamicLibrary libavdevice; - at::DynamicLibrary libavfilter; - - public: - // The struct that holds all the function pointers to be used. - FFmpegStub stub{}; - - StubImpl( - const char* util, - const char* codec, - const char* format, - const char* device, - const char* filter) - : libavutil(util), - libavcodec(codec), - libavformat(format), - libavdevice(device), - libavfilter(filter) { -#define set(X) stub.X = (decltype(FFmpegStub::X))libavutil.sym(#X) - set(av_buffer_ref); - set(av_buffer_unref); - set(av_d2q); - set(av_dict_free); - set(av_dict_get); - set(av_dict_set); - set(av_frame_alloc); - set(av_frame_free); - set(av_frame_get_buffer); - set(av_frame_is_writable); - set(av_frame_make_writable); - set(av_frame_unref); - set(av_freep); - set(av_get_channel_layout_nb_channels); - set(av_get_channel_name); - set(av_get_default_channel_layout); - set(av_get_media_type_string); - set(av_get_pix_fmt); - set(av_get_pix_fmt_name); - set(av_get_sample_fmt); - set(av_get_sample_fmt_name); - set(av_get_time_base_q); - set(av_hwdevice_ctx_create); - set(av_hwframe_ctx_alloc); - set(av_hwframe_ctx_init); - set(av_hwframe_get_buffer); - set(av_log_get_level); - set(av_log_set_level); - set(av_malloc); - set(av_pix_fmt_desc_get); - set(av_rescale_q); - set(av_sample_fmt_is_planar); - set(av_strdup); - set(av_strerror); - set(avutil_version); -#undef set - -#define set(X) stub.X = (decltype(FFmpegStub::X))libavcodec.sym(#X) - set(av_codec_is_decoder); - set(av_codec_is_encoder); - set(av_codec_iterate); - set(av_packet_alloc); - set(av_packet_clone); - set(av_packet_free); - set(av_packet_ref); - set(av_packet_rescale_ts); - set(av_packet_unref); - set(avcodec_alloc_context3); - set(avcodec_configuration); - set(avcodec_descriptor_get); - set(avcodec_find_decoder); - set(avcodec_find_decoder_by_name); - set(avcodec_find_encoder); - set(avcodec_find_encoder_by_name); - set(avcodec_flush_buffers); - set(avcodec_free_context); - set(avcodec_get_hw_config); - set(avcodec_get_name); - set(avcodec_open2); - set(avcodec_parameters_alloc); - set(avcodec_parameters_copy); - set(avcodec_parameters_free); - set(avcodec_parameters_from_context); - set(avcodec_parameters_to_context); - set(avcodec_receive_frame); - set(avcodec_receive_packet); - set(avcodec_send_frame); - set(avcodec_send_packet); - set(avcodec_version); -#undef set - -#define set(X) stub.X = (decltype(FFmpegStub::X))libavformat.sym(#X) - set(av_demuxer_iterate); - set(av_dump_format); - set(av_find_best_stream); - set(av_find_input_format); - set(av_guess_frame_rate); - set(av_interleaved_write_frame); - set(av_muxer_iterate); - set(av_read_frame); - set(av_seek_frame); - set(av_write_trailer); - set(avio_alloc_context); - set(avio_enum_protocols); - set(avio_closep); - set(avio_flush); - set(avio_open2); - set(avformat_alloc_context); - set(avformat_alloc_output_context2); - set(avformat_close_input); - set(avformat_find_stream_info); - set(avformat_free_context); - set(avformat_new_stream); - set(avformat_open_input); - set(avformat_version); - set(avformat_write_header); -#undef set - -#define set(X) stub.X = (decltype(FFmpegStub::X))libavdevice.sym(#X) - set(avdevice_register_all); - set(avdevice_version); -#undef set - -#define set(X) stub.X = (decltype(FFmpegStub::X))libavfilter.sym(#X) - set(av_buffersink_get_frame); - set(av_buffersrc_add_frame_flags); - set(avfilter_get_by_name); - set(avfilter_graph_alloc); - set(avfilter_graph_config); - set(avfilter_graph_create_filter); - set(avfilter_graph_free); - set(avfilter_graph_parse_ptr); - set(avfilter_inout_alloc); - set(avfilter_inout_free); - set(avfilter_version); -#undef set - } -}; - -static std::unique_ptr _stub; - -void _init_stub() { -#if defined(_WIN32) - _stub = std::make_unique( - "avutil-" AV_STRINGIFY(LIBAVUTIL_VERSION_MAJOR) ".dll", - "avcodec-" AV_STRINGIFY(LIBAVCODEC_VERSION_MAJOR) ".dll", - "avformat-" AV_STRINGIFY(LIBAVFORMAT_VERSION_MAJOR) ".dll", - "avdevice-" AV_STRINGIFY(LIBAVDEVICE_VERSION_MAJOR) ".dll", - "avfilter-" AV_STRINGIFY(LIBAVFILTER_VERSION_MAJOR) ".dll"); -#elif defined(__APPLE__) - _stub = std::make_unique( - "libavutil." AV_STRINGIFY(LIBAVUTIL_VERSION_MAJOR) ".dylib", - "libavcodec." AV_STRINGIFY(LIBAVCODEC_VERSION_MAJOR) ".dylib", - "libavformat." AV_STRINGIFY(LIBAVFORMAT_VERSION_MAJOR) ".dylib", - "libavdevice." AV_STRINGIFY(LIBAVDEVICE_VERSION_MAJOR) ".dylib", - "libavfilter." AV_STRINGIFY(LIBAVFILTER_VERSION_MAJOR) ".dylib"); -#else - _stub = std::make_unique( - "libavutil.so." AV_STRINGIFY(LIBAVUTIL_VERSION_MAJOR), - "libavcodec.so." AV_STRINGIFY(LIBAVCODEC_VERSION_MAJOR), - "libavformat.so." AV_STRINGIFY(LIBAVFORMAT_VERSION_MAJOR), - "libavdevice.so." AV_STRINGIFY(LIBAVDEVICE_VERSION_MAJOR), - "libavfilter.so." AV_STRINGIFY(LIBAVFILTER_VERSION_MAJOR)); -#endif -} - -} // namespace - -FFmpegStub& ffmpeg_stub() { - static c10::once_flag init_flag; - c10::call_once(init_flag, _init_stub); - return _stub->stub; -} - -} // namespace torchaudio::io::detail - -#endif diff --git a/torchaudio/csrc/ffmpeg/stub.h b/torchaudio/csrc/ffmpeg/stub.h deleted file mode 100644 index ae6e0a3d1c..0000000000 --- a/torchaudio/csrc/ffmpeg/stub.h +++ /dev/null @@ -1,313 +0,0 @@ -#pragma once - -// Abstraction of the access to FFmpeg libraries. -// -// Do not include this in header files. -// Include this header in implementation files and prepend -// all the calls to libav functions with FFMPEG macro. -// -// If DLOPEN_FFMPEG is not defined, FFMPEG macro is empty. -// In this case, FFmpeg libraries are linked at the time torchaudio is built. -// -// If DLOPEN_FFMPEG is defined, FFMPEG macro becomes a function call to -// fetch a stub instance of FFmpeg libraries. -// This function also initializes the function pointers by automatically -// dlopens all the required libraries. -// - -#ifndef DLOPEN_FFMPEG -#define FFMPEG -#else -#define FFMPEG detail::ffmpeg_stub(). - -#include - -namespace torchaudio::io::detail { - -struct FFmpegStub; - -// dlopen FFmpeg libraries and populate the methods of stub instance, -// then return the reference to the stub instance -FFmpegStub& ffmpeg_stub(); - -struct FFmpegStub { - ///////////////////////////////////////////////////////////////////////////// - // libavutil - ///////////////////////////////////////////////////////////////////////////// - - AVBufferRef* (*av_buffer_ref)(const AVBufferRef*); - - void (*av_buffer_unref)(AVBufferRef**); - - AVRational (*av_d2q)(double, int) av_const; - - void (*av_dict_free)(AVDictionary**); - - AVDictionaryEntry* (*av_dict_get)( - const AVDictionary*, - const char*, - const AVDictionaryEntry*, - int); - - int (*av_dict_set)(AVDictionary**, const char*, const char*, int); - - AVFrame* (*av_frame_alloc)(); - - void (*av_frame_free)(AVFrame**); - - int (*av_frame_get_buffer)(AVFrame*, int); - - int (*av_frame_is_writable)(AVFrame*); - - int (*av_frame_make_writable)(AVFrame*); - - void (*av_frame_unref)(AVFrame*); - - void (*av_freep)(void*); - - int (*av_get_channel_layout_nb_channels)(uint64_t); - - const char* (*av_get_channel_name)(uint64_t); - - int64_t (*av_get_default_channel_layout)(int); - - const char* (*av_get_media_type_string)(enum AVMediaType); - - enum AVPixelFormat (*av_get_pix_fmt)(const char*); - - const char* (*av_get_pix_fmt_name)(enum AVPixelFormat); - - enum AVSampleFormat (*av_get_sample_fmt)(const char*); - - const char* (*av_get_sample_fmt_name)(enum AVSampleFormat); - - AVRational (*av_get_time_base_q)(); - - int (*av_hwdevice_ctx_create)( - AVBufferRef**, - enum AVHWDeviceType, - const char*, - AVDictionary*, - int); - - AVBufferRef* (*av_hwframe_ctx_alloc)(AVBufferRef*); - - int (*av_hwframe_ctx_init)(AVBufferRef*); - - int (*av_hwframe_get_buffer)(AVBufferRef*, AVFrame*, int); - - int (*av_log_get_level)(); - - void (*av_log_set_level)(int); - - void* (*av_malloc)(size_t); - - const AVPixFmtDescriptor* (*av_pix_fmt_desc_get)(enum AVPixelFormat); - - int64_t (*av_rescale_q)(int64_t, AVRational, AVRational) av_const; - - int (*av_sample_fmt_is_planar)(enum AVSampleFormat); - - char* (*av_strdup)(const char*); - - int (*av_strerror)(int, char*, size_t); - - unsigned (*avutil_version)(); - - ///////////////////////////////////////////////////////////////////////////// - // libavcodec - ///////////////////////////////////////////////////////////////////////////// - - int (*av_codec_is_decoder)(const AVCodec*); - - int (*av_codec_is_encoder)(const AVCodec*); - - const AVCodec* (*av_codec_iterate)(void**); - - AVPacket* (*av_packet_alloc)(); - - AVPacket* (*av_packet_clone)(const AVPacket*); - - void (*av_packet_free)(AVPacket**); - - int (*av_packet_ref)(AVPacket*, const AVPacket*); - - void (*av_packet_rescale_ts)(AVPacket*, AVRational, AVRational); - - void (*av_packet_unref)(AVPacket*); - - AVCodecContext* (*avcodec_alloc_context3)(const AVCodec*); - - const char* (*avcodec_configuration)(); - - const AVCodecDescriptor* (*avcodec_descriptor_get)(enum AVCodecID); - - AVCodec* (*avcodec_find_decoder)(enum AVCodecID); - - AVCodec* (*avcodec_find_decoder_by_name)(const char*); - - AVCodec* (*avcodec_find_encoder)(enum AVCodecID); - - AVCodec* (*avcodec_find_encoder_by_name)(const char*); - - void (*avcodec_flush_buffers)(AVCodecContext*); - - void (*avcodec_free_context)(AVCodecContext**); - - const AVCodecHWConfig* (*avcodec_get_hw_config)(const AVCodec*, int); - - const char* (*avcodec_get_name)(enum AVCodecID); - - int (*avcodec_open2)(AVCodecContext*, const AVCodec*, AVDictionary**); - - AVCodecParameters* (*avcodec_parameters_alloc)(); - - int (*avcodec_parameters_copy)(AVCodecParameters*, const AVCodecParameters*); - - void (*avcodec_parameters_free)(AVCodecParameters**); - - int (*avcodec_parameters_from_context)( - AVCodecParameters*, - const AVCodecContext*); - - int (*avcodec_parameters_to_context)( - AVCodecContext*, - const AVCodecParameters*); - - int (*avcodec_receive_frame)(AVCodecContext*, AVFrame*); - - int (*avcodec_receive_packet)(AVCodecContext*, AVPacket*); - - int (*avcodec_send_frame)(AVCodecContext*, const AVFrame*); - - int (*avcodec_send_packet)(AVCodecContext*, const AVPacket*); - - unsigned (*avcodec_version)(); - - ///////////////////////////////////////////////////////////////////////////// - // libavformat - ///////////////////////////////////////////////////////////////////////////// - - const AVInputFormat* (*av_demuxer_iterate)(void**); - - void (*av_dump_format)(AVFormatContext*, int, const char*, int); - - int (*av_find_best_stream)( - AVFormatContext*, - enum AVMediaType, - int, - int, - AVCodec**, - int); - - AVInputFormat* (*av_find_input_format)(const char*); - - AVRational (*av_guess_frame_rate)(AVFormatContext*, AVStream*, AVFrame*); - - int (*av_interleaved_write_frame)(AVFormatContext*, AVPacket*); - - const AVOutputFormat* (*av_muxer_iterate)(void**); - - int (*av_read_frame)(AVFormatContext*, AVPacket*); - - int (*av_seek_frame)(AVFormatContext*, int, int64_t, int); - - int (*av_write_trailer)(AVFormatContext* s); - - AVIOContext* (*avio_alloc_context)( - unsigned char*, - int, - int, - void*, - int (*)(void*, uint8_t*, int), - int (*)(void*, uint8_t*, int), - int64_t (*)(void*, int64_t, int)); - - const char* (*avio_enum_protocols)(void**, int); - - int (*avio_closep)(AVIOContext**); - - void (*avio_flush)(AVIOContext*); - - int (*avio_open2)( - AVIOContext**, - const char*, - int, - const AVIOInterruptCB*, - AVDictionary**); - - AVFormatContext* (*avformat_alloc_context)(); - - int (*avformat_alloc_output_context2)( - AVFormatContext**, - AVOutputFormat*, - const char*, - const char*); - - void (*avformat_close_input)(AVFormatContext**); - - int (*avformat_find_stream_info)(AVFormatContext*, AVDictionary**); - - void (*avformat_free_context)(AVFormatContext*); - - AVStream* (*avformat_new_stream)(AVFormatContext*, const AVCodec*); - - int (*avformat_open_input)( - AVFormatContext**, - const char*, - AVFORMAT_CONST AVInputFormat*, - AVDictionary**); - - unsigned (*avformat_version)(); - - int (*avformat_write_header)(AVFormatContext*, AVDictionary**); - - ///////////////////////////////////////////////////////////////////////////// - // libavdevice - ///////////////////////////////////////////////////////////////////////////// - - void (*avdevice_register_all)(); - - unsigned (*avdevice_version)(); - - ///////////////////////////////////////////////////////////////////////////// - // libavfilter - ///////////////////////////////////////////////////////////////////////////// - - int (*av_buffersink_get_frame)(AVFilterContext*, AVFrame*); - - int (*av_buffersrc_add_frame_flags)(AVFilterContext*, AVFrame*, int); - - const AVFilter* (*avfilter_get_by_name)(const char*); - - AVFilterGraph* (*avfilter_graph_alloc)(); - - int (*avfilter_graph_config)(AVFilterGraph*, void*); - - int (*avfilter_graph_create_filter)( - AVFilterContext**, - const AVFilter*, - const char*, - const char*, - void*, - AVFilterGraph*); - - void (*avfilter_graph_free)(AVFilterGraph**); - - int (*avfilter_graph_parse_ptr)( - AVFilterGraph*, - const char*, - AVFilterInOut**, - AVFilterInOut**, - void*); - - AVFilterInOut* (*avfilter_inout_alloc)(); - - void (*avfilter_inout_free)(AVFilterInOut**); - - unsigned (*avfilter_version)(); -}; - -} // namespace torchaudio::io::detail - -#endif