Rename default interface into FFmpeg (meta-pytorch#964)

NicolasHug · NicolasHug · commit ece7f9334327 · 2025-10-27T17:29:56.000Z
diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
@@ -129,7 +129,7 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
   // automatically converted to 8bits by NVDEC itself. That is, the raw frames
   // we get back from cuvidMapVideoFrame will already be in 8bit format.  We
   // won't need to do the conversion ourselves, so that's a lot easier.
-  // In the default interface, we have to do the 10 -> 8bits conversion
+  // In the ffmpeg CUDA interface, we have to do the 10 -> 8bits conversion
   // ourselves later in convertAVFrameToFrameOutput(), because FFmpeg explicitly
   // requests 10 or 16bits output formats for >8-bit videos!
   // https://github.com/FFmpeg/FFmpeg/blob/e05f8acabff468c1382277c1f31fa8e9d90c3202/libavcodec/nvdec.c#L376-L403
@@ -480,8 +480,7 @@ int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
   procParams.top_field_first = dispInfo.top_field_first;
   procParams.unpaired_field = dispInfo.repeat_first_field < 0;
   // We set the NVDEC stream to the current stream. It will be waited upon by
-  // the NPP stream before any color conversion. Currently, that syncing logic
-  // is in the default interface.
+  // the NPP stream before any color conversion.
   // Re types: we get a cudaStream_t from PyTorch but it's interchangeable with
   // CUstream
   procParams.output_stream = reinterpret_cast<CUstream>(
@@ -618,8 +617,8 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
     UniqueAVFrame& avFrame,
     FrameOutput& frameOutput,
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
-  // TODONVDEC P2: we may need to handle 10bit videos the same way the default
-  // interface does it with maybeConvertAVFrameToNV12OrRGB24().
+  // TODONVDEC P2: we may need to handle 10bit videos the same way the ffmpeg
+  // CUDA interface does it with maybeConvertAVFrameToNV12OrRGB24().
   TORCH_CHECK(
       avFrame->format == AV_PIX_FMT_CUDA,
       "Expected CUDA format frame from BETA CUDA interface");
diff --git a/src/torchcodec/_core/DeviceInterface.h b/src/torchcodec/_core/DeviceInterface.h
@@ -21,7 +21,7 @@ namespace facebook::torchcodec {
 // Key for device interface registration with device type + variant support
 struct DeviceInterfaceKey {
   torch::DeviceType deviceType;
-  std::string_view variant = "default"; // e.g., "default", "beta", etc.
+  std::string_view variant = "ffmpeg"; // e.g., "ffmpeg", "beta", etc.
 
   bool operator<(const DeviceInterfaceKey& other) const {
     if (deviceType != other.deviceType) {
@@ -141,7 +141,7 @@ void validateDeviceInterface(
 
 std::unique_ptr<DeviceInterface> createDeviceInterface(
     const torch::Device& device,
-    const std::string_view variant = "default");
+    const std::string_view variant = "ffmpeg");
 
 torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame);
 
diff --git a/src/torchcodec/_core/SingleStreamDecoder.h b/src/torchcodec/_core/SingleStreamDecoder.h
@@ -311,7 +311,7 @@ class SingleStreamDecoder {
       int streamIndex,
       AVMediaType mediaType,
       const torch::Device& device = torch::kCPU,
-      const std::string_view deviceVariant = "default",
+      const std::string_view deviceVariant = "ffmpeg",
       std::optional<int> ffmpegThreadCount = std::nullopt);
 
   // Returns the "best" stream index for a given media type. The "best" is
diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h
@@ -41,8 +41,8 @@ struct VideoStreamOptions {
 
   // By default we use CPU for decoding for both C++ and python users.
   torch::Device device = torch::kCPU;
-  // Device variant (e.g., "default", "beta", etc.)
-  std::string_view deviceVariant = "default";
+  // Device variant (e.g., "ffmpeg", "beta", etc.)
+  std::string_view deviceVariant = "ffmpeg";
 
   // Encoding options
   // TODO-VideoEncoder: Consider adding other optional fields here
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
@@ -43,9 +43,9 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "_create_from_file_like(int file_like_context, str? seek_mode=None) -> Tensor");
   m.def(
-      "_add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"default\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None, str? color_conversion_library=None) -> ()");
+      "_add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"ffmpeg\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None, str? color_conversion_library=None) -> ()");
   m.def(
-      "add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"default\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None) -> ()");
+      "add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"ffmpeg\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None) -> ()");
   m.def(
       "add_audio_stream(Tensor(a!) decoder, *, int? stream_index=None, int? sample_rate=None, int? num_channels=None) -> ()");
   m.def("seek_to_pts(Tensor(a!) decoder, float seconds) -> ()");
@@ -319,7 +319,7 @@ void _add_video_stream(
     std::optional<std::string_view> dimension_order = std::nullopt,
     std::optional<int64_t> stream_index = std::nullopt,
     std::string_view device = "cpu",
-    std::string_view device_variant = "default",
+    std::string_view device_variant = "ffmpeg",
     std::string_view transform_specs = "",
     std::optional<std::tuple<at::Tensor, at::Tensor, at::Tensor>>
         custom_frame_mappings = std::nullopt,
@@ -376,7 +376,7 @@ void add_video_stream(
     std::optional<std::string_view> dimension_order = std::nullopt,
     std::optional<int64_t> stream_index = std::nullopt,
     std::string_view device = "cpu",
-    std::string_view device_variant = "default",
+    std::string_view device_variant = "ffmpeg",
     std::string_view transform_specs = "",
     const std::optional<std::tuple<at::Tensor, at::Tensor, at::Tensor>>&
         custom_frame_mappings = std::nullopt) {
diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
@@ -304,7 +304,7 @@ def _add_video_stream_abstract(
     dimension_order: Optional[str] = None,
     stream_index: Optional[int] = None,
     device: str = "cpu",
-    device_variant: str = "default",
+    device_variant: str = "ffmpeg",
     transform_specs: str = "",
     custom_frame_mappings: Optional[
         tuple[torch.Tensor, torch.Tensor, torch.Tensor]
@@ -322,7 +322,7 @@ def add_video_stream_abstract(
     dimension_order: Optional[str] = None,
     stream_index: Optional[int] = None,
     device: str = "cpu",
-    device_variant: str = "default",
+    device_variant: str = "ffmpeg",
     transform_specs: str = "",
     custom_frame_mappings: Optional[
         tuple[torch.Tensor, torch.Tensor, torch.Tensor]
diff --git a/src/torchcodec/decoders/_video_decoder.py b/src/torchcodec/decoders/_video_decoder.py
@@ -147,9 +147,6 @@ def __init__(
             device = str(device)
 
         device_variant = _get_cuda_backend()
-        if device_variant == "ffmpeg":
-            # TODONVDEC P2 rename 'default' into 'ffmpeg' everywhere.
-            device_variant = "default"
 
         # Legacy support for device="cuda:0:beta" syntax
         # TODONVDEC P2: remove support for this everywhere. This will require
diff --git a/test/test_decoders.py b/test/test_decoders.py
@@ -1303,7 +1303,7 @@ def test_10bit_videos(self, device, asset):
             # RuntimeError: Codec configuration not supported on this GPU.
             # Codec: 4, chroma format: 1, bit depth: 10
             #
-            # It works on the default interface because FFmpeg fallsback to the
+            # It works on the ffmpeg interface because FFmpeg falls back to the
             # CPU, while the BETA interface doesn't.
             pytest.skip("Asset not supported by NVDEC")
 
@@ -1692,8 +1692,8 @@ def test_beta_cuda_interface_backwards(self, asset, seek_mode):
     @needs_cuda
     def test_beta_cuda_interface_small_h265(self):
         # Test to illustrate current difference in behavior between the BETA and
-        # the default interface: this video isn't supported by NVDEC, but in the
-        # default interface, FFMPEG fallsback to the CPU while we don't.
+        # the ffmpeg interface: this video isn't supported by NVDEC, but in the
+        # ffmpeg interface, FFmpeg falls back to the CPU while we don't.
 
         VideoDecoder(H265_VIDEO.path, device="cuda").get_frame_at(0)
         with pytest.raises(
diff --git a/test/utils.py b/test/utils.py
@@ -44,7 +44,7 @@ def unsplit_device_str(device_str: str) -> str:
     if device_str == "cuda:0:beta":
         return "cuda", "beta"
     else:
-        return device_str, "default"
+        return device_str, "ffmpeg"
 
 
 def get_ffmpeg_major_version():