Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions src/torchcodec/_core/BetaCudaDeviceInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
// automatically converted to 8bits by NVDEC itself. That is, the raw frames
// we get back from cuvidMapVideoFrame will already be in 8bit format. We
// won't need to do the conversion ourselves, so that's a lot easier.
// In the default interface, we have to do the 10 -> 8bits conversion
// In the ffmpeg CUDA interface, we have to do the 10 -> 8bits conversion
// ourselves later in convertAVFrameToFrameOutput(), because FFmpeg explicitly
// requests 10 or 16bits output formats for >8-bit videos!
// https://github.com/FFmpeg/FFmpeg/blob/e05f8acabff468c1382277c1f31fa8e9d90c3202/libavcodec/nvdec.c#L376-L403
Expand Down Expand Up @@ -480,8 +480,7 @@ int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
procParams.top_field_first = dispInfo.top_field_first;
procParams.unpaired_field = dispInfo.repeat_first_field < 0;
// We set the NVDEC stream to the current stream. It will be waited upon by
// the NPP stream before any color conversion. Currently, that syncing logic
// is in the default interface.
// the NPP stream before any color conversion.
// Re types: we get a cudaStream_t from PyTorch but it's interchangeable with
// CUstream
procParams.output_stream = reinterpret_cast<CUstream>(
Expand Down Expand Up @@ -618,8 +617,8 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
UniqueAVFrame& avFrame,
FrameOutput& frameOutput,
std::optional<torch::Tensor> preAllocatedOutputTensor) {
// TODONVDEC P2: we may need to handle 10bit videos the same way the default
// interface does it with maybeConvertAVFrameToNV12OrRGB24().
// TODONVDEC P2: we may need to handle 10bit videos the same way the ffmpeg
// CUDA interface does it with maybeConvertAVFrameToNV12OrRGB24().
TORCH_CHECK(
avFrame->format == AV_PIX_FMT_CUDA,
"Expected CUDA format frame from BETA CUDA interface");
Expand Down
4 changes: 2 additions & 2 deletions src/torchcodec/_core/DeviceInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace facebook::torchcodec {
// Key for device interface registration with device type + variant support
struct DeviceInterfaceKey {
torch::DeviceType deviceType;
std::string_view variant = "default"; // e.g., "default", "beta", etc.
std::string_view variant = "ffmpeg"; // e.g., "ffmpeg", "beta", etc.

bool operator<(const DeviceInterfaceKey& other) const {
if (deviceType != other.deviceType) {
Expand Down Expand Up @@ -141,7 +141,7 @@ void validateDeviceInterface(

std::unique_ptr<DeviceInterface> createDeviceInterface(
const torch::Device& device,
const std::string_view variant = "default");
const std::string_view variant = "ffmpeg");

torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame);

Expand Down
2 changes: 1 addition & 1 deletion src/torchcodec/_core/SingleStreamDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ class SingleStreamDecoder {
int streamIndex,
AVMediaType mediaType,
const torch::Device& device = torch::kCPU,
const std::string_view deviceVariant = "default",
const std::string_view deviceVariant = "ffmpeg",
std::optional<int> ffmpegThreadCount = std::nullopt);

// Returns the "best" stream index for a given media type. The "best" is
Expand Down
4 changes: 2 additions & 2 deletions src/torchcodec/_core/StreamOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ struct VideoStreamOptions {

// By default we use CPU for decoding for both C++ and python users.
torch::Device device = torch::kCPU;
// Device variant (e.g., "default", "beta", etc.)
std::string_view deviceVariant = "default";
// Device variant (e.g., "ffmpeg", "beta", etc.)
std::string_view deviceVariant = "ffmpeg";

// Encoding options
// TODO-VideoEncoder: Consider adding other optional fields here
Expand Down
8 changes: 4 additions & 4 deletions src/torchcodec/_core/custom_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ TORCH_LIBRARY(torchcodec_ns, m) {
m.def(
"_create_from_file_like(int file_like_context, str? seek_mode=None) -> Tensor");
m.def(
"_add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"default\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None, str? color_conversion_library=None) -> ()");
"_add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"ffmpeg\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None, str? color_conversion_library=None) -> ()");
m.def(
"add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"default\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None) -> ()");
"add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"ffmpeg\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None) -> ()");
m.def(
"add_audio_stream(Tensor(a!) decoder, *, int? stream_index=None, int? sample_rate=None, int? num_channels=None) -> ()");
m.def("seek_to_pts(Tensor(a!) decoder, float seconds) -> ()");
Expand Down Expand Up @@ -319,7 +319,7 @@ void _add_video_stream(
std::optional<std::string_view> dimension_order = std::nullopt,
std::optional<int64_t> stream_index = std::nullopt,
std::string_view device = "cpu",
std::string_view device_variant = "default",
std::string_view device_variant = "ffmpeg",
std::string_view transform_specs = "",
std::optional<std::tuple<at::Tensor, at::Tensor, at::Tensor>>
custom_frame_mappings = std::nullopt,
Expand Down Expand Up @@ -376,7 +376,7 @@ void add_video_stream(
std::optional<std::string_view> dimension_order = std::nullopt,
std::optional<int64_t> stream_index = std::nullopt,
std::string_view device = "cpu",
std::string_view device_variant = "default",
std::string_view device_variant = "ffmpeg",
std::string_view transform_specs = "",
const std::optional<std::tuple<at::Tensor, at::Tensor, at::Tensor>>&
custom_frame_mappings = std::nullopt) {
Expand Down
4 changes: 2 additions & 2 deletions src/torchcodec/_core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ def _add_video_stream_abstract(
dimension_order: Optional[str] = None,
stream_index: Optional[int] = None,
device: str = "cpu",
device_variant: str = "default",
device_variant: str = "ffmpeg",
transform_specs: str = "",
custom_frame_mappings: Optional[
tuple[torch.Tensor, torch.Tensor, torch.Tensor]
Expand All @@ -322,7 +322,7 @@ def add_video_stream_abstract(
dimension_order: Optional[str] = None,
stream_index: Optional[int] = None,
device: str = "cpu",
device_variant: str = "default",
device_variant: str = "ffmpeg",
transform_specs: str = "",
custom_frame_mappings: Optional[
tuple[torch.Tensor, torch.Tensor, torch.Tensor]
Expand Down
3 changes: 0 additions & 3 deletions src/torchcodec/decoders/_video_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,6 @@ def __init__(
device = str(device)

device_variant = _get_cuda_backend()
if device_variant == "ffmpeg":
# TODONVDEC P2 rename 'default' into 'ffmpeg' everywhere.
device_variant = "default"

# Legacy support for device="cuda:0:beta" syntax
# TODONVDEC P2: remove support for this everywhere. This will require
Expand Down
6 changes: 3 additions & 3 deletions test/test_decoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -1303,7 +1303,7 @@ def test_10bit_videos(self, device, asset):
# RuntimeError: Codec configuration not supported on this GPU.
# Codec: 4, chroma format: 1, bit depth: 10
#
# It works on the default interface because FFmpeg fallsback to the
# It works on the ffmpeg interface because FFmpeg falls back to the
# CPU, while the BETA interface doesn't.
pytest.skip("Asset not supported by NVDEC")

Expand Down Expand Up @@ -1692,8 +1692,8 @@ def test_beta_cuda_interface_backwards(self, asset, seek_mode):
@needs_cuda
def test_beta_cuda_interface_small_h265(self):
# Test to illustrate current difference in behavior between the BETA and
# the default interface: this video isn't supported by NVDEC, but in the
# default interface, FFMPEG fallsback to the CPU while we don't.
# the ffmpeg interface: this video isn't supported by NVDEC, but in the
# ffmpeg interface, FFmpeg falls back to the CPU while we don't.

VideoDecoder(H265_VIDEO.path, device="cuda").get_frame_at(0)
with pytest.raises(
Expand Down
2 changes: 1 addition & 1 deletion test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def unsplit_device_str(device_str: str) -> str:
if device_str == "cuda:0:beta":
return "cuda", "beta"
else:
return device_str, "default"
return device_str, "ffmpeg"


def get_ffmpeg_major_version():
Expand Down
Loading