Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions .github/scripts/setup-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ conda activate ci
conda install --quiet --yes libjpeg-turbo -c pytorch
pip install --progress-bar=off --upgrade setuptools==72.1.0

# See https://github.com/pytorch/vision/issues/6790
if [[ "${PYTHON_VERSION}" != "3.11" ]]; then
pip install --progress-bar=off av!=10.0.0
fi

echo '::endgroup::'

if [[ "${OS_TYPE}" == windows && "${GPU_ARCH_TYPE}" == cuda ]]; then
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ jobs:
CONDA_PATH=$(which conda)
eval "$(${CONDA_PATH} shell.bash hook)"
conda activate ci
# FIXME: not sure why we need this. `ldd torchvision/video_reader.so` shows that it
# already links against the one pulled from conda. However, at runtime it pulls from
# /lib64
# Should we maybe always do this in `./.github/scripts/setup-env.sh` so that we don't
# have to pay attention in all other workflows?

echo '::group::Install TorchCodec and ffmpeg'
conda install --quiet --yes ffmpeg
pip install --progress-bar=off --pre torchcodec --index-url="https://download.pytorch.org/whl/nightly/cpu"
export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH}"
echo '::endgroup::'

cd docs

Expand Down
19 changes: 9 additions & 10 deletions gallery/others/plot_optical_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,10 @@ def plot(imgs, **imshow_kwargs):
plt.tight_layout()

# %%
# Reading Videos Using Torchvision
# Reading Videos Using TorchCodec
# --------------------------------
# We will first read a video using :func:`~torchvision.io.read_video`.
# Alternatively one can use the new :class:`~torchvision.io.VideoReader` API (if
# torchvision is built from source).
# We will first read a video using
# `TorchCodec <https://github.com/pytorch/torchcodec>`_.
# The video we will use here is free of use from `pexels.com
# <https://www.pexels.com/video/a-man-playing-a-game-of-basketball-5192157/>`_,
# credits go to `Pavel Danilyuk <https://www.pexels.com/@pavel-danilyuk>`_.
Expand All @@ -67,16 +66,16 @@ def plot(imgs, **imshow_kwargs):
_ = urlretrieve(video_url, video_path)

# %%
# :func:`~torchvision.io.read_video` returns the video frames, audio frames and
# the metadata associated with the video. In our case, we only need the video
# frames.
# We use :class:`~torchcodec.decoders.VideoDecoder` to decode the video frames.
# TorchCodec returns frames in NCHW format by default.
#
# Here we will just make 2 predictions between 2 pre-selected pairs of frames,
# namely frames (100, 101) and (150, 151). Each of these pairs corresponds to a
# single model input.

from torchvision.io import read_video
frames, _, _ = read_video(str(video_path), output_format="TCHW")
from torchcodec.decoders import VideoDecoder
decoder = VideoDecoder(str(video_path))
frames = decoder[:]

img1_batch = torch.stack([frames[100], frames[150]])
img2_batch = torch.stack([frames[101], frames[151]])
Expand All @@ -85,7 +84,7 @@ def plot(imgs, **imshow_kwargs):

# %%
# The RAFT model accepts RGB images. We first get the frames from
# :func:`~torchvision.io.read_video` and resize them to ensure their dimensions
# the decoder and resize them to ensure their dimensions
# are divisible by 8. Note that we explicitly use ``antialias=False``, because
# this is how those models were trained. Then we use the transforms bundled into
# the weights in order to preprocess the input and rescale its values to the
Expand Down
11 changes: 6 additions & 5 deletions test/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import torch.testing

from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
from torchvision import io, tv_tensors
from torchvision import tv_tensors
from torchvision.transforms._functional_tensor import _max_value as get_max_value
from torchvision.transforms.v2.functional import cvcuda_to_tensor, to_cvcuda_tensor, to_image, to_pil_image
from torchvision.transforms.v2.functional._utils import _is_cvcuda_available, _is_cvcuda_tensor
Expand Down Expand Up @@ -166,6 +166,8 @@ def _create_data_batch(height=3, width=3, channels=3, num_samples=4, device="cpu


def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
from datasets_utils import create_video_file

names = []
for i in range(num_videos):
if sizes is None:
Expand All @@ -176,10 +178,9 @@ def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
f = 5
else:
f = fps[i]
data = torch.randint(0, 256, (size, 300, 400, 3), dtype=torch.uint8)
name = os.path.join(tmpdir, f"{i}.mp4")
names.append(name)
io.write_video(name, data, fps=f)
name = f"{i}.mp4"
create_video_file(tmpdir, name, size=(size, 3, 300, 400), fps=f)
names.append(os.path.join(tmpdir, name))

return names

Expand Down
34 changes: 22 additions & 12 deletions test/datasets_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class LazyImporter:
"""

MODULES = (
"av",
"torchcodec",
"lmdb",
"pycocotools",
"requests",
Expand Down Expand Up @@ -669,17 +669,24 @@ class VideoDatasetTestCase(DatasetTestCase):

- Overwrites the 'FEATURE_TYPES' class attribute to expect two :class:`torch.Tensor` s for the video and audio as
well as an integer label.
- Overwrites the 'REQUIRED_PACKAGES' class attribute to require PyAV (``av``).
- Overwrites the 'REQUIRED_PACKAGES' class attribute to require TorchCodec (``torchcodec``).
- Skips on non-Linux platforms and CUDA-only environments.
- Adds the 'DEFAULT_FRAMES_PER_CLIP' class attribute. If no 'frames_per_clip' is provided by 'inject_fake_data()'
and it is the last parameter without a default value in the dataset constructor, the value of the
'DEFAULT_FRAMES_PER_CLIP' class attribute is appended to the output.
"""

FEATURE_TYPES = (torch.Tensor, torch.Tensor, int)
REQUIRED_PACKAGES = ("av",)
REQUIRED_PACKAGES = ("torchcodec",)

FRAMES_PER_CLIP = 1

@classmethod
def setUpClass(cls):
    """Run the parent class-level setup, but only when the tests execute on Linux.

    Raises:
        unittest.SkipTest: If ``platform.system()`` reports anything other than ``"Linux"``.
    """
    if platform.system() == "Linux":
        # Supported platform: hand over to the parent's class-level setup.
        super().setUpClass()
        return
    raise unittest.SkipTest("Video dataset tests are only supported on Linux.")

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.dataset_args = self._set_default_frames_per_clip(self.dataset_args)
Expand Down Expand Up @@ -864,13 +871,12 @@ def shape_test_for_stereo(
assert dw == mw


@requires_lazy_imports("av")
@requires_lazy_imports("torchcodec")
def create_video_file(
root: Union[pathlib.Path, str],
name: Union[pathlib.Path, str],
size: Union[Sequence[int], int] = (1, 3, 10, 10),
fps: float = 25,
**kwargs: Any,
) -> pathlib.Path:
"""Create a video file from random data.

Expand All @@ -881,14 +887,15 @@ def create_video_file(
``(num_frames, num_channels, height, width)``. If scalar, the value is used for the height and width.
If not provided, ``num_frames=1`` and ``num_channels=3`` are assumed.
fps (float): Frame rate in frames per second.
kwargs (Any): Additional parameters passed to :func:`torchvision.io.write_video`.

Returns:
pathlib.Path: Path to the created image file.
pathlib.Path: Path to the created video file.

Raises:
UsageError: If PyAV is not available.
UsageError: If TorchCodec is not available.
"""
from torchcodec.encoders import VideoEncoder

if isinstance(size, int):
size = (size, size)
if len(size) == 2:
Expand All @@ -902,11 +909,14 @@ def create_video_file(

video = create_image_or_video_tensor(size)
file = pathlib.Path(root) / name
torchvision.io.write_video(str(file), video.permute(0, 2, 3, 1), fps, **kwargs)

encoder = VideoEncoder(video, frame_rate=fps)
encoder.to_file(str(file))

return file


@requires_lazy_imports("av")
@requires_lazy_imports("torchcodec")
def create_video_folder(
root: Union[str, pathlib.Path],
name: Union[str, pathlib.Path],
Expand All @@ -933,7 +943,7 @@ def create_video_folder(
List[pathlib.Path]: Paths to all created video files.

Raises:
UsageError: If PyAV is not available.
UsageError: If TorchCodec is not available.

.. seealso::

Expand All @@ -944,7 +954,7 @@ def create_video_folder(
def size(idx):
    """Return a random ``(num_frames, num_channels, height, width)`` tuple for one video.

    ``idx`` is accepted but not used. The result always has one frame and three channels;
    height and width are drawn independently.
    """
    # The 'libx264' video codec requires the height and width of the video to be divisible by 2,
    # so two integers are sampled from [2, 6) and doubled.
    even_dims = torch.randint(2, 6, size=(2,), dtype=torch.int) * 2
    height, width = even_dims.tolist()
    return (1, 3, height, width)
Expand Down
15 changes: 13 additions & 2 deletions test/test_datasets_samplers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
import sys

import pytest
import torch
from common_utils import assert_equal, get_list_of_videos
from torchvision import io
from torchvision.datasets.samplers import DistributedSampler, RandomClipSampler, UniformClipSampler
from torchvision.datasets.video_utils import VideoClips

# Probe for TorchCodec once at import time; the resulting flag is used by the skip condition below.
# NOTE(review): only ImportError is treated as "not available"; any other exception raised while
# importing torchcodec propagates out of this module's import.
try:
    import torchcodec  # noqa: F401
except ImportError:
    _torchcodec_available = False
else:
    _torchcodec_available = True


@pytest.mark.skipif(not io.video._av_available(), reason="this test requires av")
@pytest.mark.skipif(
not (_torchcodec_available and sys.platform == "linux"),
reason="this test requires torchcodec (linux only)",
)
class TestDatasetsSamplers:
def test_random_clip_sampler(self, tmpdir):
video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[25, 25, 25])
Expand Down
19 changes: 16 additions & 3 deletions test/test_datasets_video_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
import sys

import pytest
import torch
from common_utils import assert_equal, get_list_of_videos
from torchvision import io
from torchvision.datasets.video_utils import unfold, VideoClips

# Probe for TorchCodec once at import time; the resulting flag is used by the skip marker below.
# NOTE(review): only ImportError is treated as "not available"; any other exception raised while
# importing torchcodec propagates out of this module's import.
try:
    import torchcodec  # noqa: F401
except ImportError:
    _torchcodec_available = False
else:
    _torchcodec_available = True

# Reusable skip marker: a decorated test is skipped when the TorchCodec import failed or when
# ``sys.platform`` is not "linux".
_requires_torchcodec = pytest.mark.skipif(
    not _torchcodec_available or sys.platform != "linux",
    reason="this test requires torchcodec (linux only)",
)


class TestVideo:
def test_unfold(self):
Expand Down Expand Up @@ -31,7 +44,7 @@ def test_unfold(self):
)
assert_equal(r, expected)

@pytest.mark.skipif(not io.video._av_available(), reason="this test requires av")
@_requires_torchcodec
def test_video_clips(self, tmpdir):
video_list = get_list_of_videos(tmpdir, num_videos=3)
video_clips = VideoClips(video_list, 5, 5, num_workers=2)
Expand All @@ -55,7 +68,7 @@ def test_video_clips(self, tmpdir):
assert video_idx == v_idx
assert clip_idx == c_idx

@pytest.mark.skipif(not io.video._av_available(), reason="this test requires av")
@_requires_torchcodec
def test_video_clips_custom_fps(self, tmpdir):
video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6])
num_frames = 4
Expand Down
Loading
Loading