pytorch · NicolasHug · Mar 11, 2026
diff --git a/.github/scripts/setup-env.sh b/.github/scripts/setup-env.sh
@@ -34,11 +34,6 @@ conda activate ci
 conda install --quiet --yes libjpeg-turbo -c pytorch
 pip install --progress-bar=off --upgrade setuptools==72.1.0
 
-# See https://github.com/pytorch/vision/issues/6790
-if [[ "${PYTHON_VERSION}" != "3.11" ]]; then
-  pip install --progress-bar=off av!=10.0.0
-fi
-
 echo '::endgroup::'
 
 if [[ "${OS_TYPE}" == windows && "${GPU_ARCH_TYPE}" == cuda ]]; then

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -34,12 +34,12 @@ jobs:
         CONDA_PATH=$(which conda)
         eval "$(${CONDA_PATH} shell.bash hook)"
         conda activate ci
-        # FIXME: not sure why we need this. `ldd torchvision/video_reader.so` shows that it
-        #  already links against the one pulled from conda. However, at runtime it pulls from
-        #  /lib64
-        # Should we maybe always do this in `./.github/scripts/setup-env.sh` so that we don't
-        # have to pay attention in all other workflows?
+
+        echo '::group::Install TorchCodec and ffmpeg'
+        conda install --quiet --yes ffmpeg
+        pip install --progress-bar=off --pre torchcodec --index-url="https://download.pytorch.org/whl/nightly/cpu"
         export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH}"
+        echo '::endgroup::'
 
         cd docs
 

diff --git a/gallery/others/plot_optical_flow.py b/gallery/others/plot_optical_flow.py
@@ -47,11 +47,10 @@ def plot(imgs, **imshow_kwargs):
     plt.tight_layout()
 
 # %%
-# Reading Videos Using Torchvision
+# Reading Videos Using TorchCodec
 # --------------------------------
-# We will first read a video using :func:`~torchvision.io.read_video`.
-# Alternatively one can use the new :class:`~torchvision.io.VideoReader` API (if
-# torchvision is built from source).
+# We will first read a video using
+# `TorchCodec <https://github.com/pytorch/torchcodec>`_.
 # The video we will use here is free of use from `pexels.com
 # <https://www.pexels.com/video/a-man-playing-a-game-of-basketball-5192157/>`_,
 # credits go to `Pavel Danilyuk <https://www.pexels.com/@pavel-danilyuk>`_.
@@ -67,16 +66,16 @@ def plot(imgs, **imshow_kwargs):
 _ = urlretrieve(video_url, video_path)
 
 # %%
-# :func:`~torchvision.io.read_video` returns the video frames, audio frames and
-# the metadata associated with the video. In our case, we only need the video
-# frames.
+# We use :class:`~torchcodec.decoders.VideoDecoder` to decode the video frames.
+# TorchCodec returns frames in NCHW format by default.
 #
 # Here we will just make 2 predictions between 2 pre-selected pairs of frames,
 # namely frames (100, 101) and (150, 151). Each of these pairs corresponds to a
 # single model input.
 
-from torchvision.io import read_video
-frames, _, _ = read_video(str(video_path), output_format="TCHW")
+from torchcodec.decoders import VideoDecoder
+decoder = VideoDecoder(str(video_path))
+frames = decoder[:]
 
 img1_batch = torch.stack([frames[100], frames[150]])
 img2_batch = torch.stack([frames[101], frames[151]])
@@ -85,7 +84,7 @@ def plot(imgs, **imshow_kwargs):
 
 # %%
 # The RAFT model accepts RGB images. We first get the frames from
-# :func:`~torchvision.io.read_video` and resize them to ensure their dimensions
+# the decoder and resize them to ensure their dimensions
 # are divisible by 8. Note that we explicitly use ``antialias=False``, because
 # this is how those models were trained. Then we use the transforms bundled into
 # the weights in order to preprocess the input and rescale its values to the

diff --git a/test/common_utils.py b/test/common_utils.py
@@ -18,7 +18,7 @@
 import torch.testing
 
 from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
-from torchvision import io, tv_tensors
+from torchvision import tv_tensors
 from torchvision.transforms._functional_tensor import _max_value as get_max_value
 from torchvision.transforms.v2.functional import cvcuda_to_tensor, to_cvcuda_tensor, to_image, to_pil_image
 from torchvision.transforms.v2.functional._utils import _is_cvcuda_available, _is_cvcuda_tensor
@@ -166,6 +166,8 @@ def _create_data_batch(height=3, width=3, channels=3, num_samples=4, device="cpu
 
 
 def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
+    from datasets_utils import create_video_file
+
     names = []
     for i in range(num_videos):
         if sizes is None:
@@ -176,10 +178,9 @@ def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
             f = 5
         else:
             f = fps[i]
-        data = torch.randint(0, 256, (size, 300, 400, 3), dtype=torch.uint8)
-        name = os.path.join(tmpdir, f"{i}.mp4")
-        names.append(name)
-        io.write_video(name, data, fps=f)
+        name = f"{i}.mp4"
+        create_video_file(tmpdir, name, size=(size, 3, 300, 400), fps=f)
+        names.append(os.path.join(tmpdir, name))
 
     return names
 

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
@@ -66,7 +66,7 @@ class LazyImporter:
     """
 
     MODULES = (
-        "av",
+        "torchcodec",
         "lmdb",
         "pycocotools",
         "requests",
@@ -669,17 +669,24 @@ class VideoDatasetTestCase(DatasetTestCase):
 
     - Overwrites the 'FEATURE_TYPES' class attribute to expect two :class:`torch.Tensor` s for the video and audio as
       well as an integer label.
-    - Overwrites the 'REQUIRED_PACKAGES' class attribute to require PyAV (``av``).
+    - Overwrites the 'REQUIRED_PACKAGES' class attribute to require TorchCodec (``torchcodec``).
+    - Skips the whole test case on non-Linux platforms (see ``setUpClass``).
     - Adds the 'DEFAULT_FRAMES_PER_CLIP' class attribute. If no 'frames_per_clip' is provided by 'inject_fake_data()'
         and it is the last parameter without a default value in the dataset constructor, the value of the
         'DEFAULT_FRAMES_PER_CLIP' class attribute is appended to the output.
     """
 
     FEATURE_TYPES = (torch.Tensor, torch.Tensor, int)
-    REQUIRED_PACKAGES = ("av",)
+    REQUIRED_PACKAGES = ("torchcodec",)
 
     FRAMES_PER_CLIP = 1
 
+    @classmethod
+    def setUpClass(cls):
+        if platform.system() != "Linux":
+            raise unittest.SkipTest("Video dataset tests are only supported on Linux.")
+        super().setUpClass()
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.dataset_args = self._set_default_frames_per_clip(self.dataset_args)
@@ -864,13 +871,12 @@ def shape_test_for_stereo(
         assert dw == mw
 
 
-@requires_lazy_imports("av")
+@requires_lazy_imports("torchcodec")
 def create_video_file(
     root: Union[pathlib.Path, str],
     name: Union[pathlib.Path, str],
     size: Union[Sequence[int], int] = (1, 3, 10, 10),
     fps: float = 25,
-    **kwargs: Any,
 ) -> pathlib.Path:
     """Create a video file from random data.
 
@@ -881,14 +887,15 @@ def create_video_file(
             ``(num_frames, num_channels, height, width)``. If scalar, the value is used for the height and width.
             If not provided, ``num_frames=1`` and ``num_channels=3`` are assumed.
         fps (float): Frame rate in frames per second.
-        kwargs (Any): Additional parameters passed to :func:`torchvision.io.write_video`.
 
     Returns:
-        pathlib.Path: Path to the created image file.
+        pathlib.Path: Path to the created video file.
 
     Raises:
-        UsageError: If PyAV is not available.
+        UsageError: If TorchCodec is not available.
     """
+    from torchcodec.encoders import VideoEncoder
+
     if isinstance(size, int):
         size = (size, size)
     if len(size) == 2:
@@ -902,11 +909,14 @@ def create_video_file(
 
     video = create_image_or_video_tensor(size)
     file = pathlib.Path(root) / name
-    torchvision.io.write_video(str(file), video.permute(0, 2, 3, 1), fps, **kwargs)
+
+    encoder = VideoEncoder(video, frame_rate=fps)
+    encoder.to_file(str(file))
+
     return file
 
 
-@requires_lazy_imports("av")
+@requires_lazy_imports("torchcodec")
 def create_video_folder(
     root: Union[str, pathlib.Path],
     name: Union[str, pathlib.Path],
@@ -933,7 +943,7 @@ def create_video_folder(
         List[pathlib.Path]: Paths to all created video files.
 
     Raises:
-        UsageError: If PyAV is not available.
+        UsageError: If TorchCodec is not available.
 
     .. seealso::
 
@@ -944,7 +954,7 @@ def create_video_folder(
         def size(idx):
             num_frames = 1
             num_channels = 3
-            # The 'libx264' video codec, which is the default of torchvision.io.write_video, requires the height and
+            # The 'libx264' video codec requires the height and
             # width of the video to be divisible by 2.
             height, width = (torch.randint(2, 6, size=(2,), dtype=torch.int) * 2).tolist()
             return (num_frames, num_channels, height, width)

diff --git a/test/test_datasets_samplers.py b/test/test_datasets_samplers.py
@@ -1,12 +1,23 @@
+import sys
+
 import pytest
 import torch
 from common_utils import assert_equal, get_list_of_videos
-from torchvision import io
 from torchvision.datasets.samplers import DistributedSampler, RandomClipSampler, UniformClipSampler
 from torchvision.datasets.video_utils import VideoClips
 
+try:
+    import torchcodec  # noqa: F401
+
+    _torchcodec_available = True
+except ImportError:
+    _torchcodec_available = False
+
 
-@pytest.mark.skipif(not io.video._av_available(), reason="this test requires av")
+@pytest.mark.skipif(
+    not (_torchcodec_available and sys.platform == "linux"),
+    reason="this test requires torchcodec (linux only)",
+)
 class TestDatasetsSamplers:
     def test_random_clip_sampler(self, tmpdir):
         video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[25, 25, 25])

diff --git a/test/test_datasets_video_utils.py b/test/test_datasets_video_utils.py
@@ -1,9 +1,22 @@
+import sys
+
 import pytest
 import torch
 from common_utils import assert_equal, get_list_of_videos
-from torchvision import io
 from torchvision.datasets.video_utils import unfold, VideoClips
 
+try:
+    import torchcodec  # noqa: F401
+
+    _torchcodec_available = True
+except ImportError:
+    _torchcodec_available = False
+
+_requires_torchcodec = pytest.mark.skipif(
+    not (_torchcodec_available and sys.platform == "linux"),
+    reason="this test requires torchcodec (linux only)",
+)
+
 
 class TestVideo:
     def test_unfold(self):
@@ -31,7 +44,7 @@ def test_unfold(self):
         )
         assert_equal(r, expected)
 
-    @pytest.mark.skipif(not io.video._av_available(), reason="this test requires av")
+    @_requires_torchcodec
     def test_video_clips(self, tmpdir):
         video_list = get_list_of_videos(tmpdir, num_videos=3)
         video_clips = VideoClips(video_list, 5, 5, num_workers=2)
@@ -55,7 +68,7 @@ def test_video_clips(self, tmpdir):
             assert video_idx == v_idx
             assert clip_idx == c_idx
 
-    @pytest.mark.skipif(not io.video._av_available(), reason="this test requires av")
+    @_requires_torchcodec
     def test_video_clips_custom_fps(self, tmpdir):
         video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6])
         num_frames = 4