huggingface · lhoestq · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026
diff --git a/docs/source/video_load.mdx b/docs/source/video_load.mdx
@@ -3,12 +3,12 @@
 > [!WARNING]
 > Video support is experimental and is subject to change.
 
-Video datasets have [`Video`] type columns, which contain `torchvision` objects.
+Video datasets have [`Video`] type columns, which contain `torchcodec` objects.
 
 > [!TIP]
-> To work with video datasets, you need to have the `torchvision` and `av` packages installed. Check out the [installation](https://github.com/pytorch/vision#installation) guide to learn how to install them.
+> To work with video datasets, you need to have the `torchcodec` and `ffmpeg` packages installed. Check out the [installation](https://github.com/meta-pytorch/torchcodec#installing-torchcodec) guide to learn how to install them.
 
-When you load a video dataset and call the video column, the videos are decoded as `torchvision` Videos:
+When you load a video dataset and call the video column, the videos are decoded as `torchcodec` `VideoDecoder` objects:
 
 ```py
 >>> from datasets import load_dataset, Video
@@ -193,7 +193,7 @@ For more details on working with Lance datasets, see the [Lance documentation](h
 
 ## Video decoding
 
-By default, videos are decoded sequentially as torchvision `VideoReaders` when you iterate on a dataset.
+By default, videos are decoded sequentially as torchcodec `VideoDecoder` objects when you iterate on a dataset.
 It sequentially decodes the metadata of the videos, and doesn't read the video frames until you access them.
 
 However it is possible to speed up the dataset significantly using multithreaded decoding:
@@ -209,7 +209,7 @@ However it is possible to speed up the dataset significantly using multithreaded
 You can enable multithreading using `num_threads`. This is especially useful to speed up remote data streaming.
 However it can be slower than `num_threads=0` for local data on fast disks.
 
-If you are not interested in the videos decoded as torchvision `VideoReaders` and would like to access the path/bytes instead, you can disable decoding:
+If you are not interested in the videos decoded as torchcodec `VideoDecoder` objects and would like to access the path/bytes instead, you can disable decoding:
 
 ```python
 >>> dataset = dataset.decode(False)

diff --git a/src/datasets/formatting/jax_formatter.py b/src/datasets/formatting/jax_formatter.py
@@ -107,10 +107,13 @@ def _tensorize(self, value):
             if isinstance(value, PIL.Image.Image):
                 value = np.asarray(value)
         if config.TORCHVISION_AVAILABLE and "torchvision" in sys.modules:
-            from torchvision.io import VideoReader
+            try:
+                from torchvision.io import VideoReader
 
-            if isinstance(value, VideoReader):
-                return value  # TODO(QL): set output to jax arrays ?
+                if isinstance(value, VideoReader):
+                    return value  # TODO(QL): set output to jax arrays ?
+            except ImportError:
+                pass
         if config.TORCHCODEC_AVAILABLE and "torchcodec" in sys.modules:
             from torchcodec.decoders import AudioDecoder, VideoDecoder
 

diff --git a/src/datasets/formatting/np_formatter.py b/src/datasets/formatting/np_formatter.py
@@ -64,10 +64,13 @@ def _tensorize(self, value):
             if isinstance(value, PIL.Image.Image):
                 return np.asarray(value, **self.np_array_kwargs)
         if config.TORCHVISION_AVAILABLE and "torchvision" in sys.modules:
-            from torchvision.io import VideoReader
+            try:
+                from torchvision.io import VideoReader
 
-            if isinstance(value, VideoReader):
-                return value  # TODO(QL): set output to np arrays ?
+                if isinstance(value, VideoReader):
+                    return value  # TODO(QL): set output to np arrays ?
+            except ImportError:
+                pass
         if config.TORCHCODEC_AVAILABLE and "torchcodec" in sys.modules:
             from torchcodec.decoders import AudioDecoder, VideoDecoder
 

diff --git a/src/datasets/formatting/tf_formatter.py b/src/datasets/formatting/tf_formatter.py
@@ -71,10 +71,13 @@ def _tensorize(self, value):
             if isinstance(value, PIL.Image.Image):
                 value = np.asarray(value)
         if config.TORCHVISION_AVAILABLE and "torchvision" in sys.modules:
-            from torchvision.io import VideoReader
+            try:
+                from torchvision.io import VideoReader
 
-            if isinstance(value, VideoReader):
-                return value  # TODO(QL): set output to tf tensors ?
+                if isinstance(value, VideoReader):
+                    return value  # TODO(QL): set output to tf tensors ?
+            except ImportError:
+                pass
         if config.TORCHCODEC_AVAILABLE and "torchcodec" in sys.modules:
             from torchcodec.decoders import AudioDecoder, VideoDecoder
 

diff --git a/src/datasets/formatting/torch_formatter.py b/src/datasets/formatting/torch_formatter.py
@@ -77,10 +77,13 @@ def _tensorize(self, value):
 
                 value = value.transpose((2, 0, 1))
         if config.TORCHVISION_AVAILABLE and "torchvision" in sys.modules:
-            from torchvision.io import VideoReader
+            try:
+                from torchvision.io import VideoReader
 
-            if isinstance(value, VideoReader):
-                return value  # TODO(QL): set output to torch tensors ?
+                if isinstance(value, VideoReader):
+                    return value  # TODO(QL): set output to torch tensors ?
+            except ImportError:
+                pass
         if config.TORCHCODEC_AVAILABLE and "torchcodec" in sys.modules:
             from torchcodec.decoders import AudioDecoder, VideoDecoder
 

diff --git a/src/datasets/iterable_dataset.py b/src/datasets/iterable_dataset.py
@@ -4090,7 +4090,7 @@ def decode(self, enable: bool = True, num_threads: int = 0) -> "IterableDataset"
 
         * audio -> dict of "array" and "sampling_rate" and "path"
         * image -> PIL.Image
-        * video -> torchvision.io.VideoReader
+        * video -> torchcodec.decoders.VideoDecoder
 
         You can enable multithreading using `num_threads`. This is especially useful to speed up remote
         data streaming. However it can be slower than `num_threads=0` for local data on fast disks.