Conversation
| { | ||
| "run": { | ||
| "name": "Uninstall torch", | ||
| "command": "uv pip uninstall torch torchvision torchaudio torchcodec", | ||
| } | ||
| }, | ||
| { | ||
| "run": { | ||
| "name": "Install torch 2.11", | ||
| "command": "uv pip install --no-cache-dir 'torch<=2.11.0' 'torchaudio' 'torchvision' 'torchcodec<=0.11.0' --index-url https://download.pytorch.org/whl/cpu", | ||
| } | ||
| }, |
There was a problem hiding this comment.
Please ignore this part; I will revert it before merging. It's just a quick way to check.
|
[For maintainers] Suggested jobs to run (before merge) run-slow: llava_onevision |
| # Force video decoding to use torchvision backend to avoid torchcodec dependency during tests | ||
| video_processing_utils.is_torchcodec_available = lambda: False # type: ignore | ||
| video_utils.is_torchcodec_available = lambda: False # type: ignore |
There was a problem hiding this comment.
This causes issues like below.
I have no context of why using this. But we are moving away from torchvision anyway as @zucchini-nlp told me, and we also have
if not is_torchcodec_available():
warnings.warn(
"`torchcodec` is not installed and cannot be used to decode the video by default. "
"Falling back to `torchvision`. Note that `torchvision` decoding is deprecated and will be removed in future versions. "
)
backend = "torchvision"
Anyway, the change works.
error log
a = (<tests.models.llava_onevision.test_processing_llava_onevision.LlavaOnevisionProcessorTest testMethod=test_apply_chat_template_video_1>,)
kw = {}
@wraps(func)
def standalone_func(*a, **kw):
> return func(*(a + p.args), **p.kwargs, **kw)
/usr/local/lib/python3.10/site-packages/parameterized/parameterized.py:620:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/test_processing_common.py:1679: in test_apply_chat_template_video
self._test_apply_chat_template(
tests/test_processing_common.py:1616: in _test_apply_chat_template
out_dict = processor.apply_chat_template(
/usr/local/lib/python3.10/site-packages/transformers/processing_utils.py:1898: in apply_chat_template
out = self(
/usr/local/lib/python3.10/site-packages/transformers/models/llava_onevision/processing_llava_onevision.py:139: in __call__
video_inputs = self.video_processor(videos, **output_kwargs["videos_kwargs"])
/usr/local/lib/python3.10/site-packages/transformers/video_processing_utils.py:178: in __call__
return self.preprocess(videos, **kwargs)
/usr/local/lib/python3.10/site-packages/transformers/video_processing_utils.py:354: in preprocess
videos, video_metadata = self._decode_and_sample_videos(
/usr/local/lib/python3.10/site-packages/transformers/video_processing_utils.py:294: in _decode_and_sample_videos
videos, video_metadata = self.fetch_videos(videos, sample_indices_fn=sample_indices_fn)
/usr/local/lib/python3.10/site-packages/transformers/video_processing_utils.py:836: in fetch_videos
return list(zip(*[self.fetch_videos(x, sample_indices_fn=sample_indices_fn) for x in video_url_or_urls]))
/usr/local/lib/python3.10/site-packages/transformers/video_processing_utils.py:836: in <listcomp>
return list(zip(*[self.fetch_videos(x, sample_indices_fn=sample_indices_fn) for x in video_url_or_urls]))
/usr/local/lib/python3.10/site-packages/transformers/video_processing_utils.py:838: in fetch_videos
return load_video(video_url_or_urls, backend=backend, sample_indices_fn=sample_indices_fn)
/usr/local/lib/python3.10/site-packages/transformers/video_utils.py:726: in load_video
video, metadata = video_decoder(file_obj, sample_indices_fn, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
video_path = '/root/project/sample_demo_1.mp4'
sample_indices_fn = functools.partial(<bound method BaseVideoProcessor.sample_frames of LlavaOnevisionVideoProcessor {
"do_convert_rgb": true,
"do_normalize": true,
"do_rescale": true,
"do_resize": true,
"do_sample_frames": false,
"image_mean": [
0.48145466,
0.4578275,
0.40821073
],
"image_std": [
0.26862954,
0.26130258,
0.27577711
],
"resample": 3,
"rescale_factor": 0.00392156862745098,
"return_metadata": false,
"size": {
"height": 384,
"width": 384
},
"video_processor_type": "LlavaOnevisionVideoProcessor"
}
>, return_tensors='pt', num_frames=2, do_convert_rgb=True, do_resize=True, size=SizeDict(height=384, width=384, longest_edge=None, shortest_edge=None, max_height=None, max_width=None), default_to_square=False, resample=3, do_rescale=True, rescale_factor=0.00392156862745098, do_normalize=True, image_mean=(0.48145466, 0.4578275, 0.40821073), image_std=(0.26862954, 0.26130258, 0.27577711), do_center_crop=None, do_pad=None, crop_size=None, data_format=None, fps=None, return_metadata=False)
kwargs = {}
def read_video_torchvision(
video_path: Union["URL", "Path"],
sample_indices_fn: Callable,
**kwargs,
):
"""
Decode the video with torchvision decoder.
Args:
video_path (`str`):
Path to the video file.
sample_indices_fn (`Callable`, *optional*):
A callable function that will return indices at which the video should be sampled. If the video has to be loaded using
by a different sampling technique than provided by `num_frames` or `fps` arguments, one should provide their own `sample_indices_fn`.
If not provided, simple uniform sampling with fps is performed.
Example:
def sample_indices_fn(metadata, **kwargs):
return np.linspace(0, metadata.total_num_frames - 1, num_frames, dtype=int)
Returns:
tuple[`torch.Tensor`, `VideoMetadata`]: A tuple containing:
- Torch tensor of frames in RGB (shape: [num_frames, height, width, 3]).
- `VideoMetadata` object.
"""
warnings.warn(
"Using `torchvision` for video decoding is deprecated and will be removed in future versions. "
"Please use `torchcodec` instead."
)
> video, _, info = torchvision_io.read_video(
video_path,
start_pts=0.0,
end_pts=None,
pts_unit="sec",
output_format="TCHW",
)
E AttributeError: module 'torchvision.io' has no attribute 'read_video'. Did you mean: 'read_file'?
/usr/local/lib/python3.10/site-packages/transformers/video_utils.py:538: AttributeError
There was a problem hiding this comment.
lol, I don't know when that was added, and indeed it shouldn't be there in the first place
There was a problem hiding this comment.
No worries, it was not you :-)
| else "echo Skipping" | ||
| }, | ||
| { | ||
| "run": { |
There was a problem hiding this comment.
Maybe you can remove that step and simply add `--upgrade` to the next one. It should work.
| { | ||
| "run": { | ||
| "name": "Uninstall torch", | ||
| "command": "uv pip uninstall torch torchvision torchaudio torchcodec", | ||
| } | ||
| }, | ||
| { | ||
| "run": { | ||
| "name": "Install torch 2.11", | ||
| "command": "uv pip install --no-cache-dir 'torch<=2.11.0' 'torchaudio' 'torchvision' 'torchcodec<=0.11.0' --index-url https://download.pytorch.org/whl/cpu", | ||
| } | ||
| }, |
|
The docs for this PR live here. All of your documentation changes will be reflected on that endpoint. The docs are available until 30 days after the last update. |
What does this PR do?
A bit late, but let's switch, because torch 2.12 is already in RC 1