Merge branch 'main' into fy/autocast-xpu

NicolasHug · web-flow · commit 9ef9377a293c · 2024-08-06T13:23:30.000+01:00
diff --git a/packaging/torchvision/meta.yaml b/packaging/torchvision/meta.yaml
@@ -11,13 +11,13 @@ requirements:
     - {{ compiler('c') }} # [win]
     - libpng
     - libjpeg-turbo
-    - ffmpeg >=4.2  # [linux]
+    - ffmpeg >=4.2.2, <5.0.0  # [linux]
 
   host:
     - python
     - setuptools
     - pytorch-mutex 1.0 {{ build_variant }}  # [not osx ]
-    {{ environ.get('CONDA_PYTORCH_BUILD_CONSTRAINT') }}
+    {{ environ.get('CONDA_PYTORCH_BUILD_CONSTRAINT', 'pytorch') }}
     {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT', '') }}
 
   run:
@@ -26,11 +26,11 @@ requirements:
     - numpy >=1.23.5 # [py >= 311]
     - requests
     - libpng
-    - ffmpeg >=4.2  # [linux]
+    - ffmpeg >=4.2.2, <5.0.0  # [linux]
     - libjpeg-turbo
     - pillow >=5.3.0, !=8.3.*
     - pytorch-mutex 1.0 {{ build_variant }}  # [not osx ]
-    {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }}
+    {{ environ.get('CONDA_PYTORCH_CONSTRAINT', 'pytorch') }}
     {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT', '') }}
 
   {% if build_variant == 'cpu' %}
diff --git a/test/test_io.py b/test/test_io.py
@@ -6,7 +6,7 @@
 import pytest
 import torch
 import torchvision.io as io
-from common_utils import assert_equal
+from common_utils import assert_equal, cpu_and_cuda
 from torchvision import get_video_backend
 
 
@@ -255,22 +255,19 @@ def test_read_video_partially_corrupted_file(self):
                 assert_equal(video, data)
 
     @pytest.mark.skipif(sys.platform == "win32", reason="temporarily disabled on Windows")
-    @pytest.mark.parametrize("device", ["cpu", "cuda"])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_write_video_with_audio(self, device, tmpdir):
         f_name = os.path.join(VIDEO_DIR, "R6llTwEh07w.mp4")
         video_tensor, audio_tensor, info = io.read_video(f_name, pts_unit="sec")
 
-        video_tensor = video_tensor.to(device)
-        audio_tensor = audio_tensor.to(device)
-
         out_f_name = os.path.join(tmpdir, "testing.mp4")
         io.video.write_video(
             out_f_name,
-            video_tensor,
+            video_tensor.to(device),
             round(info["video_fps"]),
             video_codec="libx264rgb",
             options={"crf": "0"},
-            audio_array=audio_tensor,
+            audio_array=audio_tensor.to(device),
             audio_fps=info["audio_fps"],
             audio_codec="aac",
         )
diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
@@ -16,7 +16,7 @@
 from .vision import VisionDataset
 
 
-def _dl_wrap(tarpath: str, videopath: str, line: str) -> None:
+def _dl_wrap(tarpath: Union[str, Path], videopath: Union[str, Path], line: str) -> None:
     download_and_extract_archive(line, tarpath, videopath)
 
 
diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py
@@ -26,7 +26,7 @@
 
 def _urlretrieve(url: str, filename: Union[str, pathlib.Path], chunk_size: int = 1024 * 32) -> None:
     with urllib.request.urlopen(urllib.request.Request(url, headers={"User-Agent": USER_AGENT})) as response:
-        with open(filename, "wb") as fh, tqdm(total=response.length) as pbar:
+        with open(filename, "wb") as fh, tqdm(total=response.length, unit="B", unit_scale=True) as pbar:
             while chunk := response.read(chunk_size):
                 fh.write(chunk)
                 pbar.update(len(chunk))
diff --git a/torchvision/io/video.py b/torchvision/io/video.py
@@ -115,7 +115,7 @@ def write_video(
             audio_sample_fmt = container.streams.audio[0].format.name
 
             format_dtype = np.dtype(audio_format_dtypes[audio_sample_fmt])
-            audio_array = torch.as_tensor(audio_array).numpy().astype(format_dtype)
+            audio_array = torch.as_tensor(audio_array).numpy(force=True).astype(format_dtype)
 
             frame = av.AudioFrame.from_ndarray(audio_array, format=audio_sample_fmt, layout=audio_layout)
 
diff --git a/torchvision/transforms/_presets.py b/torchvision/transforms/_presets.py
@@ -2,7 +2,7 @@
 This file is part of the private API. Please do not use directly these classes as they will be modified on
 future versions without warning. The classes should be accessed only via the transforms argument of Weights.
 """
-from typing import Optional, Tuple
+from typing import Optional, Tuple, Union
 
 import torch
 from torch import nn, Tensor
@@ -87,7 +87,7 @@ def __init__(
         self,
         *,
         crop_size: Tuple[int, int],
-        resize_size: Tuple[int, int],
+        resize_size: Union[Tuple[int], Tuple[int, int]],
         mean: Tuple[float, ...] = (0.43216, 0.394666, 0.37645),
         std: Tuple[float, ...] = (0.22803, 0.22145, 0.216989),
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,