Skip to content

Commit

Permalink
refactor video data loaders, fix some bugs (#22)
Browse files Browse the repository at this point in the history
* ignore mp4 avi zip files

* update dependency versions

* ignore onnx files

* increase package version

* fix a typo

* refactor dataset loading

* update code snippets in readme

* reformat with isort

* update workflows

* ignore export and examples folders

* clean code
  • Loading branch information
fcakyon authored Nov 27, 2022
1 parent 84e8e7d commit 8b85f91
Show file tree
Hide file tree
Showing 13 changed files with 405 additions and 128 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ jobs:
if: matrix.operating-system == 'macos-latest'
run: pip install torch==${{ matrix.torch-version }}

- name: Install Pytorchvideo from main branch
run: pip install git+https://github.com/facebookresearch/pytorchvideo.git

- name: Lint with flake8, black and isort
run: |
pip install .[dev]
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/package_testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ jobs:
if: matrix.operating-system == 'macos-latest'
run: pip install torch==${{ matrix.torch-version }}

- name: Install Pytorchvideo from main branch
run: pip install git+https://github.com/facebookresearch/pytorchvideo.git

- name: Install latest video-transformers package
run: >
pip install --upgrade --force-reinstall video-transformers[test]
Expand Down
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,10 @@ dmypy.json
# extra
.vscode
.neptune
runs/
runs/
*.mp4
*.avi
*.zip
*.onnx
exports/
examples/
32 changes: 20 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ and supports:
conda install pytorch=1.11.0 torchvision=0.12.0 cudatoolkit=11.3 -c pytorch
```

- Install pytorchvideo from main branch:

```bash
pip install git+https://github.com/facebookresearch/pytorchvideo.git
```

- Install `video-transformers`:

```bash
Expand Down Expand Up @@ -87,6 +93,7 @@ from video_transformers.data import VideoDataModule
from video_transformers.heads import LinearHead
from video_transformers.necks import TransformerNeck
from video_transformers.trainer import trainer_factory
from video_transformers.utils.file import download_ucf6

backbone = TimeDistributed(TransformersBackbone("microsoft/cvt-13", num_unfrozen_stages=0))
neck = TransformerNeck(
Expand All @@ -96,28 +103,28 @@ neck = TransformerNeck(
transformer_enc_num_layers=2,
dropout_p=0.1,
)
optimizer = AdamW(model.parameters(), lr=1e-4)

download_ucf6("./")
datamodule = VideoDataModule(
train_root=".../ucf6/train",
val_root=".../ucf6/val",
clip_duration=2,
train_dataset_multiplier=1,
train_root="ucf6/train",
val_root="ucf6/val",
batch_size=4,
num_workers=4,
num_timesteps=8,
preprocess_input_size=224,
preprocess_clip_duration=1,
preprocess_means=backbone.mean,
preprocess_stds=backbone.std,
preprocess_min_short_side_scale=256,
preprocess_max_short_side_scale=320,
preprocess_min_short_side=256,
preprocess_max_short_side=320,
preprocess_horizontal_flip_p=0.5,
)

head = LinearHead(hidden_size=neck.num_features, num_classes=datamodule.num_classes)
model = VideoModel(backbone, head, neck)

optimizer = AdamW(model.parameters(), lr=1e-4)

Trainer = trainer_factory("single_label_classification")
trainer = Trainer(
datamodule,
Expand All @@ -139,23 +146,24 @@ from video_transformers.data import VideoDataModule
from video_transformers.heads import LinearHead
from video_transformers.necks import GRUNeck
from video_transformers.trainer import trainer_factory
from video_transformers.utils.file import download_ucf6

backbone = TimeDistributed(TimmBackbone("mobilevitv2_100", num_unfrozen_stages=0))
neck = GRUNeck(num_features=backbone.num_features, hidden_size=128, num_layers=2, return_last=True)

download_ucf6("./")
datamodule = VideoDataModule(
train_root=".../ucf6/train",
val_root=".../ucf6/val",
train_dataset_multiplier=1,
train_root="ucf6/train",
val_root="ucf6/val",
batch_size=4,
num_workers=4,
num_timesteps=8,
preprocess_input_size=224,
preprocess_clip_duration=1,
preprocess_means=backbone.mean,
preprocess_stds=backbone.std,
preprocess_min_short_side_scale=256,
preprocess_max_short_side_scale=320,
preprocess_min_short_side=256,
preprocess_max_short_side=320,
preprocess_horizontal_flip_p=0.5,
)

Expand Down
11 changes: 5 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
accelerate>=0.12.0
evaluate>=0.2.2
transformers>=4.23.1
timm>=0.6.7
accelerate>=0.14.0,<0.15.0
evaluate>=0.3.0,<0.4.0
transformers>=4.24.0,<4.25.0
timm>=0.6.12,<0.7.0
click==8.0.4
pytorchvideo
balanced-loss
scikit-learn
tensorboard
opencv-python
gradio>=3.1.6
huggingface-hub>=0.10.1
huggingface-hub>=0.11.0,<0.12.0
importlib-metadata>=1.1.0,<4.3;python_version<'3.8'
2 changes: 1 addition & 1 deletion video_transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
from video_transformers.auto.neck import AutoNeck
from video_transformers.modeling import TimeDistributed, VideoModel

__version__ = "0.0.6"
__version__ = "0.0.7"
53 changes: 20 additions & 33 deletions video_transformers/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from torch.utils.data import DataLoader
from torchvision.transforms import CenterCrop, Compose, Lambda, RandomCrop, RandomHorizontalFlip

from video_transformers.pytorchvideo_wrapper.data.labeled_video_dataset import labeled_video_dataset
from video_transformers.pytorchvideo_wrapper.data.labeled_video_paths import LabeledVideoDataset, LabeledVideoPaths
from video_transformers.utils.extra import class_to_config

Expand Down Expand Up @@ -53,8 +54,8 @@ def __init__(
input_size: model input size
means: mean of the video clip
stds: standard deviation of the video clip
min_short_side_scale: minimum short side of the video clip
max_short_side_scale: maximum short side of the video clip
min_short_side: minimum short side of the video clip
max_short_side: maximum short side of the video clip
horizontal_flip_p: probability of horizontal flip
clip_duration: duration of each video clip
Expand All @@ -77,10 +78,13 @@ def __init__(
self.clip_duration = clip_duration

# Transforms applied to train dataset.
def normalize_func(x):
return x / 255.0

self.train_video_transform = Compose(
[
UniformTemporalSubsample(self.num_timesteps),
Lambda(lambda x: x / 255.0),
Lambda(normalize_func),
Normalize(self.means, self.stds),
RandomShortSideScale(
min_size=self.min_short_side,
Expand All @@ -97,7 +101,7 @@ def __init__(
self.val_video_transform = Compose(
[
UniformTemporalSubsample(self.num_timesteps),
Lambda(lambda x: x / 255.0),
Lambda(normalize_func),
Normalize(self.means, self.stds),
ShortSideScale(self.min_short_side),
CenterCrop(self.input_size),
Expand All @@ -112,7 +116,6 @@ def __init__(
train_root: str,
val_root: str,
test_root: str = None,
train_dataset_multiplier: int = 1,
batch_size: int = 4,
num_workers: int = 4,
num_timesteps: int = 8,
Expand Down Expand Up @@ -158,8 +161,6 @@ def __init__(
Path to kinetics formatted train folder.
clip_duration: float
Duration of sampled clip for each video.
train_dataset_multiplier: int
Multiplier for the number of random training data samples.
batch_size: int
Batch size for training and validation.
num_workers: int
Expand Down Expand Up @@ -196,7 +197,6 @@ def __init__(
self.train_root = train_root
self.val_root = val_root
self.test_root = test_root if test_root is not None else val_root
self.train_dataset_multiplier = train_dataset_multiplier
self.labels = None

self.train_dataloader = self._get_train_dataloader()
Expand All @@ -212,18 +212,13 @@ def config(self) -> Dict:
return class_to_config(self, ignored_attrs=("config", "train_root", "val_root", "test_root"))

def _get_train_dataloader(self):
labeled_video_paths = LabeledVideoPaths.from_path(self.train_root)
labeled_video_paths.path_prefix = ""
video_sampler = torch.utils.data.RandomSampler
clip_sampler = pytorchvideo.data.make_clip_sampler("random", self.preprocessor_config["clip_duration"])
dataset = LabeledVideoDataset(
labeled_video_paths,
clip_sampler,
video_sampler,
self.preprocessor.train_transform,
dataset = labeled_video_dataset(
data_path=self.train_root,
clip_sampler=clip_sampler,
transform=self.preprocessor.train_transform,
decode_audio=False,
decoder="pyav",
dataset_multiplier=self.train_dataset_multiplier,
)
self.labels = dataset.labels
return DataLoader(
Expand All @@ -234,18 +229,14 @@ def _get_train_dataloader(self):
)

def _get_val_dataloader(self):
labeled_video_paths = LabeledVideoPaths.from_path(self.val_root)
labeled_video_paths.path_prefix = ""
video_sampler = torch.utils.data.SequentialSampler
clip_sampler = pytorchvideo.data.clip_sampling.UniformClipSamplerTruncateFromStart(
clip_duration=self.preprocessor_config["clip_duration"],
truncation_duration=self.preprocessor_config["clip_duration"],
)
dataset = LabeledVideoDataset(
labeled_video_paths,
clip_sampler,
video_sampler,
self.preprocessor.val_transform,
dataset = labeled_video_dataset(
data_path=self.val_root,
clip_sampler=clip_sampler,
transform=self.preprocessor.val_transform,
decode_audio=False,
decoder="pyav",
)
Expand All @@ -257,18 +248,14 @@ def _get_val_dataloader(self):
)

def _get_test_dataloader(self):
labeled_video_paths = LabeledVideoPaths.from_path(self.test_root)
labeled_video_paths.path_prefix = ""
video_sampler = torch.utils.data.SequentialSampler
clip_sampler = pytorchvideo.data.clip_sampling.UniformClipSamplerTruncateFromStart(
clip_duration=self.preprocessor_config["clip_duration"],
truncation_duration=self.preprocessor_config["clip_duration"],
)
dataset = LabeledVideoDataset(
labeled_video_paths,
clip_sampler,
video_sampler,
self.preprocessor.val_transform,
dataset = labeled_video_dataset(
data_path=self.test_root,
clip_sampler=clip_sampler,
transform=self.preprocessor.val_transform,
decode_audio=False,
decoder="pyav",
)
Expand Down
Empty file.
Empty file.
Loading

0 comments on commit 8b85f91

Please sign in to comment.