diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..e101d52
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,87 @@
+name: Continuous Integration
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  build:
+    runs-on: ${{ matrix.operating-system }}
+    strategy:
+      matrix:
+        operating-system: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: [3.7, 3.8, 3.9]
+        torch-version: [1.10.2, 1.11.0, 1.12.0]
+      fail-fast: false
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Restore Ubuntu cache
+        uses: actions/cache@v1
+        if: matrix.operating-system == 'ubuntu-latest'
+        with:
+          path: ~/.cache/pip
+          key: ${{ matrix.operating-system }}-${{ matrix.python-version }}-${{ hashFiles('**/setup.py') }}
+          restore-keys: ${{ matrix.operating-system }}-${{ matrix.python-version }}-
+
+      - name: Restore macOS cache
+        uses: actions/cache@v1
+        if: matrix.operating-system == 'macos-latest'
+        with:
+          path: ~/Library/Caches/pip
+          key: ${{ matrix.operating-system }}-${{ matrix.python-version }}-${{ hashFiles('**/setup.py') }}
+          restore-keys: ${{ matrix.operating-system }}-${{ matrix.python-version }}-
+
+      - name: Restore Windows cache
+        uses: actions/cache@v1
+        if: matrix.operating-system == 'windows-latest'
+        with:
+          path: ~\AppData\Local\pip\Cache
+          key: ${{ matrix.operating-system }}-${{ matrix.python-version }}-${{ hashFiles('**/setup.py') }}
+          restore-keys: ${{ matrix.operating-system }}-${{ matrix.python-version }}-
+
+      - name: Update pip
+        run: python -m pip install --upgrade pip
+
+      - name: Lint with flake8, black and isort
+        run: |
+          pip install -e .[dev]
+          # stop the build if there are Python syntax errors or undefined names
+          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+          black . --check --config pyproject.toml
+          isort -c .
+          # exit-zero treats all errors as warnings. Allowed max line length is 120.
+          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=120 --statistics
+
+      - name: Install PyTorch on Linux and Windows
+        if: >
+          matrix.operating-system == 'ubuntu-latest' ||
+          matrix.operating-system == 'windows-latest'
+        run: >
+          pip install torch==${{ matrix.torch-version }}+cpu
+          -f https://download.pytorch.org/whl/torch_stable.html
+
+      - name: Install PyTorch on macOS
+        if: matrix.operating-system == 'macos-latest'
+        run: pip install torch==${{ matrix.torch-version }}
+
+      - name: Install video-transformers package from local setup.py
+        run: >
+          pip install -e .
+
+      - name: Install test dependencies
+        run: >
+          pip install -e .[test]
+
+      - name: Unittest video-transformers
+        run: |
+          python -m unittest
diff --git a/.github/workflows/package_testing.yml b/.github/workflows/package_testing.yml
new file mode 100644
index 0000000..83ef596
--- /dev/null
+++ b/.github/workflows/package_testing.yml
@@ -0,0 +1,73 @@
+name: Package Testing
+
+on:
+  schedule:
+    - cron: '0 0 * * *' # Runs at 00:00 UTC every day
+
+jobs:
+  build:
+    runs-on: ${{ matrix.operating-system }}
+
+    strategy:
+      matrix:
+        operating-system: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: [3.7, 3.8, 3.9]
+        torch-version: [1.10.2, 1.11.0, 1.12.0]
+      fail-fast: false
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Restore Ubuntu cache
+        uses: actions/cache@v1
+        if: matrix.operating-system == 'ubuntu-latest'
+        with:
+          path: ~/.cache/pip
+          key: ${{ matrix.operating-system }}-${{ matrix.python-version }}-${{ hashFiles('**/setup.py') }}
+          restore-keys: ${{ matrix.operating-system }}-${{ matrix.python-version }}-
+
+      - name: Restore macOS cache
+        uses: actions/cache@v1
+        if: matrix.operating-system == 'macos-latest'
+        with:
+          path: ~/Library/Caches/pip
+          key: ${{ matrix.operating-system }}-${{ matrix.python-version }}-${{ hashFiles('**/setup.py') }}
+          restore-keys: ${{ matrix.operating-system }}-${{ matrix.python-version }}-
+
+      - name: Restore Windows cache
+        uses: actions/cache@v1
+        if: matrix.operating-system == 'windows-latest'
+        with:
+          path: ~\AppData\Local\pip\Cache
+          key: ${{ matrix.operating-system }}-${{ matrix.python-version }}-${{ hashFiles('**/setup.py') }}
+          restore-keys: ${{ matrix.operating-system }}-${{ matrix.python-version }}-
+
+      - name: Update pip
+        run: python -m pip install --upgrade pip
+
+      - name: Install PyTorch on Linux and Windows
+        if: >
+          matrix.operating-system == 'ubuntu-latest' ||
+          matrix.operating-system == 'windows-latest'
+        run: >
+          pip install torch==${{ matrix.torch-version }}+cpu
+          -f https://download.pytorch.org/whl/torch_stable.html
+
+      - name: Install PyTorch on macOS
+        if: matrix.operating-system == 'macos-latest'
+        run: pip install torch==${{ matrix.torch-version }}
+
+      - name: Install latest video-transformers package
+        run: >
+          pip install --upgrade --force-reinstall video-transformers[test]
+
+      - name: Unittest video-transformers
+        run: |
+          python -m unittest
+
diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml
new file mode 100644
index 0000000..f7e7ea2
--- /dev/null
+++ b/.github/workflows/publish_pypi.yml
@@ -0,0 +1,27 @@
+name: Publish Python Package
+
+on:
+  release:
+    types: [published, edited]
+
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.x'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install setuptools wheel twine
+      - name: Build and publish
+        env:
+          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        run: |
+          python setup.py sdist bdist_wheel
+          twine upload --verbose --skip-existing dist/*
diff --git a/.gitignore b/.gitignore
index b6e4761..09924eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,8 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# extra
+.vscode
+.neptune
+runs/
\ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..540b720
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include requirements.txt
\ No newline at end of file
diff --git a/README.md b/README.md
index 5a46f5b..c0734a6 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,231 @@
-# video-transformers
-Easiest way of fine-tuning HuggingFace video classification models
+<div align="center">
+<h1>video-transformers</h1>
+<p>
+  Easiest way of fine-tuning HuggingFace video classification models.
+</p>
+</div>
+
+## πŸš€ Features
+
+`video-transformers` uses:
+
+- πŸ€— [accelerate](https://github.com/huggingface/accelerate) for distributed training,
+
+- πŸ€— [evaluate](https://github.com/huggingface/evaluate) for evaluation,
+
+- [pytorchvideo](https://github.com/facebookresearch/pytorchvideo) for dataloading
+
+and supports:
+
+- creating and fine-tuning video models using [transformers](https://github.com/huggingface/transformers) and [timm](https://github.com/rwightman/pytorch-image-models) vision models
+
+- experiment tracking with [layer](https://layer.ai/), [neptune](https://neptune.ai/), [tensorboard](https://www.tensorflow.org/tensorboard) and other trackers
+
+- exporting fine-tuned models in [ONNX](https://onnx.ai/) format
+
+- pushing fine-tuned models to the [HuggingFace Hub](https://huggingface.co/models?pipeline_tag=image-classification&sort=downloads)
+
+- loading pretrained models from the [HuggingFace Hub](https://huggingface.co/models?pipeline_tag=image-classification&sort=downloads)
+
+## βŒ› Incoming Features
+
+- Automated [Gradio app](https://gradio.app/) and [space](https://huggingface.co/spaces) creation
+
+- [Layer Hub](https://layer.ai/community) support
+
+## 🏁 Installation
+
+- Install `PyTorch`:
+
+```bash
+conda install pytorch=1.11.0 torchvision=0.12.0 cudatoolkit=11.3 -c pytorch
+```
+
+- Install `video-transformers`:
+
+```bash
+pip install video-transformers
+```
+
+## πŸ”₯ Usage
+
+- Prepare your video classification dataset in the following folder structure (.avi and .mp4 extensions are supported):
+
+```bash
+train_root
+    label_1
+        video_1
+        video_2
+        ...
+    label_2
+        video_1
+        video_2
+        ...
+    ...
+val_root
+    label_1
+        video_1
+        video_2
+        ...
+    label_2
+        video_1
+        video_2
+        ...
+    ...
+```
+
+- Fine-tune a CVT (from HuggingFace) + Transformer-based video classifier:
+
+```python
+from video_transformers import TimeDistributed, VideoClassificationModel
+from video_transformers.backbones.transformers import TransformersBackbone
+from video_transformers.data import VideoDataModule
+from video_transformers.heads import LinearHead
+from video_transformers.necks import TransformerNeck
+from video_transformers.trainer import trainer_factory
+
+backbone = TimeDistributed(TransformersBackbone("microsoft/cvt-13", num_unfrozen_stages=0))
+neck = TransformerNeck(
+    num_features=backbone.num_features,
+    num_timesteps=8,
+    transformer_enc_num_heads=4,
+    transformer_enc_num_layers=2,
+    dropout_p=0.1,
+)
+
+datamodule = VideoDataModule(
+    train_root=".../ucf6/train",
+    val_root=".../ucf6/val",
+    clip_duration=2,
+    train_dataset_multiplier=1,
+    batch_size=4,
+    num_workers=4,
+    video_timesteps=8,
+    video_crop_size=224,
+    video_means=backbone.mean,
+    video_stds=backbone.std,
+    video_min_short_side_scale=256,
+    video_max_short_side_scale=320,
+    video_horizontal_flip_p=0.5,
+)
+
+head = LinearHead(hidden_size=neck.num_features, num_classes=datamodule.num_classes)
+model = VideoClassificationModel(backbone, head, neck)
+
+Trainer = trainer_factory("single_label_classification")
+trainer = Trainer(
+    datamodule,
+    model,
+)
+
+trainer.fit()
+
+```
+
+- Fine-tune a MobileViT (from Timm) + GRU-based video classifier:
+
+```python
+from video_transformers import TimeDistributed, VideoClassificationModel
+from video_transformers.backbones.timm import TimmBackbone
+from video_transformers.data import VideoDataModule
+from video_transformers.heads import LinearHead
+from video_transformers.necks import GRUNeck
+from video_transformers.trainer import trainer_factory
+
+backbone = TimeDistributed(TimmBackbone("mobilevitv2_100", num_unfrozen_stages=0))
+neck = GRUNeck(num_features=backbone.num_features, hidden_size=128, num_layers=2, return_last=True)
+
+datamodule = VideoDataModule(
+    train_root=".../ucf6/train",
+    val_root=".../ucf6/val",
+    clip_duration=2,
+    train_dataset_multiplier=1,
+    batch_size=4,
+    num_workers=4,
+    video_timesteps=8,
+    video_crop_size=224,
+    video_means=backbone.mean,
+    video_stds=backbone.std,
+    video_min_short_side_scale=256,
+    video_max_short_side_scale=320,
+    video_horizontal_flip_p=0.5,
+)
+
+head = LinearHead(hidden_size=neck.num_features, num_classes=datamodule.num_classes)
+model = VideoClassificationModel(backbone, head, neck)
+
+Trainer = trainer_factory("single_label_classification")
+trainer = Trainer(
+    datamodule,
+    model,
+)
+
+trainer.fit()
+
+```
+
+## πŸ€— Full HuggingFace Integration
+
+- Push your fine-tuned model to the hub:
+
+```python
+from video_transformers import VideoClassificationModel
+
+model = VideoClassificationModel.from_pretrained("runs/exp/checkpoint")
+
+model.push_to_hub('model_name')
+```
+
+- Load any pretrained video-transformers model from the hub:
+
+```python
+from video_transformers import VideoClassificationModel
+
+model = VideoClassificationModel.from_pretrained('account_name/model_name')
+```
+
+- (Incoming feature) Automatically create a Gradio app and push it to a HuggingFace Space:
+
+```python
+from video_transformers import VideoClassificationModel
+
+model = VideoClassificationModel.from_pretrained("runs/exp/checkpoint")
+model.push_to_space('account_name/app_name')
+```
+
+## πŸ“ˆ Multiple tracker support
+
+- The Tensorboard tracker is enabled by default.
+
+- To add Neptune, W&B, or other trackers:
+
+```python
+from video_transformers.tracking import NeptuneTracker
+from accelerate.tracking import WandBTracker
+
+trackers = [
+    NeptuneTracker(EXPERIMENT_NAME, api_token=NEPTUNE_API_TOKEN, project=NEPTUNE_PROJECT),
+    WandBTracker(project_name=WANDB_PROJECT)
+]
+
+trainer = Trainer(
+    datamodule,
+    model,
+    trackers=trackers
+)
+
+```
+
+## πŸ•ΈοΈ ONNX support
+
+- Convert your trained models into ONNX format for deployment:
+
+```python
+from video_transformers import VideoClassificationModel
+
+model = VideoClassificationModel.from_pretrained("runs/exp/checkpoint")
+model.to_onnx(quantize=False, opset_version=12, export_dir="runs/exports/", export_filename="model.onnx")
+```
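+
+- Run the exported model with [ONNX Runtime](https://onnxruntime.ai/) (a minimal sketch; the single-input assumption and the `(batch, timesteps, channels, height, width)` layout are inferred from the datamodule settings above, not guaranteed by the export):
+
+```python
+import numpy as np
+import onnxruntime as ort
+
+session = ort.InferenceSession("runs/exports/model.onnx")
+input_name = session.get_inputs()[0].name  # query the graph instead of hardcoding the input name
+
+# Dummy clip standing in for 8 preprocessed 224x224 RGB frames (shape is an assumption).
+clip = np.random.rand(1, 8, 3, 224, 224).astype(np.float32)
+logits = session.run(None, {input_name: clip})[0]  # first output, assumed to be class logits
+print(logits.argmax(axis=-1))
+```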
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..3aa87ab
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,11 @@
+[tool.black]
+line-length = 120
+exclude = '''
+(
+  /(
+    | .git
+    | venv
+    | .venv
+  )/
+)
+'''
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2840810
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,12 @@
+accelerate>=0.12.0
+evaluate>=0.2.2
+transformers>=4.21.1
+timm>=0.6.7
+click==8.0.4
+pytorchvideo
+torch
+torchvision
+balanced-loss
+scikit-learn
+tensorboard
+opencv-python
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..941249b
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,9 @@
+[flake8]
+max-line-length = 120
+select = E9,F63,F7,F82
+per-file-ignores = __init__.py: F401
+max-complexity = 10
+
+[isort]
+line_length=120
+profile=black
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..78a52d0
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,63 @@
+import io
+import os
+import re
+
+import setuptools
+
+
+def get_long_description():
+    base_dir = os.path.abspath(os.path.dirname(__file__))
+    with io.open(os.path.join(base_dir, "README.md"), encoding="utf-8") as f:
+        return f.read()
+
+
+def get_requirements():
+    with open("requirements.txt") as f:
+        return f.read().splitlines()
+
+
+def get_version():
+    current_dir = os.path.abspath(os.path.dirname(__file__))
+    version_file = os.path.join(current_dir, "video_transformers", "__init__.py")
+    with io.open(version_file, encoding="utf-8") as f:
+        return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', f.read(), re.M).group(1)
+
+
+_DEV_REQUIREMENTS = ["black==21.7b0", "flake8==3.9.2", "isort==5.9.2"]
+
+_TEST_REQUIREMENTS = ["onnx", "onnxruntime"]
+
+extras = {"test": _DEV_REQUIREMENTS + _TEST_REQUIREMENTS, "dev": _DEV_REQUIREMENTS}
+
+
+setuptools.setup(
+    name="video-transformers",
+    version=get_version(),
+    author="fcakyon",
+    license="MIT",
+    description="Easiest way of fine-tuning HuggingFace video classification models.",
+    long_description=get_long_description(),
+    long_description_content_type="text/markdown",
+    url="https://github.com/fcakyon/video-transformers",
+    packages=setuptools.find_packages(exclude=["examples", "tests"]),
+    python_requires=">=3.7",
+    install_requires=get_requirements(),
+    extras_require=extras,
+    include_package_data=True,
+    classifiers=[
+        "Development Status :: 5 - Production/Stable",
+        "Operating System :: OS Independent",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Science/Research",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Topic :: Software Development :: Libraries",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+        "Topic :: Education",
+        "Topic :: Scientific/Engineering",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    ],
+    keywords="machine-learning, deep-learning, ml, pytorch, vision, loss, video-classification, transformers, accelerate, evaluate, huggingface",
+)
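Note: `get_version()` in setup.py above parses the package version out of `video_transformers/__init__.py` with a regex; a minimal sketch of the line it expects (the version value here is hypothetical):

```python
# video_transformers/__init__.py (hypothetical contents; only __version__ is required by setup.py)
__version__ = "0.0.1"
```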