-
-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
34 changed files
with
2,817 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import unittest | ||
|
||
|
||
class TestAutoBackbone(unittest.TestCase): | ||
def test_transformers_backbone(self): | ||
import torch | ||
|
||
from video_transformers import AutoBackbone | ||
|
||
config = { | ||
"framework": {"name": "timm"}, | ||
"type": "2d_backbone", | ||
"model_name": "mobilevitv2_100", | ||
"num_timesteps": 8, | ||
} | ||
batch_size = 2 | ||
|
||
backbone = AutoBackbone.from_config(config) | ||
input = torch.randn(batch_size, 3, config["num_timesteps"], 224, 224) | ||
output = backbone(input) | ||
self.assertEqual(output.shape, (batch_size, config["num_timesteps"], backbone.num_features)) | ||
|
||
def test_timm_backbone(self): | ||
import torch | ||
|
||
from video_transformers import AutoBackbone | ||
|
||
config = { | ||
"framework": {"name": "transformers"}, | ||
"type": "2d_backbone", | ||
"model_name": "microsoft/cvt-13", | ||
"num_timesteps": 8, | ||
} | ||
batch_size = 2 | ||
|
||
backbone = AutoBackbone.from_config(config) | ||
input = torch.randn(batch_size, 3, config["num_timesteps"], 224, 224) | ||
output = backbone(input) | ||
self.assertEqual(output.shape, (batch_size, config["num_timesteps"], backbone.num_features)) | ||
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import unittest | ||
|
||
|
||
class TestAutoHead(unittest.TestCase): | ||
def test_liear_head(self): | ||
import torch | ||
|
||
from video_transformers import AutoHead | ||
|
||
config = { | ||
"name": "LinearHead", | ||
"hidden_size": 256, | ||
"num_classes": 10, | ||
"dropout_p": 0.1, | ||
} | ||
batch_size = 2 | ||
|
||
head = AutoHead.from_config(config) | ||
input = torch.randn(batch_size, config["hidden_size"]) | ||
output = head(input) | ||
self.assertEqual(output.shape, (batch_size, config["num_classes"])) | ||
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import unittest | ||
|
||
|
||
class TestAutoNeck(unittest.TestCase): | ||
def test_transformers_neck(self): | ||
import torch | ||
|
||
from video_transformers import AutoNeck | ||
|
||
config = { | ||
"name": "TransformerNeck", | ||
"num_features": 256, | ||
"num_timesteps": 8, | ||
"transformer_enc_num_heads": 4, | ||
"transformer_enc_num_layers": 2, | ||
"transformer_enc_act": "gelu", | ||
"dropout_p": 0.1, | ||
"return_mean": True, | ||
} | ||
batch_size = 2 | ||
|
||
neck = AutoNeck.from_config(config) | ||
input = torch.randn(batch_size, config["num_timesteps"], config["num_features"]) | ||
output = neck(input) | ||
self.assertEqual(output.shape, (batch_size, neck.num_features)) | ||
|
||
def test_lstm_neck(self): | ||
import torch | ||
|
||
from video_transformers import AutoNeck | ||
|
||
config = { | ||
"name": "LSTMNeck", | ||
"num_features": 256, | ||
"num_timesteps": 8, | ||
"hidden_size": 128, | ||
"num_layers": 2, | ||
"return_last": True, | ||
} | ||
batch_size = 2 | ||
|
||
neck = AutoNeck.from_config(config) | ||
input = torch.randn(batch_size, config["num_timesteps"], config["num_features"]) | ||
output = neck(input) | ||
self.assertEqual(output.shape, (batch_size, config["hidden_size"])) | ||
|
||
def test_gru_neck(self): | ||
import torch | ||
|
||
from video_transformers import AutoNeck | ||
|
||
config = { | ||
"name": "GRUNeck", | ||
"num_features": 256, | ||
"num_timesteps": 8, | ||
"hidden_size": 128, | ||
"num_layers": 2, | ||
"return_last": True, | ||
} | ||
batch_size = 2 | ||
|
||
neck = AutoNeck.from_config(config) | ||
input = torch.randn(batch_size, config["num_timesteps"], config["num_features"]) | ||
output = neck(input) | ||
self.assertEqual(output.shape, (batch_size, config["hidden_size"])) | ||
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import unittest | ||
|
||
|
||
class TestBackbone(unittest.TestCase): | ||
def test_transformers_backbone(self): | ||
import torch | ||
|
||
from video_transformers.backbones.transformers import TransformersBackbone | ||
|
||
config = {"model_name": "microsoft/cvt-13"} | ||
batch_size = 2 | ||
|
||
backbone = TransformersBackbone(model_name=config["model_name"], num_unfrozen_stages=0) | ||
self.assertEqual(backbone.num_trainable_params, 0) | ||
|
||
backbone = TransformersBackbone(model_name=config["model_name"], num_unfrozen_stages=-1) | ||
self.assertNotEqual(backbone.num_trainable_params, 0) | ||
|
||
input = torch.randn(batch_size, 3, 224, 224) | ||
output = backbone(input) | ||
self.assertEqual(output.shape, (batch_size, backbone.num_features)) | ||
|
||
def test_timm_backbone(self): | ||
import torch | ||
|
||
from video_transformers.backbones.timm import TimmBackbone | ||
|
||
config = {"model_name": "mobilevitv2_100"} | ||
batch_size = 2 | ||
|
||
backbone = TimmBackbone(model_name=config["model_name"], num_unfrozen_stages=0) | ||
self.assertEqual(backbone.num_trainable_params, 0) | ||
|
||
backbone = TimmBackbone(model_name=config["model_name"], num_unfrozen_stages=-1) | ||
self.assertNotEqual(backbone.num_trainable_params, 0) | ||
|
||
input = torch.randn(batch_size, 3, 224, 224) | ||
output = backbone(input) | ||
self.assertEqual(output.shape, (batch_size, backbone.num_features)) | ||
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import unittest | ||
|
||
|
||
class TestOnnx(unittest.TestCase): | ||
def test_onnx_export(self): | ||
from video_transformers import VideoClassificationModel | ||
|
||
config = { | ||
"backbone": { | ||
"name": "TransformersBackbone", | ||
"framework": {"name": "transformers", "version": "4.21.1"}, | ||
"mean": [0.485, 0.456, 0.406], | ||
"model_name": "microsoft/cvt-13", | ||
"num_features": 384, | ||
"num_total_params": 19611712, | ||
"num_trainable_params": 18536448, | ||
"std": [0.229, 0.224, 0.225], | ||
"type": "2d_backbone", | ||
}, | ||
"head": {"name": "LinearHead", "dropout_p": 0.0, "hidden_size": 384, "num_classes": 6}, | ||
"neck": { | ||
"name": "TransformerNeck", | ||
"dropout_p": 0.1, | ||
"num_features": 384, | ||
"num_timesteps": 8, | ||
"transformer_enc_act": "gelu", | ||
"transformer_enc_num_heads": 4, | ||
"transformer_enc_num_layers": 2, | ||
"return_mean": True, | ||
}, | ||
"preprocess_means": [0.485, 0.456, 0.406], | ||
"preprocess_stds": [0.229, 0.224, 0.225], | ||
"preprocess_min_short_side_scale": 256, | ||
"preprocess_input_size": 224, | ||
"num_timesteps": 8, | ||
"labels": ["BodyWeightSquats", "JumpRope", "Lunges", "PullUps", "PushUps", "WallPushups"], | ||
} | ||
|
||
model = VideoClassificationModel.from_config(config) | ||
|
||
model.to_onnx() | ||
|
||
def test_quantized_onnx_export(self): | ||
from video_transformers import VideoClassificationModel | ||
|
||
config = { | ||
"backbone": { | ||
"name": "TransformersBackbone", | ||
"framework": {"name": "transformers", "version": "4.21.1"}, | ||
"mean": [0.485, 0.456, 0.406], | ||
"model_name": "microsoft/cvt-13", | ||
"num_features": 384, | ||
"num_total_params": 19611712, | ||
"num_trainable_params": 18536448, | ||
"std": [0.229, 0.224, 0.225], | ||
"type": "2d_backbone", | ||
}, | ||
"head": {"name": "LinearHead", "dropout_p": 0.0, "hidden_size": 384, "num_classes": 6}, | ||
"neck": { | ||
"name": "TransformerNeck", | ||
"dropout_p": 0.1, | ||
"num_features": 384, | ||
"num_timesteps": 8, | ||
"transformer_enc_act": "gelu", | ||
"transformer_enc_num_heads": 4, | ||
"transformer_enc_num_layers": 2, | ||
"return_mean": True, | ||
}, | ||
"preprocess_means": [0.485, 0.456, 0.406], | ||
"preprocess_stds": [0.229, 0.224, 0.225], | ||
"preprocess_min_short_side_scale": 256, | ||
"preprocess_input_size": 224, | ||
"num_timesteps": 8, | ||
"labels": ["BodyWeightSquats", "JumpRope", "Lunges", "PullUps", "PushUps", "WallPushups"], | ||
} | ||
|
||
model = VideoClassificationModel.from_config(config) | ||
|
||
model.to_onnx(quantize=True) | ||
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import unittest | ||
|
||
|
||
class TestVideoClassificationModel(unittest.TestCase): | ||
def test_transformers_backbone(self): | ||
import torch | ||
|
||
from video_transformers import VideoClassificationModel | ||
|
||
config = { | ||
"backbone": { | ||
"name": "TransformersBackbone", | ||
"framework": {"name": "transformers", "version": "4.21.1"}, | ||
"mean": [0.485, 0.456, 0.406], | ||
"model_name": "microsoft/cvt-13", | ||
"num_features": 384, | ||
"num_total_params": 19611712, | ||
"num_trainable_params": 18536448, | ||
"std": [0.229, 0.224, 0.225], | ||
"type": "2d_backbone", | ||
}, | ||
"head": {"name": "LinearHead", "dropout_p": 0.0, "hidden_size": 384, "num_classes": 6}, | ||
"neck": { | ||
"name": "TransformerNeck", | ||
"dropout_p": 0.1, | ||
"num_features": 384, | ||
"num_timesteps": 8, | ||
"transformer_enc_act": "gelu", | ||
"transformer_enc_num_heads": 4, | ||
"transformer_enc_num_layers": 2, | ||
"return_mean": True, | ||
}, | ||
"preprocess_means": [0.485, 0.456, 0.406], | ||
"preprocess_tds": [0.229, 0.224, 0.225], | ||
"preprocess_min_short_side_scale": 256, | ||
"preprocess_input_size": 224, | ||
"num_timesteps": 8, | ||
"labels": ["BodyWeightSquats", "JumpRope", "Lunges", "PullUps", "PushUps", "WallPushups"], | ||
} | ||
batch_size = 2 | ||
|
||
model = VideoClassificationModel.from_config(config) | ||
|
||
input = torch.randn(batch_size, 3, config["num_timesteps"], 224, 224) | ||
output = model(input) | ||
self.assertEqual(output.shape, (batch_size, model.head.num_classes)) | ||
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from video_transformers.auto.backbone import AutoBackbone | ||
from video_transformers.auto.head import AutoHead | ||
from video_transformers.auto.neck import AutoNeck | ||
from video_transformers.modules import TimeDistributed, VideoClassificationModel | ||
|
||
__version__ = "0.0.2" |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from typing import Dict, Union | ||
|
||
from video_transformers.backbones.base import Backbone | ||
from video_transformers.modules import TimeDistributed | ||
|
||
|
||
class AutoBackbone:
    """
    AutoBackbone is a class that automatically instantiates a video model backbone from a config.
    """

    @classmethod
    def from_config(cls, config: Dict) -> Union[Backbone, TimeDistributed]:
        """
        Build the backbone described by ``config``.

        Args:
            config: Backbone configuration. The keys read here are
                ``framework`` (a dict whose ``name`` is ``"transformers"`` or ``"timm"``),
                ``model_name`` (forwarded to the backbone constructor) and
                ``type`` (``"2d_backbone"`` wraps the backbone in ``TimeDistributed``).

        Returns:
            The instantiated backbone, wrapped in ``TimeDistributed`` when
            ``type`` is ``"2d_backbone"``.

        Raises:
            ValueError: If ``framework`` is missing, is not a dict, has no ``name``,
                or names an unsupported framework.
        """
        backbone_framework = config.get("framework")
        backbone_type = config.get("type")
        backbone_model_name = config.get("model_name")

        # A missing or malformed "framework" entry resolves to None here, so it is
        # reported through the same ValueError as an unknown framework name.
        framework_name = backbone_framework.get("name") if isinstance(backbone_framework, dict) else None

        if framework_name == "transformers":
            # Imported inside the branch so only the selected framework's module is loaded.
            from video_transformers.backbones.transformers import TransformersBackbone

            backbone = TransformersBackbone(model_name=backbone_model_name)
        elif framework_name == "timm":
            from video_transformers.backbones.timm import TimmBackbone

            backbone = TimmBackbone(model_name=backbone_model_name)
        else:
            raise ValueError(f"Unknown framework {backbone_framework}")

        if backbone_type == "2d_backbone":
            backbone = TimeDistributed(backbone)
        return backbone
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from typing import Dict | ||
|
||
|
||
class AutoHead:
    """
    AutoHead is a class that automatically instantiates a video model head from a config.
    """

    @classmethod
    def from_config(cls, config: Dict):
        """
        Build the head named by ``config["name"]``.

        Only ``"LinearHead"`` is handled; it is constructed positionally from the
        ``hidden_size``, ``num_classes`` and ``dropout_p`` entries of ``config``
        (a missing entry is passed as ``None``).

        Raises:
            ValueError: If ``name`` is anything other than ``"LinearHead"``.
        """
        requested = config.get("name")
        if requested != "LinearHead":
            raise ValueError(f"Unsupported head class name: {requested}")

        from video_transformers.heads import LinearHead

        # Positional order expected by the constructor call: hidden_size, num_classes, dropout_p.
        positional = [config.get(key) for key in ("hidden_size", "num_classes", "dropout_p")]
        return LinearHead(*positional)
Oops, something went wrong.