diff --git a/fiftyone/utils/clip/zoo.py b/fiftyone/utils/clip/zoo.py index 4ffcb52c30..b6f1dc1d41 100644 --- a/fiftyone/utils/clip/zoo.py +++ b/fiftyone/utils/clip/zoo.py @@ -187,7 +187,7 @@ def _predict_all(self, imgs): frame_size = (width, height) if self._using_gpu: - imgs = imgs.cuda() + imgs = imgs.to(self.device) text_features = self._get_text_features() image_features = self._model.encode_image(imgs) diff --git a/fiftyone/utils/open_clip.py b/fiftyone/utils/open_clip.py index a0c74089c0..d51c350144 100644 --- a/fiftyone/utils/open_clip.py +++ b/fiftyone/utils/open_clip.py @@ -106,7 +106,7 @@ def _get_text_features(self): # Tokenize text text = self._tokenizer(prompts) if self._using_gpu: - text = text.cuda() + text = text.to(self.device) self._text_features = self._model.encode_text(text) return self._text_features @@ -118,7 +118,7 @@ def _embed_prompts(self, prompts): # Tokenize text text = self._tokenizer(formatted_prompts) if self._using_gpu: - text = text.cuda() + text = text.to(self.device) return self._model.encode_text(text) def _get_class_logits(self, text_features, image_features): @@ -143,9 +143,11 @@ def _predict_all(self, imgs): frame_size = (width, height) if self._using_gpu: - imgs = imgs.cuda() + imgs = imgs.to(self.device) - with torch.no_grad(), torch.amp.autocast("cuda"): + with torch.no_grad(), torch.amp.autocast( + device_type=self.device.type if self._using_gpu else "cpu" + ): image_features = self._model.encode_image(imgs) text_features = self._get_text_features() diff --git a/fiftyone/utils/super_gradients.py b/fiftyone/utils/super_gradients.py index ee81f137e6..74c395108d 100644 --- a/fiftyone/utils/super_gradients.py +++ b/fiftyone/utils/super_gradients.py @@ -96,7 +96,7 @@ def _load_model(self, config): ) if self._using_gpu: - model = model.cuda() + model = model.to(self.device) return model diff --git a/fiftyone/utils/transformers.py b/fiftyone/utils/transformers.py index 0e23fee2da..8a8a04c438 100644 --- a/fiftyone/utils/transformers.py +++ b/fiftyone/utils/transformers.py @@ -323,6 +323,9 @@ class FiftyOneTransformerConfig(Config, HasZooModel): def __init__(self, d): self.model = self.parse_raw(d, "model", default=None) self.name_or_path = self.parse_string(d, "name_or_path", default=None) + self.device = self.parse_string( + d, "device", default="cuda" if torch.cuda.is_available() else "cpu" + ) if etau.is_str(self.model): self.name_or_path = self.model self.model = None @@ -451,7 +454,8 @@ class FiftyOneTransformer(TransformerEmbeddingsMixin, Model): def __init__(self, config): self.config = config self.model = self._load_model(config) - self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.device = torch.device(self.config.device) + self.model.to(self.device) self.image_processor = self._load_image_processor() @property @@ -496,7 +500,8 @@ def __init__(self, config): self.config = config self.classes = config.classes self.model = self._load_model(config) - self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.device = torch.device(self.config.device) + self.model.to(self.device) self.processor = self._load_processor() self._text_prompts = None @@ -581,7 +586,7 @@ def _load_model(self, config): if config.model is not None: return config.model - device = "cuda" if torch.cuda.is_available() else "cpu" + device = torch.device(config.device) model = transformers.AutoModel.from_pretrained(config.name_or_path).to( device ) @@ -641,7 +646,7 @@ def _predict_from_retrieval(self, arg): with torch.no_grad(): for text_prompt in text_prompts: inputs = self.processor(arg, text_prompt, return_tensors="pt") - outputs = self.model(**inputs.to(self.device)) + outputs = self.model(**(inputs.to(self.device))) logits.append(outputs.logits[0, :].item()) logits = np.array(logits) @@ -693,14 +698,14 @@ class FiftyOneTransformerForImageClassification(FiftyOneTransformer): def _load_model(self, config): if config.model is not None: return config.model - device = "cuda" if torch.cuda.is_available() else "cpu" + device = torch.device(config.device) return transformers.AutoModelForImageClassification.from_pretrained( config.name_or_path ).to(device) def _predict(self, inputs): with torch.no_grad(): - results = self.model(**inputs.to(self.device)) + results = self.model(**(inputs.to(self.device))) return to_classification(results, self.model.config.id2label) def predict(self, arg): @@ -748,7 +753,8 @@ def __init__(self, config): self.classes = config.classes self.processor = self._load_processor(config) self.model = self._load_model(config) - self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.device = torch.device(self.config.device) + self.model.to(self.device) self._text_prompts = None def _load_processor(self, config): @@ -757,9 +763,7 @@ def _load_processor(self, config): if config.model is not None: name_or_path = config.model.name_or_path - return transformers.AutoProcessor.from_pretrained(name_or_path).to( - self.device - ) + return transformers.AutoProcessor.from_pretrained(name_or_path) def _load_model(self, config): name_or_path = config.name_or_path @@ -770,7 +774,9 @@ def _load_model(self, config): if config.model is not None: return config.model else: - return _get_detector_from_processor(self.processor, name_or_path) + return _get_detector_from_processor( + self.processor, name_or_path + ).to(config.device) def _process_inputs(self, args): text_prompts = self._get_text_prompts() @@ -781,7 +787,7 @@ def _process_inputs(self, args): def _predict(self, inputs, target_sizes): with torch.no_grad(): - outputs = self.model(**inputs.to(self.device)) + outputs = self.model(**(inputs.to(self.device))) results = self.processor.image_processor.post_process_object_detection( outputs, target_sizes=target_sizes @@ -821,10 +827,9 @@ class FiftyOneTransformerForObjectDetection(FiftyOneTransformer): def _load_model(self, config): if config.model is not None: return config.model - device = "cuda" if torch.cuda.is_available() else "cpu" return transformers.AutoModelForObjectDetection.from_pretrained( config.name_or_path - ).to(device) + ).to(config.device) def _predict(self, inputs, target_sizes): with torch.no_grad(): @@ -875,11 +880,10 @@ def _load_model(self, config): if config.model is not None: model = config.model else: - device = "cuda" if torch.cuda.is_available() else "cpu" model = ( transformers.AutoModelForSemanticSegmentation.from_pretrained( config.name_or_path - ).to(device) + ).to(config.device) ) self.mask_targets = model.config.id2label @@ -929,10 +933,9 @@ class FiftyOneTransformerForDepthEstimation(FiftyOneTransformer): def _load_model(self, config): if config.model is not None: return config.model - device = "cuda" if torch.cuda.is_available() else "cpu" return transformers.AutoModelForDepthEstimation.from_pretrained( config.name_or_path - ).to(device) + ).to(config.device) def _predict(self, inputs, target_sizes): with torch.no_grad(): @@ -1084,5 +1087,4 @@ def _get_detector_from_processor(processor, model_name_or_path): __import__(module_name, fromlist=[detector_class_name]), detector_class_name, ) - device = "cuda" if torch.cuda.is_available() else "cpu" - return detector_class.from_pretrained(model_name_or_path).to(device) + return detector_class.from_pretrained(model_name_or_path) diff --git a/fiftyone/utils/ultralytics.py b/fiftyone/utils/ultralytics.py index d6a9729468..c33e5bb7bf 100644 --- a/fiftyone/utils/ultralytics.py +++ b/fiftyone/utils/ultralytics.py @@ -20,6 +20,7 @@ import fiftyone.zoo.models as fozm ultralytics = fou.lazy_import("ultralytics") +torch = fou.lazy_import("torch") def convert_ultralytics_model(model): @@ -378,6 +379,9 @@ def __init__(self, d): self.model_name = self.parse_raw(d, "model_name", default=None) self.model_path = self.parse_raw(d, "model_path", default=None) self.classes = self.parse_array(d, "classes", default=None) + self.device = self.parse_string( + d, "device", default="cuda" if torch.cuda.is_available() else "cpu" + ) class FiftyOneYOLOModel(Model): @@ -390,6 +394,8 @@ class FiftyOneYOLOModel(Model): def __init__(self, config): self.config = config self.model = self._load_model(config) + self.device = torch.device(config.device) + self.model.to(self.device) def _load_model(self, config): if config.model is not None: diff --git a/fiftyone/zoo/models/manifest-torch.json b/fiftyone/zoo/models/manifest-torch.json index 3e3453b1a9..4392b83185 100644 --- a/fiftyone/zoo/models/manifest-torch.json +++ b/fiftyone/zoo/models/manifest-torch.json @@ -2968,7 +2968,8 @@ "entrypoint_args": { "repo_or_dir": "ultralytics/yolov5", "model": "yolov5n", - "pretrained": true + "pretrained": true, + "device": "cpu" }, "output_processor_cls": "fiftyone.utils.ultralytics.UltralyticsOutputProcessor", "raw_inputs": true @@ -2998,7 +2999,8 @@ "entrypoint_args": { "repo_or_dir": "ultralytics/yolov5", "model": "yolov5s", - "pretrained": true + "pretrained": true, + "device": "cpu" }, "output_processor_cls": "fiftyone.utils.ultralytics.UltralyticsOutputProcessor", "raw_inputs": true @@ -3028,7 +3030,8 @@ "entrypoint_args": { "repo_or_dir": "ultralytics/yolov5", "model": "yolov5m", - "pretrained": true + "pretrained": true, + "device": "cpu" }, "output_processor_cls": "fiftyone.utils.ultralytics.UltralyticsOutputProcessor", "raw_inputs": true @@ -3058,7 +3061,8 @@ "entrypoint_args": { "repo_or_dir": "ultralytics/yolov5", "model": "yolov5l", - "pretrained": true + "pretrained": true, + "device": "cpu" }, "output_processor_cls": "fiftyone.utils.ultralytics.UltralyticsOutputProcessor", "raw_inputs": true @@ -4555,7 +4559,8 @@ "entrypoint_args": { "repo_or_dir": "ultralytics/yolov5", "model": "yolov5x", - "pretrained": true + "pretrained": true, + "device": "cpu" }, "output_processor_cls": "fiftyone.utils.ultralytics.UltralyticsOutputProcessor", "raw_inputs": true