# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import torch

from examples.models.model_base import EagerModelBase
from llava.eval.run_llava import load_images, process_images
from llava.mm_utils import get_model_name_from_path

from llava.model.builder import load_pretrained_model
from torch import nn


class EncoderModel(nn.Module):
    """Wraps the LLaVA image encoder: the vision tower followed by the
    multimodal projector that maps image features into the language model's
    embedding space."""

    def __init__(self, llava_model):
        super().__init__()
        self.model_ = llava_model

    def forward(self, images_tensor):
        # Encode the preprocessed images with the vision tower, then project
        # the features with the multimodal (mm) projector.
        features = self.model_.get_model().get_vision_tower()(images_tensor)
        features = self.model_.get_model().mm_projector(features)
        return features


class LlavaModel(EagerModelBase):
    def __init__(self):
        # Load the pretrained LLaVA-v1.5-7B checkpoint. Only the model and the
        # image processor are used here, but load_pretrained_model also
        # returns the tokenizer and context length.
        model_path = "liuhaotian/llava-v1.5-7b"
        tokenizer, self.model_, self.image_processor_, context_len = (
            load_pretrained_model(
                model_path=model_path,
                model_base=None,
                model_name=get_model_name_from_path(model_path),
            )
        )
        self.device = "cpu"
        self.model_.to(self.device)
        self.dtype = torch.float32

    def get_eager_model(self):
        # Expose only the image encoder (vision tower + projector) as the
        # eager module.
        model = EncoderModel(self.model_)
        return model

    def get_example_inputs(self):
        # Download a sample image and preprocess it into the tensor the
        # encoder expects, to serve as example inputs for tracing/export.
        image_file = "https://llava-vl.github.io/static/images/view.jpg"
        images = load_images([image_file])
        images_tensor = process_images(
            images, self.image_processor_, self.model_.config
        ).to(self.model_.device, dtype=torch.float32)
        return (images_tensor,)
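

# The block below is an illustrative usage sketch, not part of the original
# module: assuming the standard `torch.export` + `executorch.exir.to_edge`
# lowering flow, the eager encoder above could be exported to a .pte file
# roughly like this.
if __name__ == "__main__":
    from executorch.exir import to_edge

    wrapper = LlavaModel()
    encoder = wrapper.get_eager_model()
    example_inputs = wrapper.get_example_inputs()

    # Capture the eager module, lower it to the Edge dialect, and serialize
    # the resulting ExecuTorch program.
    exported = torch.export.export(encoder, example_inputs)
    executorch_program = to_edge(exported).to_executorch()
    with open("llava_encoder.pte", "wb") as f:
        f.write(executorch_program.buffer)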