
Ovis2.5-2B Inference speed #98

@AlexisMDP

Description


My goal is to get an image description as quickly as possible. How can I speed up inference? Did I miss anything, or add any unnecessary parameters?
I'm currently seeing an inference time of about 4.5 seconds per frame, which is far too long for my use case.
I'm running on RunPod with an RTX 4090 (24 GB).

"""
Minimal example for Ovis2.5-2B - Model loading and inference
"""

import torch
from transformers import AutoModelForCausalLM
from PIL import Image

# ===== MODEL LOADING =====
def load_model():
    model = AutoModelForCausalLM.from_pretrained(
        "AIDC-AI/Ovis2.5-2B",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    ).cuda()
    model.eval()
    return model

# ===== INFERENCE =====
def run_inference(model, image_path, prompt="Describe this image"):
    # Load image
    image = Image.open(image_path).convert("RGB")
    
    # Prepare messages according to Ovis format
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": prompt},
        ],
    }]
    
    # Preprocessing
    input_ids, pixel_values, grid_thws = model.preprocess_inputs(
        messages=messages,
        add_generation_prompt=True,
        max_pixels=896*896,
        enable_thinking=False
    )
    
    # Move to GPU
    input_ids = input_ids.cuda()
    pixel_values = pixel_values.cuda() if pixel_values is not None else None
    grid_thws = grid_thws.cuda() if grid_thws is not None else None
    
    # Generation
    with torch.no_grad():
        outputs = model.generate(
            inputs=input_ids,
            pixel_values=pixel_values,
            grid_thws=grid_thws,
            max_new_tokens=256,
            enable_thinking=False
        )
    
    # Decode response
    response = model.text_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# ===== USAGE =====
if __name__ == "__main__":
    # Load model
    model = load_model()
    
    # Run inference
    result = run_inference(model, "your_image.jpg", "Describe this image")
    print(result)
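
To narrow this down, it would help to time preprocessing and generation separately, since 4.5 s could be dominated by either the vision prefill or the token-by-token decode. Below is a minimal timing sketch using the same API and tensors as the script above; timed_inference is just an illustrative helper, not part of the Ovis API, and it assumes (as the decode step above suggests) that generate returns only the newly generated token ids.

import time

def timed_inference(model, image_path, prompt="Describe this image"):
    """run_inference with per-stage timing to see where the latency goes."""
    image = Image.open(image_path).convert("RGB")
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": prompt},
        ],
    }]

    torch.cuda.synchronize()
    t0 = time.perf_counter()
    input_ids, pixel_values, grid_thws = model.preprocess_inputs(
        messages=messages,
        add_generation_prompt=True,
        max_pixels=896*896,
        enable_thinking=False
    )
    input_ids = input_ids.cuda()
    pixel_values = pixel_values.cuda() if pixel_values is not None else None
    grid_thws = grid_thws.cuda() if grid_thws is not None else None
    torch.cuda.synchronize()
    t1 = time.perf_counter()

    with torch.no_grad():
        outputs = model.generate(
            inputs=input_ids,
            pixel_values=pixel_values,
            grid_thws=grid_thws,
            max_new_tokens=256,
            enable_thinking=False
        )
    torch.cuda.synchronize()
    t2 = time.perf_counter()

    # outputs[0] is decoded directly as the response above, so its length is
    # taken here as the number of generated tokens (an assumption).
    n_tokens = outputs[0].shape[-1]
    print(f"preprocess + H2D copy: {t1 - t0:.3f}s")
    print(f"generate: {t2 - t1:.3f}s ({n_tokens} tokens, {n_tokens / (t2 - t1):.1f} tok/s)")
    return model.text_tokenizer.decode(outputs[0], skip_special_tokens=True)

Note that the first call after loading includes CUDA kernel warmup, so the second call is the one worth measuring. If most of the time sits in generate at a low token rate, output length is the main lever; if preprocessing dominates, the image budget is.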
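
If decode does dominate, the two knobs already present in the script are the cheapest wins: max_pixels (fewer vision tokens to prefill) and max_new_tokens (fewer decode steps). A hedged variant follows; 448*448 and 96 are illustrative values to tune against output quality, not recommendations from the model authors, and do_sample is the standard transformers generate flag, assumed here to be forwarded by the Ovis wrapper.

def run_inference_fast(model, image_path, prompt="Describe this image briefly"):
    """Lower-latency variant of run_inference: smaller image budget,
    shorter output. Both values trade quality for speed and need tuning."""
    image = Image.open(image_path).convert("RGB")
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": prompt},
        ],
    }]
    input_ids, pixel_values, grid_thws = model.preprocess_inputs(
        messages=messages,
        add_generation_prompt=True,
        max_pixels=448*448,  # fewer vision tokens -> faster prefill (assumed trade-off)
        enable_thinking=False
    )
    input_ids = input_ids.cuda()
    pixel_values = pixel_values.cuda() if pixel_values is not None else None
    grid_thws = grid_thws.cuda() if grid_thws is not None else None
    with torch.no_grad():
        outputs = model.generate(
            inputs=input_ids,
            pixel_values=pixel_values,
            grid_thws=grid_thws,
            max_new_tokens=96,   # shorter description -> fewer decode steps
            do_sample=False,     # greedy decoding; standard generate kwarg
            enable_thinking=False
        )
    return model.text_tokenizer.decode(outputs[0], skip_special_tokens=True)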
