Skip to content

Commit efb5295

Browse files
committed
Release llava-wilder
1 parent 050b2c3 commit efb5295

File tree

4 files changed

+6
-84
lines changed

4 files changed

+6
-84
lines changed

Diff for: lmms_eval/tasks/llava_wilder/llava_wilder_full.yaml

-14
This file was deleted.

Diff for: lmms_eval/tasks/llava_wilder/llava_wilder_medium.yaml

-14
This file was deleted.

Diff for: lmms_eval/tasks/llava_wilder/llava_wilder_small.yaml

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
dataset_path: lmms-lab/llava-wilder
2-
dataset_name: Small
1+
dataset_path: lmms-lab/llava-bench-wilder
32
dataset_kwargs:
43
token: True
54
task: "llava_wilder_small"
6-
test_split: train
5+
test_split: small
76
model_specific_prompt_kwargs:
87
default:
98
pre_prompt: ""

Diff for: lmms_eval/tasks/llava_wilder/utils.py

+4-53
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,6 @@
1313
# Set up a logger
1414
eval_logger = logging.getLogger("lmms-eval")
1515

16-
# Create a static variable to track if the message has been logged
17-
if not hasattr(eval_logger, "dashcope_warning_logged"):
18-
eval_logger.dashcope_warning_logged = False
19-
20-
try:
21-
import dashscope
22-
except ImportError:
23-
if not eval_logger.dashcope_warning_logged:
24-
eval_logger.debug("Dashcope not found, make sure you install dashscope to use qwen vl")
25-
eval_logger.dashcope_warning_logged = True
26-
2716
NUM_SECONDS_TO_SLEEP = 5
2817
dir_path = os.path.dirname(os.path.realpath(__file__))
2918

@@ -58,14 +47,6 @@
5847
"Content-Type": "application/json",
5948
}
6049

61-
elif API_TYPE == "qwen_vl":
62-
API_URL = os.getenv("QWEN_ENDPOINT", "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation")
63-
API_KEY = os.getenv("DASHSCOPE_API_KEY", "YOUR_API_KEY")
64-
headers = {
65-
"Authorization": f"Bearer {API_KEY}",
66-
"Content-Type": "application/json",
67-
}
68-
6950

7051
def get_chat_response(base64_image, prompt, max_retries=5, wait_time=10):
7152
headers = {
@@ -114,29 +95,6 @@ def image_to_base64(pil_image):
11495
return base64.b64encode(buffered.getvalue()).decode("utf-8")
11596

11697

117-
def qwen_multimodal_conversation_call(text_content, image_content, retries=5):
118-
"""Simple single round multimodal conversation call."""
119-
messages = [{"role": "user", "content": [{"image": image_content}, {"text": text_content}]}]
120-
for attempt in range(retries):
121-
try:
122-
response_data = dashscope.MultiModalConversation.call(model=GPT_EVAL_MODEL_NAME, messages=messages)
123-
# The response status_code is HTTPStatus.OK indicate success,
124-
# otherwise indicate request is failed, you can get error code
125-
# and message from code and message.
126-
content = response_data["output"]["choices"][0]["message"]["content"][0]["text"].strip()
127-
if content != "":
128-
return content, GPT_EVAL_MODEL_NAME
129-
break # If successful, break out of the loop
130-
except Exception as e:
131-
eval_logger.info(f"Attempt {attempt + 1} failed with error: {e}")
132-
if attempt < retries: # If we have retries left, sleep and then continue to next attempt
133-
time.sleep(NUM_SECONDS_TO_SLEEP)
134-
else: # If this was the last attempt, log and return empty
135-
eval_logger.error(f"All {retries} attempts failed. Last error message: {e}")
136-
return "", ""
137-
return "", ""
138-
139-
14098
def parse_score(review):
14199
try:
142100
score_pair = review.split("\n")[0]
@@ -162,20 +120,13 @@ def llava_process_results(doc, result):
162120
"""
163121
try:
164122
question = doc.get("question", "")
165-
ans1 = doc.get("gpt4v_answer", "")
123+
ans1 = doc.get("answer", "")
166124
ans2 = result[0] if result else ""
167125
content = f"[Question]\n{question}\n\n" + f"[Assistant 1]\n{ans1}\n\n[End of Assistant 1]\n\n" + f"[Assistant 2]\n{ans2}\n\n[End of Assistant 2]\n\n" f"[System]\n{judge_rules}\n\n"
168126
visuals = llava_doc_to_visual(doc)
169-
if API_TYPE == "qwen_vl":
170-
file_path = os.path.join(dir_path, f"tmp_{doc['question_id']}.jpg")
171-
visuals[0].save(file_path)
172-
image_content = "file://" + file_path
173-
review, model_name = qwen_multimodal_conversation_call(content, image_content=image_content)
174-
os.remove(file_path)
175-
elif API_TYPE == "openai":
176-
image_path = doc["image"]
177-
base64_image = image_to_base64(image_path)
178-
review, model_name = get_chat_response(base64_image, content)
127+
image_path = doc["image"]
128+
base64_image = image_to_base64(image_path)
129+
review, model_name = get_chat_response(base64_image, content)
179130
scores = parse_score(review)
180131
except Exception as e:
181132
eval_logger.error(f"Error for Question ID: {doc.get('question_id', 'Unknown')}: {e}")

0 commit comments

Comments (0)