@@ -13,17 +13,6 @@
 # Set up a logger
 eval_logger = logging.getLogger("lmms-eval")

-# Create a static variable to track if the message has been logged
-if not hasattr(eval_logger, "dashcope_warning_logged"):
-    eval_logger.dashcope_warning_logged = False
-
-try:
-    import dashscope
-except ImportError:
-    if not eval_logger.dashcope_warning_logged:
-        eval_logger.debug("Dashcope not found, make sure you install dashscope to use qwen vl")
-        eval_logger.dashcope_warning_logged = True
-
 NUM_SECONDS_TO_SLEEP = 5
 dir_path = os.path.dirname(os.path.realpath(__file__))
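The block removed above is a guarded optional import that logs its warning at most once, by stashing a flag on the logger object. For reference, a minimal standalone sketch of that pattern, assuming nothing beyond the standard library (the `_warned_once` attribute name is illustrative):

```python
import logging

logger = logging.getLogger("lmms-eval")

# Stash a flag on the logger so repeated imports of this module warn only once.
if not hasattr(logger, "_warned_once"):
    logger._warned_once = False

try:
    import dashscope  # optional dependency, only needed for the qwen_vl judge
except ImportError:
    dashscope = None  # callers should check for None before using it
    if not logger._warned_once:
        logger.debug("dashscope not installed; the qwen_vl judge is unavailable")
        logger._warned_once = True
```

Unlike the removed code, this sketch binds `dashscope = None` on import failure, so a later use fails with a clear error on `None` rather than a `NameError` on an unbound name.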
@@ -58,14 +47,6 @@
         "Content-Type": "application/json",
     }

-elif API_TYPE == "qwen_vl":
-    API_URL = os.getenv("QWEN_ENDPOINT", "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation")
-    API_KEY = os.getenv("DASHSCOPE_API_KEY", "YOUR_API_KEY")
-    headers = {
-        "Authorization": f"Bearer {API_KEY}",
-        "Content-Type": "application/json",
-    }
-

 def get_chat_response(base64_image, prompt, max_retries=5, wait_time=10):
     headers = {
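The deleted `qwen_vl` branch mirrors the surviving `openai` branch whose tail appears in the context lines above: the endpoint and key are read from environment variables with defaults, and the key is sent as a bearer token. A hedged sketch of that configuration pattern (the `OPENAI_*` names and defaults are assumptions, not shown in this diff):

```python
import os

API_TYPE = os.getenv("API_TYPE", "openai")

if API_TYPE == "openai":
    # Assumed names and defaults; only the tail of this branch is visible above.
    API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
    API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY")
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
```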
@@ -114,29 +95,6 @@ def image_to_base64(pil_image):
     return base64.b64encode(buffered.getvalue()).decode("utf-8")


-def qwen_multimodal_conversation_call(text_content, image_content, retries=5):
-    """Simple single round multimodal conversation call."""
-    messages = [{"role": "user", "content": [{"image": image_content}, {"text": text_content}]}]
-    for attempt in range(retries):
-        try:
-            response_data = dashscope.MultiModalConversation.call(model=GPT_EVAL_MODEL_NAME, messages=messages)
-            # A response status_code of HTTPStatus.OK indicates success;
-            # otherwise the request failed, and the error code and message
-            # are available from the response's code and message fields.
-            content = response_data["output"]["choices"][0]["message"]["content"][0]["text"].strip()
-            if content != "":
-                return content, GPT_EVAL_MODEL_NAME
-            break  # If successful, break out of the loop
-        except Exception as e:
-            eval_logger.info(f"Attempt {attempt + 1} failed with error: {e}")
-            if attempt < retries:  # If we have retries left, sleep and then continue to next attempt
-                time.sleep(NUM_SECONDS_TO_SLEEP)
-            else:  # If this was the last attempt, log and return empty
-                eval_logger.error(f"All {retries} attempts failed. Last error message: {e}")
-                return "", ""
-    return "", ""
-
-
 def parse_score(review):
     try:
         score_pair = review.split("\n")[0]
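The removed helper wraps its API call in the same retry-with-fixed-sleep loop that `get_chat_response` uses. One quirk worth noting: because `attempt` ranges over `0 .. retries - 1`, the guard `if attempt < retries` is always true, so the `else` branch that logs "all attempts failed" can never run. A corrected, generic sketch of the pattern (`call_with_retries` is an illustrative name, not part of this repo):

```python
import logging
import time

eval_logger = logging.getLogger("lmms-eval")
NUM_SECONDS_TO_SLEEP = 5


def call_with_retries(fn, retries=5):
    """Call fn() up to `retries` times, sleeping between failed attempts."""
    last_error = None
    for attempt in range(retries):
        try:
            return fn()
        except Exception as e:
            last_error = e
            eval_logger.info(f"Attempt {attempt + 1} failed with error: {e}")
            if attempt < retries - 1:  # retries remain: back off, then try again
                time.sleep(NUM_SECONDS_TO_SLEEP)
    eval_logger.error(f"All {retries} attempts failed. Last error: {last_error}")
    return None
```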
@@ -162,20 +120,13 @@ def llava_process_results(doc, result):
     """
     try:
         question = doc.get("question", "")
-        ans1 = doc.get("gpt4v_answer", "")
+        ans1 = doc.get("answer", "")
         ans2 = result[0] if result else ""
         content = f"[Question]\n{question}\n\n" + f"[Assistant 1]\n{ans1}\n\n[End of Assistant 1]\n\n" + f"[Assistant 2]\n{ans2}\n\n[End of Assistant 2]\n\n" f"[System]\n{judge_rules}\n\n"
         visuals = llava_doc_to_visual(doc)
-        if API_TYPE == "qwen_vl":
-            file_path = os.path.join(dir_path, f"tmp_{doc['question_id']}.jpg")
-            visuals[0].save(file_path)
-            image_content = "file://" + file_path
-            review, model_name = qwen_multimodal_conversation_call(content, image_content=image_content)
-            os.remove(file_path)
-        elif API_TYPE == "openai":
-            image_path = doc["image"]
-            base64_image = image_to_base64(image_path)
-            review, model_name = get_chat_response(base64_image, content)
+        image_path = doc["image"]
+        base64_image = image_to_base64(image_path)
+        review, model_name = get_chat_response(base64_image, content)
         scores = parse_score(review)
     except Exception as e:
         eval_logger.error(f"Error for Question ID: {doc.get('question_id', 'Unknown')}: {e}")
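After this change, every request to the judge takes the same path: encode the image with `image_to_base64`, then call `get_chat_response`. A small usage sketch of the encoding step, assuming `image_to_base64` serializes the PIL image through an in-memory buffer as the `buffered` variable above suggests (the JPEG format choice is an assumption):

```python
import base64
from io import BytesIO

from PIL import Image


def image_to_base64(pil_image):
    # Serialize the image into an in-memory buffer, then base64-encode the bytes.
    buffered = BytesIO()
    pil_image.save(buffered, format="JPEG")  # format assumed; not visible in the diff
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


# Illustrative stand-in for doc["image"], which carries the dataset image.
img = Image.new("RGB", (64, 64), color="white")
base64_image = image_to_base64(img)
# review, model_name = get_chat_response(base64_image, content)  # network call, shape only
```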