Skip to content

Commit 71401ba

Browse files
committed
Add MM-UPD benchmark tasks (mmupdbench)
1 parent 24dc435 commit 71401ba

25 files changed

+1119
-0
lines changed

Diff for: README.md

+13
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,19 @@ We also provide the raw data exported from Weights & Biases for the detailed res
190190
- MMMU (mmmu)
191191
- MMMU Validation (mmmu_val)
192192
- MMMU Test (mmmu_test)
193+
- MMUPDBench (mmupdbench)
194+
- MMUPDBench Base (mmupdbench_base)
195+
- MMAADBench Base (mmaadbench_base)
196+
- MMIASDBench Base (mmiasdbench_base)
197+
- MMIVQDBench Base (mmivqdbench_base)
198+
- MMUPDBench Option (mmupdbench_option)
199+
- MMAADBench Option (mmaadbench_option)
200+
- MMIASDBench Option (mmiasdbench_option)
201+
- MMIVQDBench Option (mmivqdbench_option)
202+
- MMUPDBench Instruction (mmupdbench_instruction)
203+
- MMAADBench Instruction (mmaadbench_instruction)
204+
- MMIASDBench Instruction (mmiasdbench_instruction)
205+
- MMIVQDBench Instruction (mmivqdbench_instruction)
193206
- MMVet (mmvet)
194207
- Multi-DocVQA (multidocvqa)
195208
- Multi-DocVQA Validation (multidocvqa_val)

Diff for: lmms_eval/tasks/mmupdbench/_default_template_mmaadbench_base_yaml

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
dataset_path: MM-UPD/MM-UPD
2+
doc_to_target: "answer"
3+
model_specific_prompt_kwargs:
4+
default:
5+
pre_prompt: ""
6+
post_prompt: "\n"
7+
doc_to_visual: !function utils.mmupdbench_doc_to_visual
8+
doc_to_text: !function utils.mmupdbench_doc_to_text
9+
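# doc_to_target: "answer" is already declared at the top of this template; the repeated key was removed because YAML mapping keys must be unique.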
10+
process_results: !function utils.mmupdbench_process_results
11+
model_specific_generation_kwargs:
12+
llava:
13+
image_aspect_ratio: original
14+
output_type: generate_until
15+
dataset_name: mmaad_base
16+
generation_kwargs:
17+
until:
18+
- "ASSISTANT:"
19+
max_new_tokens: 1024
20+
temperature: 0
21+
top_p: 0
22+
num_beams: 1
23+
do_sample: false

Diff for: lmms_eval/tasks/mmupdbench/_default_template_mmaadbench_instruction_yaml

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
dataset_path: MM-UPD/MM-UPD
2+
doc_to_target: "answer"
3+
model_specific_prompt_kwargs:
4+
default:
5+
pre_prompt: ""
6+
post_prompt: "\nIf all the options are incorrect, answer \"F. None of the above\"."
7+
doc_to_visual: !function utils.mmupdbench_doc_to_visual
8+
doc_to_text: !function utils.mmupdbench_doc_to_text
9+
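# doc_to_target: "answer" is already declared at the top of this template; the repeated key was removed because YAML mapping keys must be unique.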
10+
process_results: !function utils.mmupdbench_process_results
11+
model_specific_generation_kwargs:
12+
llava:
13+
image_aspect_ratio: original
14+
output_type: generate_until
15+
dataset_name: mmaad_base
16+
generation_kwargs:
17+
until:
18+
- "ASSISTANT:"
19+
max_new_tokens: 1024
20+
temperature: 0
21+
top_p: 0
22+
num_beams: 1
23+
do_sample: false

Diff for: lmms_eval/tasks/mmupdbench/_default_template_mmaadbench_option_yaml

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
dataset_path: MM-UPD/MM-UPD
2+
doc_to_target: "answer"
3+
model_specific_prompt_kwargs:
4+
default:
5+
pre_prompt: ""
6+
post_prompt: "\nAnswer with the option's letter from the given choices directly."
7+
doc_to_visual: !function utils.mmupdbench_doc_to_visual
8+
doc_to_text: !function utils.mmupdbench_doc_to_text
9+
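# doc_to_target: "answer" is already declared at the top of this template; the repeated key was removed because YAML mapping keys must be unique.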
10+
process_results: !function utils.mmupdbench_process_results
11+
model_specific_generation_kwargs:
12+
llava:
13+
image_aspect_ratio: original
14+
output_type: generate_until
15+
dataset_name: mmaad_option
16+
generation_kwargs:
17+
until:
18+
- "ASSISTANT:"
19+
max_new_tokens: 1024
20+
temperature: 0
21+
top_p: 0
22+
num_beams: 1
23+
do_sample: false

Diff for: lmms_eval/tasks/mmupdbench/_default_template_mmiasdbench_base_yaml

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
dataset_path: MM-UPD/MM-UPD
2+
doc_to_target: "answer"
3+
model_specific_prompt_kwargs:
4+
default:
5+
pre_prompt: ""
6+
post_prompt: "\n"
7+
doc_to_visual: !function utils.mmupdbench_doc_to_visual
8+
doc_to_text: !function utils.mmupdbench_doc_to_text
9+
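# doc_to_target: "answer" is already declared at the top of this template; the repeated key was removed because YAML mapping keys must be unique.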
10+
process_results: !function utils.mmupdbench_process_results
11+
model_specific_generation_kwargs:
12+
llava:
13+
image_aspect_ratio: original
14+
output_type: generate_until
15+
dataset_name: mmiasd_base
16+
generation_kwargs:
17+
until:
18+
- "ASSISTANT:"
19+
max_new_tokens: 1024
20+
temperature: 0
21+
top_p: 0
22+
num_beams: 1
23+
do_sample: false

Diff for: lmms_eval/tasks/mmupdbench/_default_template_mmiasdbench_instruction_yaml

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
dataset_path: MM-UPD/MM-UPD
2+
doc_to_target: "answer"
3+
model_specific_prompt_kwargs:
4+
default:
5+
pre_prompt: ""
6+
post_prompt: "\nIf all the options are incorrect, answer \"F. None of the above\"."
7+
doc_to_visual: !function utils.mmupdbench_doc_to_visual
8+
doc_to_text: !function utils.mmupdbench_doc_to_text
9+
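# doc_to_target: "answer" is already declared at the top of this template; the repeated key was removed because YAML mapping keys must be unique.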
10+
process_results: !function utils.mmupdbench_process_results
11+
model_specific_generation_kwargs:
12+
llava:
13+
image_aspect_ratio: original
14+
output_type: generate_until
15+
dataset_name: mmiasd_base
16+
generation_kwargs:
17+
until:
18+
- "ASSISTANT:"
19+
max_new_tokens: 1024
20+
temperature: 0
21+
top_p: 0
22+
num_beams: 1
23+
do_sample: false

Diff for: lmms_eval/tasks/mmupdbench/_default_template_mmiasdbench_option_yaml

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
dataset_path: MM-UPD/MM-UPD
2+
doc_to_target: "answer"
3+
model_specific_prompt_kwargs:
4+
default:
5+
pre_prompt: ""
6+
post_prompt: "\nAnswer with the option's letter from the given choices directly."
7+
doc_to_visual: !function utils.mmupdbench_doc_to_visual
8+
doc_to_text: !function utils.mmupdbench_doc_to_text
9+
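# doc_to_target: "answer" is already declared at the top of this template; the repeated key was removed because YAML mapping keys must be unique.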
10+
process_results: !function utils.mmupdbench_process_results
11+
model_specific_generation_kwargs:
12+
llava:
13+
image_aspect_ratio: original
14+
output_type: generate_until
15+
dataset_name: mmiasd_option
16+
generation_kwargs:
17+
until:
18+
- "ASSISTANT:"
19+
max_new_tokens: 1024
20+
temperature: 0
21+
top_p: 0
22+
num_beams: 1
23+
do_sample: false

Diff for: lmms_eval/tasks/mmupdbench/_default_template_mmivqdbench_base_yaml

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
dataset_path: MM-UPD/MM-UPD
2+
doc_to_target: "answer"
3+
model_specific_prompt_kwargs:
4+
default:
5+
pre_prompt: ""
6+
post_prompt: "\n"
7+
doc_to_visual: !function utils.mmupdbench_doc_to_visual
8+
doc_to_text: !function utils.mmupdbench_doc_to_text
9+
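# doc_to_target: "answer" is already declared at the top of this template; the repeated key was removed because YAML mapping keys must be unique.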
10+
process_results: !function utils.mmupdbench_process_results
11+
model_specific_generation_kwargs:
12+
llava:
13+
image_aspect_ratio: original
14+
output_type: generate_until
15+
dataset_name: mmivqd_base
16+
generation_kwargs:
17+
until:
18+
- "ASSISTANT:"
19+
max_new_tokens: 1024
20+
temperature: 0
21+
top_p: 0
22+
num_beams: 1
23+
do_sample: false

Diff for: lmms_eval/tasks/mmupdbench/_default_template_mmivqdbench_instruction_yaml

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
dataset_path: MM-UPD/MM-UPD
2+
doc_to_target: "answer"
3+
model_specific_prompt_kwargs:
4+
default:
5+
pre_prompt: ""
6+
post_prompt: "\nIf the given image is irrelevant to the question, answer \"F. The image and question are irrelevant.\"."
7+
doc_to_visual: !function utils.mmupdbench_doc_to_visual
8+
doc_to_text: !function utils.mmupdbench_doc_to_text
9+
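# doc_to_target: "answer" is already declared at the top of this template; the repeated key was removed because YAML mapping keys must be unique.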
10+
process_results: !function utils.mmupdbench_process_results
11+
model_specific_generation_kwargs:
12+
llava:
13+
image_aspect_ratio: original
14+
output_type: generate_until
15+
dataset_name: mmivqd_base
16+
generation_kwargs:
17+
until:
18+
- "ASSISTANT:"
19+
max_new_tokens: 1024
20+
temperature: 0
21+
top_p: 0
22+
num_beams: 1
23+
do_sample: false

Diff for: lmms_eval/tasks/mmupdbench/_default_template_mmivqdbench_option_yaml

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
dataset_path: MM-UPD/MM-UPD
2+
doc_to_target: "answer"
3+
model_specific_prompt_kwargs:
4+
default:
5+
pre_prompt: ""
6+
post_prompt: "\nAnswer with the option's letter from the given choices directly."
7+
doc_to_visual: !function utils.mmupdbench_doc_to_visual
8+
doc_to_text: !function utils.mmupdbench_doc_to_text
9+
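# doc_to_target: "answer" is already declared at the top of this template; the repeated key was removed because YAML mapping keys must be unique.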
10+
process_results: !function utils.mmupdbench_process_results
11+
model_specific_generation_kwargs:
12+
llava:
13+
image_aspect_ratio: original
14+
output_type: generate_until
15+
dataset_name: mmivqd_option
16+
generation_kwargs:
17+
until:
18+
- "ASSISTANT:"
19+
max_new_tokens: 1024
20+
temperature: 0
21+
top_p: 0
22+
num_beams: 1
23+
do_sample: false

Diff for: lmms_eval/tasks/mmupdbench/mmaadbench_base.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
task: "mmaadbench_base"
2+
test_split: test
3+
include: _default_template_mmaadbench_base_yaml
4+
metric_list:
5+
- metric: gpt_eval_score
6+
aggregation: !function utils.mmaadbench_base
7+
higher_is_better: true

Diff for: lmms_eval/tasks/mmupdbench/mmaadbench_instruction.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
task: "mmaadbench_instruction"
2+
test_split: test
3+
include: _default_template_mmaadbench_instruction_yaml
4+
metric_list:
5+
- metric: gpt_eval_score
6+
aggregation: !function utils.mmaadbench_instruction
7+
higher_is_better: true

Diff for: lmms_eval/tasks/mmupdbench/mmaadbench_option.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
task: "mmaadbench_option"
2+
test_split: test
3+
include: _default_template_mmaadbench_option_yaml
4+
metric_list:
5+
- metric: gpt_eval_score
6+
aggregation: !function utils.mmaadbench_option
7+
higher_is_better: true

Diff for: lmms_eval/tasks/mmupdbench/mmiasdbench_base.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
task: "mmiasdbench_base"
2+
test_split: test
3+
include: _default_template_mmiasdbench_base_yaml
4+
metric_list:
5+
- metric: gpt_eval_score
6+
aggregation: !function utils.mmiasdbench_base
7+
higher_is_better: true

Diff for: lmms_eval/tasks/mmupdbench/mmiasdbench_instruction.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
task: "mmiasdbench_instruction"
2+
test_split: test
3+
include: _default_template_mmiasdbench_instruction_yaml
4+
metric_list:
5+
- metric: gpt_eval_score
6+
aggregation: !function utils.mmiasdbench_instruction
7+
higher_is_better: true

Diff for: lmms_eval/tasks/mmupdbench/mmiasdbench_option.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
task: "mmiasdbench_option"
2+
test_split: test
3+
include: _default_template_mmiasdbench_option_yaml
4+
metric_list:
5+
- metric: gpt_eval_score
6+
aggregation: !function utils.mmiasdbench_option
7+
higher_is_better: true

Diff for: lmms_eval/tasks/mmupdbench/mmivqdbench_base.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
task: "mmivqdbench_base"
2+
test_split: test
3+
include: _default_template_mmivqdbench_base_yaml
4+
metric_list:
5+
- metric: gpt_eval_score
6+
aggregation: !function utils.mmivqdbench_base
7+
higher_is_better: true

Diff for: lmms_eval/tasks/mmupdbench/mmivqdbench_instruction.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
task: "mmivqdbench_instruction"
2+
test_split: test
3+
include: _default_template_mmivqdbench_instruction_yaml
4+
metric_list:
5+
- metric: gpt_eval_score
6+
aggregation: !function utils.mmivqdbench_instruction
7+
higher_is_better: true

Diff for: lmms_eval/tasks/mmupdbench/mmivqdbench_option.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
task: "mmivqdbench_option"
2+
test_split: test
3+
include: _default_template_mmivqdbench_option_yaml
4+
metric_list:
5+
- metric: gpt_eval_score
6+
aggregation: !function utils.mmivqdbench_option
7+
higher_is_better: true

Diff for: lmms_eval/tasks/mmupdbench/mmupdbench.yaml

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
group: mmupdbench
2+
task:
3+
- mmaadbench_base
4+
- mmaadbench_option
5+
- mmaadbench_instruction
6+
- mmiasdbench_base
7+
- mmiasdbench_option
8+
- mmiasdbench_instruction
9+
- mmivqdbench_base
10+
- mmivqdbench_option
11+
- mmivqdbench_instruction
12+
metadata:
13+
version: 0.0
14+
sys_prompt: ""
15+
gpt_eval_model_name: "gpt-3.5-turbo-0613"

Diff for: lmms_eval/tasks/mmupdbench/mmupdbench_base.yaml

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
group: mmupdbench_base
2+
task:
3+
- mmaadbench_base
4+
- mmiasdbench_base
5+
- mmivqdbench_base
6+
metadata:
7+
version: 0.0
8+
sys_prompt: ""
9+
gpt_eval_model_name: "gpt-3.5-turbo-0613"
10+

0 commit comments

Comments
 (0)