#67 Add siq2 loc evaluation pipeline
zhanwenchen committed Apr 11, 2024
1 parent 2a555f9 commit 02931f4
Showing 3 changed files with 308 additions and 0 deletions.
170 changes: 170 additions & 0 deletions quantitative_evaluation/evaluate_loc.py
@@ -0,0 +1,170 @@
import os
import argparse
import json
from multiprocessing.pool import Pool


def parse_args():
    parser = argparse.ArgumentParser(description="question-answer-generation-using-gpt-3")
    parser.add_argument("--pred_path", required=True, help="The path to the file containing predictions.")
    parser.add_argument("--output_dir", required=True, help="The path to save per-sample annotation json files.")
    parser.add_argument("--output_json", required=True, help="The path to save the final combined annotation json file.")
    parser.add_argument("--num_tasks", required=True, type=int, help="Number of splits.")
    args = parser.parse_args()
    return args


def annotate(prediction_set, caption_files, output_dir):
    """
    Evaluates question-answer pairs by exact-match comparison between the
    ground-truth answer and the prediction.
    Writes a correctness score of 1 (match) or 0 (no match) per pair.
    """
    for file in caption_files:
        key = file[:-5]  # Strip the .json file extension
        qa_set = prediction_set[key]
        answer = qa_set['a']
        pred = qa_set['pred']

        # Compute the correctness score: exact match between answer and prediction.
        if answer == pred:
            response_dict = {'pred': 'yes', 'score': 1}
        else:
            response_dict = {'pred': 'no', 'score': 0}
        result_qa_pair = [response_dict, qa_set]

        # Save the question-answer pair and its score to a json file.
        with open(f"{output_dir}/{key}.json", "w") as f:
            json.dump(result_qa_pair, f)



def main():
    """
    Main function to control the flow of the program.
    """
    # Parse arguments.
    args = parse_args()

    with open(args.pred_path) as file:
        pred_contents = json.load(file)

    # Dictionary to store the count of occurrences for each video_id
    video_id_counts = {}
    new_pred_contents = []

    # Iterate through each sample in pred_contents
    for sample in pred_contents:
        video_id = sample['id']
        if video_id in video_id_counts:
            video_id_counts[video_id] += 1
        else:
            video_id_counts[video_id] = 0

        # Create a new sample whose key is made unique by the occurrence count
        new_sample = sample
        new_sample['id'] = f"{video_id}_{video_id_counts[video_id]}"
        new_pred_contents.append(new_sample)

    # Generate the list of ids and the corresponding per-sample output files
    id_list = [x['id'] for x in new_pred_contents]
    caption_files = [f"{id}.json" for id in id_list]

    output_dir = args.output_dir
    # Create the output directory if it does not exist.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Prepare the dictionary of question-answer sets
    prediction_set = {}
    for sample in new_pred_contents:
        id = sample['id']
        question = sample['question']
        answer = sample['answer']
        pred = sample['pred']
        qa_set = {"q": question, "a": answer, "pred": pred}
        prediction_set[id] = qa_set

    num_tasks = args.num_tasks

    # Loop until every caption file has been processed.
    while True:
        try:
            # Files that have already been processed.
            completed_files = os.listdir(output_dir)
            print(f"completed_files: {len(completed_files)}")

            # Files that have not been processed yet.
            incomplete_files = [f for f in caption_files if f not in completed_files]
            print(f"incomplete_files: {len(incomplete_files)}")

            # Break the loop when there are no incomplete files
            if len(incomplete_files) == 0:
                break
            if len(incomplete_files) <= num_tasks:
                num_tasks = 1

            # Split the remaining files into one part per task.
            part_len = len(incomplete_files) // num_tasks
            all_parts = [incomplete_files[i:i + part_len] for i in range(0, len(incomplete_files), part_len)]
            task_args = [(prediction_set, part, args.output_dir) for part in all_parts]

            # Use a pool of workers to process the files in parallel.
            with Pool() as pool:
                pool.starmap(annotate, task_args)

        except Exception as e:
            print(f"Error: {e}")

    # Combine all the processed files into one
    combined_contents = {}
    json_path = args.output_json

    # Iterate through json files
    for file_name in os.listdir(output_dir):
        if file_name.endswith(".json") and 'results' not in file_name and 'preds' not in file_name:
            file_path = os.path.join(output_dir, file_name)
            with open(file_path, "r") as json_file:
                content = json.load(json_file)
                combined_contents[file_name[:-5]] = content

    # Write combined content to a json file
    with open(json_path, "w") as json_file:
        json.dump(combined_contents, json_file)
    print("All evaluation completed!")

    # Calculate average score and accuracy
    score_sum = 0
    count = 0
    yes_count = 0
    no_count = 0
    for key, result in combined_contents.items():
        # Compute the score
        count += 1
        result_0 = result[0]
        score = int(result_0['score'])
        score_sum += score

        # Compute the accuracy
        pred_lower = result_0['pred'].lower()
        if "yes" in pred_lower:
            yes_count += 1
        elif "no" in pred_lower:
            no_count += 1
        else:
            raise ValueError(f'For key={key}, there is no yes or no in the answer for result={result}')

    average_score = score_sum / count
    accuracy = yes_count / (yes_count + no_count)
    print("Yes count:", yes_count)
    print("No count:", no_count)
    print("Accuracy:", accuracy)
    print("Average score:", average_score)


if __name__ == "__main__":
    main()
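For reference, evaluate_loc.py expects --pred_path to point to a JSON list in which every sample carries 'id', 'question', 'answer', and 'pred' keys, and it scores each sample by exact string match between 'answer' and 'pred'. Below is a minimal sketch of that input and a standalone invocation; the file names, ids, and answer strings are hypothetical placeholders, not part of this commit.

# Minimal sketch (hypothetical paths, ids, and answer strings) of the input consumed by evaluate_loc.py.
import json

preds = [
    # Duplicate ids are allowed; the script renames them to video123_0, video123_1, ...
    {"id": "video123", "question": "When does speaker A wave?", "answer": "12-18", "pred": "12-18"},
    {"id": "video123", "question": "When does speaker B reply?", "answer": "20-26", "pred": "5-9"},
]
with open("preds_example.json", "w") as f:
    json.dump(preds, f)

# Then, for example:
#   python quantitative_evaluation/evaluate_loc.py \
#       --pred_path preds_example.json \
#       --output_dir eval_out \
#       --output_json eval_out/results_example.json \
#       --num_tasks 1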
45 changes: 45 additions & 0 deletions quantitative_evaluation/evalute_loc.sh
@@ -0,0 +1,45 @@
#!/usr/bin/env bash
# evalute_loc.sh

export TASK_NAME="loc"
export DATASET="siq2"
export SPLIT="val" # one of: train, val, test (train is not used for evaluation)
export MODEL_NAME="LLaVA-Lightning-7B-v1-1" # TODO
export PROJECT_ROOT="${HOME}/vtom"
export EXPERIMENT_NAME="${DATASET}_${TASK_NAME}_${SPLIT}_${MODEL_NAME}"
export EXPERIMENT_DIRPATH="${PROJECT_ROOT}/experiments/${EXPERIMENT_NAME}"
export OUTPUT_PER_VIDEO_DIRPATH="${EXPERIMENT_DIRPATH}/output_per_video"
export OUTPUT_DIRPATH="${EXPERIMENT_DIRPATH}/output"
export VIDEO_DIR="${PROJECT_ROOT}/data/${DATASET}/video"
export LOC_DIRPATH="${PROJECT_ROOT}/data/siq2/loc"
export PRED_FPATH="preds_${EXPERIMENT_NAME}"
export RESULTS_FPATH="results_${EXPERIMENT_NAME}.json"
export GT_WITH_TS_FPATH="${LOC_DIRPATH}/loc_${SPLIT}_with_ts.json"
export INSTRUCTION_FINETUNING_FPATH="${LOC_DIRPATH}/loc_${SPLIT}_instruction_with_ts.json"


mkdir -p "${OUTPUT_PER_VIDEO_DIRPATH}"
mkdir -p "${OUTPUT_DIRPATH}"


python ${PROJECT_ROOT}/scripts/convert_instruction_json_to_training_format_siq2_loc.py \
    --input_json_file "${LOC_DIRPATH}/loc_${SPLIT}.json" \
    --output_json_file "${INSTRUCTION_FINETUNING_FPATH}" \
    --gt_ts_file "${GT_WITH_TS_FPATH}"


# Generate video features and predictions
export NPROC_PER_NODE=2
export OMP_NUM_THREADS=$(($(nproc) / ${NPROC_PER_NODE}))
PYTHONPATH="./:$PYTHONPATH" python video_chatgpt/eval/run_inference_loc.py \
    --model-name "${PROJECT_ROOT}/${MODEL_NAME}" \
    --video_dir "${VIDEO_DIR}" \
    --gt_file_qa "${INSTRUCTION_FINETUNING_FPATH}" \
    --output_dir "${OUTPUT_PER_VIDEO_DIRPATH}" \
    --output_name "${PRED_FPATH}"


# PYTHONPATH="./:$PYTHONPATH" python quantitative_evaluation/evaluate_loc.py \
# --pred_path "${OUTPUT_DIRPATH}/${PRED_FPATH}.json" \
# --output_dir "${OUTPUT_PER_VIDEO_DIRPATH}" \
# --output_json "${OUTPUT_DIRPATH}/${RESULTS_FPATH}" \
# --num_tasks 1
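The scoring step above is left commented out; it can be run once the inference step has produced the predictions JSON. Before launching inference, a quick sanity check along the following lines can confirm that the generated instruction file is well formed and that every question id has a matching video under the video directory (run_inference_loc.py below warns and skips ids without a match). This is only a sketch, not part of the commit; the literal paths stand in for $INSTRUCTION_FINETUNING_FPATH and $VIDEO_DIR as defined above.

# Sanity-check sketch (hypothetical example paths, relative to $PROJECT_ROOT).
import json
from glob import glob
from os.path import join

instruction_fpath = "data/siq2/loc/loc_val_instruction_with_ts.json"
video_dir = "data/siq2/video"

with open(instruction_fpath) as f:
    samples = json.load(f)
print(f"{len(samples)} samples loaded")

# run_inference_loc.py matches videos by globbing "<id>*" under the video directory,
# warning and skipping any sample whose id has no match.
missing = [s['id'] for s in samples if not glob(join(video_dir, s['id'] + '*'))]
print(f"{len(missing)} sample ids have no matching video file")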
93 changes: 93 additions & 0 deletions video_chatgpt/eval/run_inference_loc.py
@@ -0,0 +1,93 @@
from os import makedirs as os_makedirs
from os.path import join as os_path_join, exists as os_path_exists
from argparse import ArgumentParser
from json import load as json_load, dump as json_dump
from glob import glob
from warnings import warn
from tqdm import tqdm
from torch import device as torch_device, no_grad as torch_no_grad
from video_chatgpt.eval.model_utils import initialize_model, load_video
from video_chatgpt.inference import video_chatgpt_infer


def parse_args():
    """
    Parse command-line arguments.
    """
    parser = ArgumentParser()

    # Define the command-line arguments
    parser.add_argument('--video_dir', help='Directory containing video files.', required=True)
    parser.add_argument('--gt_file_qa', help='Path to the ground truth file containing questions.', required=True)
    parser.add_argument('--output_dir', help='Directory to save the model results JSON.', required=True)
    parser.add_argument('--output_name', help='Name of the file for storing results JSON.', required=True)
    parser.add_argument("--model-name", type=str, required=True)
    parser.add_argument("--conv-mode", type=str, required=False, default='video-chatgpt_v1')
    parser.add_argument("--projection_path", type=str, required=False)

    return parser.parse_args()


@torch_no_grad()
def run_inference(args):
    """
    Run inference on the Social-IQ 2.0 (siq2) localization QA set using the Video-ChatGPT model.
    Args:
        args: Command-line arguments.
    """
    # Initialize the model
    video_dir = args.video_dir
    model, vision_tower, tokenizer, image_processor, video_token_len = initialize_model(args.model_name,
                                                                                        args.projection_path)
    # Load the ground truth file containing questions and answers
    with open(args.gt_file_qa) as file:
        gt_qa = json_load(file)

    # Create the output directory if it doesn't exist
    output_dir = args.output_dir
    if not os_path_exists(output_dir):
        os_makedirs(output_dir)

    output_list = []  # List to store the output results
    conv_mode = args.conv_mode

    device = torch_device('cuda')

    # Iterate over each sample in the ground truth file
    for question_dict in tqdm(gt_qa):
        conversations = question_dict['conversations']
        question = conversations[0]['value']
        question_id = question_dict['id']
        answer = conversations[1]['value']

        sample_set = {'id': question_id, 'question': question, 'answer': answer}

        # Find the video file whose name starts with the question id
        videos_search_path = os_path_join(video_dir, question_id + '*')
        videos_match_list = glob(videos_search_path)
        if not videos_match_list:
            warn(f'No videos found for {videos_search_path}')
            continue
        video_fpath = videos_match_list[0]

        # Load the video frames onto the device
        video_frames = load_video(video_fpath, device)

        try:
            # Run inference on the video and add the output to the list
            output = video_chatgpt_infer(video_frames, question, conv_mode, model, vision_tower,
                                         tokenizer, image_processor, video_token_len)
            sample_set['pred'] = output
            output_list.append(sample_set)
        except Exception as e:
            print(f"Error processing video file '{video_fpath}': {e}")

    # Save the output list to a JSON file
    with open(os_path_join(output_dir, f"{args.output_name}.json"), 'w') as file:
        json_dump(output_list, file)


if __name__ == "__main__":
    args = parse_args()
    run_inference(args)

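To make the data flow concrete: run_inference_loc.py reads gt_file_qa as a list of samples, each with an 'id' and a two-turn 'conversations' list (question first, answer second), and appends one {'id', 'question', 'answer', 'pred'} record per sample to the predictions JSON, which is the shape evaluate_loc.py above scores by exact match. A minimal sketch with hypothetical values follows; only the 'id' and 'value' fields are actually read by the script, the 'from' keys are illustrative.

# Minimal sketch (hypothetical values) of one gt_file_qa entry and the record it produces.
gt_entry = {
    "id": "video123",
    "conversations": [
        {"from": "human", "value": "When does speaker A wave?"},  # question, read from conversations[0]['value']
        {"from": "gpt", "value": "12-18"},                        # answer, read from conversations[1]['value']
    ],
}

# After inference, the corresponding record written to <output_name>.json looks like:
pred_record = {
    "id": "video123",
    "question": "When does speaker A wave?",
    "answer": "12-18",
    "pred": "12-18",  # model output from video_chatgpt_infer
}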