@@ -146,7 +146,7 @@ def replace_model_name(old_name, tstamp):
146
146
"dumbledore-v3" : "llama-3.2-vision-90b-instruct" ,
147
147
"potter-v1" : "llama-3.2-vision-11b-instruct" ,
148
148
"sharp-game-player-v1" : "llama-3.2-3b-instruct" ,
149
- "zeus-flare-thunder-v1" : "llama-3.2-1b-instruct" ,
149
+ "zeus-flare-thunder-v1" : "llama-3.2-1b-instruct" ,
150
150
"qwen-vl-max-0809" : "qwen2-vl-72b" ,
151
151
"gemini-1.5-pro-002-test-sp" : "gemini-1.5-pro-002" ,
152
152
"gemini-1.5-flash-test-5" : "gemini-1.5-flash-002" ,
@@ -185,7 +185,7 @@ def replace_model_name(old_name, tstamp):
185
185
return old_name
186
186
else :
187
187
return old_name + "-old"
188
- if old_name == "eureka-chatbot" :
188
+ if old_name == "eureka-chatbot" :
189
189
if tstamp > 1721651521 :
190
190
return "eureka-chatbot-v2"
191
191
else :
@@ -293,12 +293,17 @@ def process_data_txt2img(
293
293
if row ["models" ][0 ] is None or row ["models" ][1 ] is None :
294
294
count_dict ["invalid" ] += 1
295
295
continue
296
- if not isinstance (row ["models" ][0 ], str ) or not isinstance (row ["models" ][1 ], str ):
296
+ if not isinstance (row ["models" ][0 ], str ) or not isinstance (
297
+ row ["models" ][1 ], str
298
+ ):
297
299
count_dict ["invalid" ] += 1
298
300
continue
299
301
300
302
# Resolve model names
301
- models_public = [remove_html (row ["models" ][0 ]), remove_html (row ["models" ][1 ])]
303
+ models_public = [
304
+ remove_html (row ["models" ][0 ]),
305
+ remove_html (row ["models" ][1 ]),
306
+ ]
302
307
if "model_name" in row ["states" ][0 ]:
303
308
models_hidden = [
304
309
row ["states" ][0 ]["model_name" ],
@@ -356,12 +361,12 @@ def process_data_txt2img(
356
361
"anony_bothbad_vote" : 0 ,
357
362
"anony_leftvote" : 0 ,
358
363
"anony_rightvote" : 0 ,
359
- "sanitized_id" : shortuuid .uuid ()
364
+ "sanitized_id" : shortuuid .uuid (),
360
365
}
361
366
all_ips [ip ]["count" ] += 1
362
367
if flag_anony :
363
368
all_ips [ip ]["anony_count" ] += 1
364
- all_ips [ip ]["anony_" + row ["type" ]] += 1
369
+ all_ips [ip ]["anony_" + row ["type" ]] += 1
365
370
366
371
if sanitize_ip :
367
372
user_id = f"{ all_ips [ip ]['sanitized_id' ]} "
@@ -389,6 +394,7 @@ def process_data_txt2img(
389
394
)
390
395
return battles , count_dict , count_leak , all_ips
391
396
397
+
392
398
def process_data (
393
399
data ,
394
400
exclude_model_names ,
@@ -433,12 +439,17 @@ def process_data(
433
439
if row ["models" ][0 ] is None or row ["models" ][1 ] is None :
434
440
count_dict ["invalid" ] += 1
435
441
continue
436
- if not isinstance (row ["models" ][0 ], str ) or not isinstance (row ["models" ][1 ], str ):
442
+ if not isinstance (row ["models" ][0 ], str ) or not isinstance (
443
+ row ["models" ][1 ], str
444
+ ):
437
445
count_dict ["invalid" ] += 1
438
446
continue
439
447
440
448
# Resolve model names
441
- models_public = [remove_html (row ["models" ][0 ]), remove_html (row ["models" ][1 ])]
449
+ models_public = [
450
+ remove_html (row ["models" ][0 ]),
451
+ remove_html (row ["models" ][1 ]),
452
+ ]
442
453
if "model_name" in row ["states" ][0 ]:
443
454
models_hidden = [
444
455
row ["states" ][0 ]["model_name" ],
@@ -484,7 +495,6 @@ def process_data(
484
495
print (state ["messages" ][state ["offset" ]][1 ])
485
496
raise ValueError
486
497
487
-
488
498
# Drop conversations if the model names are leaked
489
499
messages = ""
490
500
for i in range (2 ):
@@ -576,12 +586,12 @@ def process_data(
576
586
"anony_bothbad_vote" : 0 ,
577
587
"anony_leftvote" : 0 ,
578
588
"anony_rightvote" : 0 ,
579
- "sanitized_id" : shortuuid .uuid ()
589
+ "sanitized_id" : shortuuid .uuid (),
580
590
}
581
591
all_ips [ip ]["count" ] += 1
582
592
if flag_anony :
583
593
all_ips [ip ]["anony_count" ] += 1
584
- all_ips [ip ]["anony_" + row ["type" ]] += 1
594
+ all_ips [ip ]["anony_" + row ["type" ]] += 1
585
595
586
596
if sanitize_ip :
587
597
user_id = f"{ all_ips [ip ]['sanitized_id' ]} "
@@ -607,13 +617,25 @@ def process_data(
607
617
)
608
618
609
619
user_tokens = sum (
610
- [conv ["num_tokens" ] for conv in conversation_a if conv ["role" ] == "user" ]
620
+ [
621
+ conv ["num_tokens" ]
622
+ for conv in conversation_a
623
+ if conv ["role" ] == "user"
624
+ ]
611
625
)
612
626
assistant_a_tokens = sum (
613
- [conv ["num_tokens" ] for conv in conversation_a if conv ["role" ] == "assistant" ]
627
+ [
628
+ conv ["num_tokens" ]
629
+ for conv in conversation_a
630
+ if conv ["role" ] == "assistant"
631
+ ]
614
632
)
615
633
assistant_b_tokens = sum (
616
- [conv ["num_tokens" ] for conv in conversation_b if conv ["role" ] == "assistant" ]
634
+ [
635
+ conv ["num_tokens" ]
636
+ for conv in conversation_b
637
+ if conv ["role" ] == "assistant"
638
+ ]
617
639
)
618
640
context_tokens_a = sum ([conv ["num_tokens" ] for conv in conversation_a [:- 1 ]])
619
641
context_tokens_b = sum ([conv ["num_tokens" ] for conv in conversation_b [:- 1 ]])
@@ -702,30 +724,36 @@ def clean_battle_data(
702
724
all_ips [ip ]["count" ] += sub_all_ips [ip ]["count" ]
703
725
all_ips [ip ]["anony_count" ] += sub_all_ips [ip ]["anony_count" ]
704
726
all_ips [ip ]["anony_tievote" ] += sub_all_ips [ip ]["anony_tievote" ]
705
- all_ips [ip ]["anony_bothbad_vote" ] += sub_all_ips [ip ]["anony_bothbad_vote" ]
727
+ all_ips [ip ]["anony_bothbad_vote" ] += sub_all_ips [ip ][
728
+ "anony_bothbad_vote"
729
+ ]
706
730
all_ips [ip ]["anony_leftvote" ] += sub_all_ips [ip ]["anony_leftvote" ]
707
731
all_ips [ip ]["anony_rightvote" ] += sub_all_ips [ip ]["anony_rightvote" ]
708
732
709
733
battles .sort (key = lambda x : x ["tstamp" ])
710
734
last_updated_tstamp = battles [- 1 ]["tstamp" ]
711
-
735
+
712
736
battles = pd .DataFrame (battles )
713
-
737
+
714
738
# drop rows with same question_id
715
739
print (f"before drop dups #battles: { len (battles )} " )
716
740
battles = battles .drop_duplicates (subset = ["question_id" ], keep = "first" )
717
741
battles = battles .reset_index (drop = True )
718
742
print (f"#battles: { len (battles )} " )
719
743
720
- battles = battles [battles ["anony" ]].reset_index (drop = True ) if anony_only else battles
744
+ battles = (
745
+ battles [battles ["anony" ]].reset_index (drop = True ) if anony_only else battles
746
+ )
721
747
if run_dedup and not (vision or txt2img ):
722
748
print ("Running deduplication..." )
723
749
battles = utils .dedup_process (battles )
724
750
num_dedup_battles = sum (battles ["dedup_tag" ].apply (lambda x : x ["sampled" ]))
725
751
print (f"#dedup_battles: { num_dedup_battles } " )
726
752
else :
727
753
print ("Skip deduplication..." )
728
- dedup_tags = np .array ([{"high_freq" : False , "sampled" : True } for _ in range (len (battles ))])
754
+ dedup_tags = np .array (
755
+ [{"high_freq" : False , "sampled" : True } for _ in range (len (battles ))]
756
+ )
729
757
battles ["dedup_tag" ] = dedup_tags
730
758
731
759
last_updated_datetime = datetime .datetime .fromtimestamp (
@@ -746,7 +774,9 @@ def clean_battle_data(
746
774
for votetype in ["tievote" , "bothbad_vote" , "leftvote" , "rightvote" ]:
747
775
vote_key = "anony_" + votetype
748
776
userid_key = "sanitized_id" if sanitize_ip else "ip"
749
- top_30_users = sorted (all_ips .values (), key = lambda x : x [vote_key ], reverse = True )[:30 ]
777
+ top_30_users = sorted (
778
+ all_ips .values (), key = lambda x : x [vote_key ], reverse = True
779
+ )[:30 ]
750
780
top_30_ip_id = ["arena_user_" + ip [userid_key ] for ip in top_30_users ]
751
781
battles_top_30_ips = battles [battles ["judge" ].isin (top_30_ip_id )]
752
782
print (f"Top 30 IPs #battles: { len (battles_top_30_ips )} " )
@@ -755,13 +785,15 @@ def clean_battle_data(
755
785
for user in top_30_users :
756
786
user_ip = user ["ip" ]
757
787
user_id = "arena_user_" + user [userid_key ]
758
-
788
+
759
789
ip_battles = battles_top_30_ips [battles_top_30_ips ["judge" ] == user_id ]
760
790
win_count = len (ip_battles [ip_battles ["winner" ] == "model_a" ])
761
791
tie_count = len (ip_battles [ip_battles ["winner" ] == "tie" ])
762
792
loss_count = len (ip_battles [ip_battles ["winner" ] == "model_b" ])
763
- print (f"{ user_id } : model_a { win_count } , tie { tie_count } , mobel_b { loss_count } , { user_ip } " )
764
-
793
+ print (
794
+ f"{ user_id } : model_a { win_count } , tie { tie_count } , mobel_b { loss_count } , { user_ip } "
795
+ )
796
+
765
797
return battles
766
798
767
799
@@ -784,8 +816,14 @@ def clean_battle_data(
784
816
ban_ip_list = json .load (open (args .ban_ip_file )) if args .ban_ip_file else None
785
817
786
818
battles = clean_battle_data (
787
- log_files , args .exclude_model_names or [], ban_ip_list , args .sanitize_ip , anony_only = args .anony_only ,
788
- run_dedup = args .run_dedup , vision = args .vision , txt2img = args .txt2img
819
+ log_files ,
820
+ args .exclude_model_names or [],
821
+ ban_ip_list ,
822
+ args .sanitize_ip ,
823
+ anony_only = args .anony_only ,
824
+ run_dedup = args .run_dedup ,
825
+ vision = args .vision ,
826
+ txt2img = args .txt2img ,
789
827
)
790
828
last_updated_tstamp = battles .iloc [- 1 ]["tstamp" ]
791
829
cutoff_date = datetime .datetime .fromtimestamp (
@@ -801,7 +839,9 @@ def clean_battle_data(
801
839
print (f"Write cleaned data to { output } " )
802
840
803
841
if not args .txt2img :
804
- battles = battles .drop (columns = ["conversation_a" , "conversation_b" , "question_id" ])
842
+ battles = battles .drop (
843
+ columns = ["conversation_a" , "conversation_b" , "question_id" ]
844
+ )
805
845
print ("Samples:" )
806
846
print (battles [:5 ])
807
847
0 commit comments