From c2e52fde8b9dd8b922e1c1781a0a7f3501f96827 Mon Sep 17 00:00:00 2001
From: ljvmiranda921 <ljvmiranda@gmail.com>
Date: Fri, 11 Oct 2024 20:58:03 -0700
Subject: [PATCH 1/9] [WIP] Restyle inner-model agreement plot and shorten model labels

---
 analysis/avg_agreement_final.py | 118 +++++++++++++-------------------
 1 file changed, 48 insertions(+), 70 deletions(-)

diff --git a/analysis/avg_agreement_final.py b/analysis/avg_agreement_final.py
index cb93b89..0ff6d4f 100644
--- a/analysis/avg_agreement_final.py
+++ b/analysis/avg_agreement_final.py
@@ -2,63 +2,39 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
+FONT_SIZES = {"small": 12, "medium": 16, "large": 18}
+
+PLOT_PARAMS = {
+    "font.family": "serif",
+    "font.serif": ["Times New Roman", "STIX"],
+    "font.size": FONT_SIZES.get("medium"),
+    "axes.titlesize": FONT_SIZES.get("large"),
+    "axes.labelsize": FONT_SIZES.get("large"),
+    "xtick.labelsize": FONT_SIZES.get("large"),
+    "ytick.labelsize": FONT_SIZES.get("large"),
+    "legend.fontsize": FONT_SIZES.get("medium"),
+    "figure.titlesize": FONT_SIZES.get("medium"),
+    "text.usetex": False,
+}
+
+plt.rcParams.update(PLOT_PARAMS)
+
+
 data = {
-  "meta-llama/Meta-Llama-3.1-8B-Instruct": [
-    0.3533086666014079,
-    0.052422082615756406
-  ],
-  "cohere/c4ai-aya-23-35b": [
-    0.43767196047824003,
-    0.026040919354464294
-  ],
-  "cohere/c4ai-aya-23-8b": [
-    0.013483014909052663,
-    0.03363706833599835
-  ],
-  "cohere/command-r-08-2024": [
-    0.374457668650282,
-    0.02926089754079793
-  ],
-  "cohere/command-r-plus-08-2024": [
-    0.3830841816733316,
-    0.020185255968455686
-  ],
-  "google/gemma-1.1-7b-it": [
-    0.5190375637539242,
-    0.027757722654111305
-  ],
-  "google/gemma-2-9b-it": [
-    0.5181663123111222,
-    0.031090119385244894
-  ],
-  "meta-llama/Meta-Llama-3-70B-Instruct": [
-    0.5685224105896568,
-    0.04853344616275034
-  ],
-  "meta-llama/Meta-Llama-3-8B-Instruct": [
-    0.37936948540837095,
-    0.032172769265151994
-  ],
-  "meta-llama/Meta-Llama-3.1-70B-Instruct": [
-    0.603536768244583,
-    0.027191895488989915
-  ],
-  "mistralai/Mistral-7B-Instruct-v0.2": [
-    0.4071166722276529,
-    0.04577594028555328
-  ],
-  "mistralai/Mistral-7B-Instruct-v0.3": [
-    0.41195018984687265,
-    0.056184679972755454
-  ],
-  "openai/gpt-4-turbo-2024-04-09": [
-    0.6106943361444249,
-    0.02932446842558468
-  ],
-  "openai/gpt-4o-2024-05-13": [
-    0.5833874065757011,
-    0.023695391445384514
-  ]
+    "Llama 3.1 8B": [0.3533086666014079, 0.052422082615756406],
+    "Aya 23 35B": [0.43767196047824003, 0.026040919354464294],
+    # "Aya 23 8B": [0.013483014909052663, 0.03363706833599835],
+    "Command R": [0.374457668650282, 0.02926089754079793],
+    "Command R+": [0.3830841816733316, 0.020185255968455686],
+    "Gemma 1.1 7B": [0.5190375637539242, 0.027757722654111305],
+    "Gemma 2 9B": [0.5181663123111222, 0.031090119385244894],
+    "Llama 3 70B": [0.5685224105896568, 0.04853344616275034],
+    "Llama 3 8B": [0.37936948540837095, 0.032172769265151994],
+    "Llama 3.1 70B": [0.603536768244583, 0.027191895488989915],
+    "Mistral 7B v0.2": [0.4071166722276529, 0.04577594028555328],
+    "Mistral 7B v0.3": [0.41195018984687265, 0.056184679972755454],
+    "GPT-4 Turbo": [0.6106943361444249, 0.02932446842558468],
+    "GPT-4o": [0.5833874065757011, 0.023695391445384514],
 }
 
 sorted_data = dict(sorted(data.items(), key=lambda item: item[1][0]))
@@ -66,27 +42,29 @@
 means_sorted = [v[0] for v in sorted_data.values()]
 std_devs_sorted = [v[1] for v in sorted_data.values()]
 
-sns.set(style="whitegrid")
-palette = sns.color_palette("coolwarm", len(labels_sorted))
+# sns.set(style="whitegrid")
+# palette = sns.color_palette("coolwarm", len(labels_sorted))
 
-plt.figure(figsize=(10, 6))
+plt.figure(figsize=(10, 5))
 x_pos_sorted = np.arange(len(labels_sorted))
 
-ax1 = sns.barplot(x=x_pos_sorted, y=means_sorted, palette=palette, errorbar=None)
-plt.errorbar(x_pos_sorted, means_sorted, yerr=std_devs_sorted, fmt='none', c='black', capsize=5)
+ax1 = sns.barplot(x=x_pos_sorted, y=means_sorted, errorbar=None, color="green")
+plt.errorbar(x_pos_sorted, means_sorted, yerr=std_devs_sorted, fmt="none", c="black", capsize=5)
 
-ax1.spines['top'].set_color('black')
-ax1.spines['right'].set_color('black')
-ax1.spines['left'].set_color('black')
-ax1.spines['bottom'].set_color('black')
-for spine in ax1.spines.values():
-    spine.set_linewidth(2)  # Make the border thicker
+# ax1.spines["top"].set_color("black")
+# ax1.spines["right"].set_color("black")
+# ax1.spines["left"].set_color("black")
+# ax1.spines["bottom"].set_color("black")
+# for spine in ax1.spines.values():
+#     spine.set_linewidth(2)  # Make the border thicker
+plt.grid(color="gray", axis="y", alpha=0.2)
 
 plt.ylim(0, 0.8)
+plt.gca().set_axisbelow(True)
 
-plt.xticks(x_pos_sorted, labels_sorted, rotation=90)
+plt.xticks(x_pos_sorted, labels_sorted, rotation=45, ha="right")
 plt.ylabel("Cohen's Kappa")
-plt.title('Average Inner-Model Agreement Across Languages')
+plt.title("Average Inner-Model Agreement Across Languages")
 
 plt.tight_layout()
-plt.savefig(f"./innermodel_agreement.pdf", bbox_inches='tight')
\ No newline at end of file
+plt.savefig("plots/innermodel_agreement_green_oracle.pdf", bbox_inches="tight")

From 2fb0bed4d74b3d5a1fb07cd08130a4db3eff0fa7 Mon Sep 17 00:00:00 2001
From: ljvmiranda921 <ljvmiranda@gmail.com>
Date: Fri, 11 Oct 2024 23:20:26 -0700
Subject: [PATCH 2/9] [WIP] Split main heatmap into Avg/Var/language panels; resize agreement plot

---
 analysis/avg_agreement_final.py |  2 +-
 analysis/plot_results.py        | 38 +++++++++++++++++++++++++++------
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/analysis/avg_agreement_final.py b/analysis/avg_agreement_final.py
index 0ff6d4f..0b5dd3e 100644
--- a/analysis/avg_agreement_final.py
+++ b/analysis/avg_agreement_final.py
@@ -45,7 +45,7 @@
 # sns.set(style="whitegrid")
 # palette = sns.color_palette("coolwarm", len(labels_sorted))
 
-plt.figure(figsize=(10, 5))
+plt.figure(figsize=(7, 7))
 x_pos_sorted = np.arange(len(labels_sorted))
 
 ax1 = sns.barplot(x=x_pos_sorted, y=means_sorted, errorbar=None, color="green")
diff --git a/analysis/plot_results.py b/analysis/plot_results.py
index 2fbb64f..0e130fc 100644
--- a/analysis/plot_results.py
+++ b/analysis/plot_results.py
@@ -13,7 +13,7 @@
 
 PLOT_PARAMS = {
     "font.family": "serif",
-    "font.serif": ["Times New Roman", "STIX"],
+    "font.serif": ["Times", "Times New Roman", "STIX"],
     "font.size": FONT_SIZES.get("medium"),
     "axes.titlesize": FONT_SIZES.get("large"),
     "axes.labelsize": FONT_SIZES.get("large"),
@@ -122,6 +122,7 @@ def plot_main_heatmap(
     df = pd.read_csv(input_path)
     # Remove unnecessary column
     df.pop("eng_Latn")
+    df.pop("Family")
 
     df = df.sort_values(by="Avg_Multilingual", ascending=False).head(10).reset_index(drop=True)
     data = df[[col for col in df.columns if col not in ["Model_Type"]]].rename(columns={"Avg_Multilingual": "Avg"})
@@ -133,14 +134,37 @@ def plot_main_heatmap(
     data.pop("zho_Hant")
     data = data[sorted(data.columns)]
     data.columns = [col.split("_")[0] for col in data.columns]
+    data["Var"] = data[list(LANG_STANDARDIZATION.keys())].var(axis=1)
     data = data.rename(columns=LANG_STANDARDIZATION)
 
-    fig, ax = plt.subplots(1, 1, figsize=figsize)
-    sns.heatmap(data, ax=ax, cmap="YlGn", annot=True, annot_kws={"size": 16}, fmt=".2f", cbar=False)
-    ax.xaxis.set_ticks_position("top")
-    ax.tick_params(axis="x")
-    ax.set_ylabel("")
-    ax.set_yticklabels([f"{model}     " for model in data.index])
+    lang_results = data[list(LANG_STANDARDIZATION.values())]
+    avg = data[["Avg"]]
+    var = data[["Var"]]
+
+    fig, axs = plt.subplots(ncols=3, figsize=figsize, gridspec_kw={"width_ratios": [0.5, 0.5, 9]}, sharey=True)
+
+    sns.heatmap(avg, ax=axs[0], cmap="YlGn", annot=True, annot_kws={"size": 16}, fmt=".2f", cbar=False)
+    axs[0].xaxis.set_ticks_position("top")
+    axs[0].set_xticklabels(avg.columns, fontsize=20)
+    axs[0].tick_params(axis="x")
+    axs[0].set_ylabel("")
+    axs[0].set_yticklabels([f"{model}     " for model in avg.index], fontsize=20)
+
+    sns.heatmap(var, ax=axs[1], cmap="YlGn", annot=True, annot_kws={"size": 16}, fmt=".2f", cbar=False)
+    axs[1].xaxis.set_ticks_position("top")
+    axs[1].set_xticklabels(var.columns, fontsize=20)
+    axs[1].tick_params(axis="x")
+    axs[1].set_ylabel("")
+    axs[1].tick_params(axis="y", length=0)
+    axs[1].set_yticklabels([f"{model}     " for model in var.index], fontsize=20)
+
+    sns.heatmap(lang_results, ax=axs[2], cmap="YlGn", annot=True, annot_kws={"size": 16}, fmt=".2f", cbar=False)
+    axs[2].xaxis.set_ticks_position("top")
+    axs[2].set_xticklabels(lang_results.columns, fontsize=20)
+    axs[2].tick_params(axis="x")
+    axs[2].tick_params(axis="y", length=0)
+    axs[2].set_ylabel("")
+    axs[2].set_yticklabels([f"{model}     " for model in lang_results.index], fontsize=20)
 
     plt.tight_layout()
     fig.savefig(output_path, bbox_inches="tight")

From 9d6e82c0a8c2402f41ef534c8df5cbc6c8130550 Mon Sep 17 00:00:00 2001
From: ljvmiranda921 <ljvmiranda@gmail.com>
Date: Fri, 11 Oct 2024 23:46:32 -0700
Subject: [PATCH 3/9] [WIP] Keep Family column; label scatter points only when top_n is set

---
 analysis/plot_results.py | 43 +++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/analysis/plot_results.py b/analysis/plot_results.py
index 0e130fc..21bbd02 100644
--- a/analysis/plot_results.py
+++ b/analysis/plot_results.py
@@ -179,7 +179,7 @@ def plot_eng_drop_line(
     from scipy.stats import pearsonr, spearmanr
 
     df = pd.read_csv(input_path)
-    df = df[["Model", "Model_Type", "eng_Latn", "Avg_Multilingual"]]
+    df = df[["Model", "Model_Type", "Family", "eng_Latn", "Avg_Multilingual"]]
     df = df.sort_values(by="Avg_Multilingual", ascending=False).reset_index(drop=True)
     data = df.set_index("Model").dropna()
     data[data.select_dtypes(include="number").columns] = data.select_dtypes(include="number") * 100
@@ -191,6 +191,16 @@ def plot_eng_drop_line(
     fig, ax = plt.subplots(figsize=figsize)
 
     colors = ["red", "green", "blue"]
+    family = {
+        "Independent": "o",
+        "Qwen": "x",
+        "Skywork": "P",
+        "Cohere": "*",
+        "OpenAI": "s",
+        "AllenAI": "D",
+        "OpenBMB": "H",
+        "Meta": "^",
+    }
     for (label, group), color in zip(data.groupby("Model_Type"), colors):
         mrewardbench_scores = group["Avg_Multilingual"]
         rewardbench_scores = group["eng_Latn"]
@@ -212,22 +222,23 @@ def plot_eng_drop_line(
     ax.set_aspect("equal")
     ax.legend(frameon=False, handletextpad=0.2, fontsize=12)
 
-    model_names = [MODEL_STANDARDIZATION[model] for model in data.index]
-    texts = [
-        ax.text(
-            rewardbench_scores[idx],
-            mrewardbench_scores[idx],
-            model_names[idx],
-            fontsize=14,
+    if top_n:
+        model_names = [MODEL_STANDARDIZATION[model] for model in data.index]
+        texts = [
+            ax.text(
+                rewardbench_scores[idx],
+                mrewardbench_scores[idx],
+                model_names[idx],
+                fontsize=14,
+            )
+            for idx in range(len(data))
+        ]
+        adjust_text(
+            texts,
+            ax=ax,
+            force_static=0.15,
+            arrowprops=dict(arrowstyle="->", color="gray"),
         )
-        for idx in range(len(data))
-    ]
-    adjust_text(
-        texts,
-        ax=ax,
-        # force_static=0.15,
-        arrowprops=dict(arrowstyle="->", color="gray"),
-    )
 
     # ax.text(
     #     0.6,

From ae5d9018479cc59ff15056e96f97143b173e7cb1 Mon Sep 17 00:00:00 2001
From: ljvmiranda921 <ljvmiranda@gmail.com>
Date: Sat, 12 Oct 2024 12:43:52 -0700
Subject: [PATCH 4/9] [wip] Use greyscale heatmap and per-type scatter markers

---
 analysis/plot_results.py | 44 ++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/analysis/plot_results.py b/analysis/plot_results.py
index 0e130fc..13cf511 100644
--- a/analysis/plot_results.py
+++ b/analysis/plot_results.py
@@ -142,15 +142,17 @@ def plot_main_heatmap(
     var = data[["Var"]]
 
     fig, axs = plt.subplots(ncols=3, figsize=figsize, gridspec_kw={"width_ratios": [0.5, 0.5, 9]}, sharey=True)
+    cmap = "Greys"
+    fmt = ".1f"
 
-    sns.heatmap(avg, ax=axs[0], cmap="YlGn", annot=True, annot_kws={"size": 16}, fmt=".2f", cbar=False)
+    sns.heatmap(avg, ax=axs[0], cmap=cmap, annot=True, annot_kws={"size": 16}, fmt=fmt, cbar=False)
     axs[0].xaxis.set_ticks_position("top")
     axs[0].set_xticklabels(avg.columns, fontsize=20)
     axs[0].tick_params(axis="x")
     axs[0].set_ylabel("")
     axs[0].set_yticklabels([f"{model}     " for model in avg.index], fontsize=20)
 
-    sns.heatmap(var, ax=axs[1], cmap="YlGn", annot=True, annot_kws={"size": 16}, fmt=".2f", cbar=False)
+    sns.heatmap(var, ax=axs[1], cmap=cmap, annot=True, annot_kws={"size": 16}, fmt=fmt, cbar=False)
     axs[1].xaxis.set_ticks_position("top")
     axs[1].set_xticklabels(var.columns, fontsize=20)
     axs[1].tick_params(axis="x")
@@ -158,7 +160,7 @@ def plot_main_heatmap(
     axs[1].tick_params(axis="y", length=0)
     axs[1].set_yticklabels([f"{model}     " for model in var.index], fontsize=20)
 
-    sns.heatmap(lang_results, ax=axs[2], cmap="YlGn", annot=True, annot_kws={"size": 16}, fmt=".2f", cbar=False)
+    sns.heatmap(lang_results, ax=axs[2], cmap=cmap, annot=True, annot_kws={"size": 16}, fmt=fmt, cbar=False)
     axs[2].xaxis.set_ticks_position("top")
     axs[2].set_xticklabels(lang_results.columns, fontsize=20)
     axs[2].tick_params(axis="x")
@@ -191,10 +193,11 @@ def plot_eng_drop_line(
     fig, ax = plt.subplots(figsize=figsize)
 
     colors = ["red", "green", "blue"]
-    for (label, group), color in zip(data.groupby("Model_Type"), colors):
+    markers = ["o", "*", "D"]
+    for (label, group), marker in zip(data.groupby("Model_Type"), markers):
         mrewardbench_scores = group["Avg_Multilingual"]
         rewardbench_scores = group["eng_Latn"]
-        ax.scatter(rewardbench_scores, mrewardbench_scores, marker="o", s=40, label=label, color=color)
+        ax.scatter(rewardbench_scores, mrewardbench_scores, marker=marker, s=60, label=label, color="k")
 
     mrewardbench_scores = data["Avg_Multilingual"]
     rewardbench_scores = data["eng_Latn"]
@@ -212,22 +215,23 @@ def plot_eng_drop_line(
     ax.set_aspect("equal")
     ax.legend(frameon=False, handletextpad=0.2, fontsize=12)
 
-    model_names = [MODEL_STANDARDIZATION[model] for model in data.index]
-    texts = [
-        ax.text(
-            rewardbench_scores[idx],
-            mrewardbench_scores[idx],
-            model_names[idx],
-            fontsize=14,
+    if top_n:
+        model_names = [MODEL_STANDARDIZATION[model] for model in data.index]
+        texts = [
+            ax.text(
+                rewardbench_scores[idx],
+                mrewardbench_scores[idx],
+                model_names[idx],
+                fontsize=14,
+            )
+            for idx in range(len(data))
+        ]
+        adjust_text(
+            texts,
+            ax=ax,
+            # force_static=0.15,
+            arrowprops=dict(arrowstyle="->", color="gray"),
         )
-        for idx in range(len(data))
-    ]
-    adjust_text(
-        texts,
-        ax=ax,
-        # force_static=0.15,
-        arrowprops=dict(arrowstyle="->", color="gray"),
-    )
 
     # ax.text(
     #     0.6,

From 47026a18e1f9ff89e6a0e8e45fa6285a929f4535 Mon Sep 17 00:00:00 2001
From: ljvmiranda921 <ljvmiranda@gmail.com>
Date: Sat, 12 Oct 2024 12:59:29 -0700
Subject: [PATCH 5/9] [wip] Recolor plot_ling_dims plot to gray with black edges

---
 analysis/plot_results.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/analysis/plot_results.py b/analysis/plot_results.py
index 13cf511..5fd586a 100644
--- a/analysis/plot_results.py
+++ b/analysis/plot_results.py
@@ -298,7 +298,8 @@ def plot_ling_dims(
             y=dim,
             data=lingdf,
             ax=ax,
-            color="green",
+            color="gray",
+            edgecolor="k",
             width=0.4 if dim == "Resource Availability" else 0.7,
         )
         ax.set_title(dim)

From e422fce11b20daadf7c8490880ca2263060020ed Mon Sep 17 00:00:00 2001
From: ljvmiranda921 <ljvmiranda@gmail.com>
Date: Sat, 12 Oct 2024 13:02:23 -0700
Subject: [PATCH 6/9] [wip] Switch agreement bar color from green to gray

---
 analysis/avg_agreement_final.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/analysis/avg_agreement_final.py b/analysis/avg_agreement_final.py
index 0b5dd3e..57cb36f 100644
--- a/analysis/avg_agreement_final.py
+++ b/analysis/avg_agreement_final.py
@@ -48,7 +48,7 @@
 plt.figure(figsize=(7, 7))
 x_pos_sorted = np.arange(len(labels_sorted))
 
-ax1 = sns.barplot(x=x_pos_sorted, y=means_sorted, errorbar=None, color="green")
+ax1 = sns.barplot(x=x_pos_sorted, y=means_sorted, errorbar=None, color="gray")
 plt.errorbar(x_pos_sorted, means_sorted, yerr=std_devs_sorted, fmt="none", c="black", capsize=5)
 
 # ax1.spines["top"].set_color("black")

From a77432dead38281972d566f87ba1748673caffab Mon Sep 17 00:00:00 2001
From: ljvmiranda921 <ljvmiranda@gmail.com>
Date: Sat, 12 Oct 2024 13:02:44 -0700
Subject: [PATCH 7/9] [wip] Add black edge color to agreement bars

---
 analysis/avg_agreement_final.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/analysis/avg_agreement_final.py b/analysis/avg_agreement_final.py
index 57cb36f..fb33d66 100644
--- a/analysis/avg_agreement_final.py
+++ b/analysis/avg_agreement_final.py
@@ -48,7 +48,7 @@
 plt.figure(figsize=(7, 7))
 x_pos_sorted = np.arange(len(labels_sorted))
 
-ax1 = sns.barplot(x=x_pos_sorted, y=means_sorted, errorbar=None, color="gray")
+ax1 = sns.barplot(x=x_pos_sorted, y=means_sorted, errorbar=None, color="gray", edgecolor="k")
 plt.errorbar(x_pos_sorted, means_sorted, yerr=std_devs_sorted, fmt="none", c="black", capsize=5)
 
 # ax1.spines["top"].set_color("black")

From 12d649235962514314d229318dd53a24640aa773 Mon Sep 17 00:00:00 2001
From: ljvmiranda921 <ljvmiranda@gmail.com>
Date: Sat, 12 Oct 2024 18:01:48 -0700
Subject: [PATCH 8/9] [WIP] Add COLORS palette and color scatter points by model type

---
 analysis/plot_results.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/analysis/plot_results.py b/analysis/plot_results.py
index c155515..9b58f03 100644
--- a/analysis/plot_results.py
+++ b/analysis/plot_results.py
@@ -66,6 +66,8 @@
     "zho": "zh",
 }
 
+COLORS = {"green": "#355145", "purple": "#d8a6e5", "orange": "#fe7759"}
+
 
 def get_args():
     # fmt: off
@@ -192,12 +194,19 @@ def plot_eng_drop_line(
 
     fig, ax = plt.subplots(figsize=figsize)
 
-    colors = ["red", "green", "blue"]
+    colors = [COLORS.get("green"), COLORS.get("purple"), COLORS.get("orange")]
     markers = ["o", "*", "D"]
-    for (label, group), marker in zip(data.groupby("Model_Type"), markers):
+    for (label, group), color in zip(data.groupby("Model_Type"), colors):
         mrewardbench_scores = group["Avg_Multilingual"]
         rewardbench_scores = group["eng_Latn"]
-        ax.scatter(rewardbench_scores, mrewardbench_scores, marker=marker, s=60, label=label, color="k")
+        ax.scatter(
+            rewardbench_scores,
+            mrewardbench_scores,
+            marker="o",
+            s=60,
+            label=label,
+            color=color,
+        )
 
     mrewardbench_scores = data["Avg_Multilingual"]
     rewardbench_scores = data["eng_Latn"]

From 0ab3aaa1a9920ae81dad73ef73f58fc86ad831f5 Mon Sep 17 00:00:00 2001
From: ljvmiranda921 <ljvmiranda@gmail.com>
Date: Sat, 12 Oct 2024 18:08:09 -0700
Subject: [PATCH 9/9] [WIP] Apply COLORS palette to agreement and plot_ling_dims plots

---
 analysis/avg_agreement_final.py | 9 ++++++++-
 analysis/plot_results.py        | 4 ++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/analysis/avg_agreement_final.py b/analysis/avg_agreement_final.py
index fb33d66..25ed807 100644
--- a/analysis/avg_agreement_final.py
+++ b/analysis/avg_agreement_final.py
@@ -3,6 +3,7 @@
 import numpy as np
 
 FONT_SIZES = {"small": 12, "medium": 16, "large": 18}
+COLORS = {"green": "#355145", "purple": "#d8a6e5", "orange": "#fe7759"}
 
 PLOT_PARAMS = {
     "font.family": "serif",
@@ -48,7 +49,13 @@
 plt.figure(figsize=(7, 7))
 x_pos_sorted = np.arange(len(labels_sorted))
 
-ax1 = sns.barplot(x=x_pos_sorted, y=means_sorted, errorbar=None, color="gray", edgecolor="k")
+ax1 = sns.barplot(
+    x=x_pos_sorted,
+    y=means_sorted,
+    errorbar=None,
+    color=COLORS.get("orange"),
+    edgecolor=COLORS.get("green"),
+)
 plt.errorbar(x_pos_sorted, means_sorted, yerr=std_devs_sorted, fmt="none", c="black", capsize=5)
 
 # ax1.spines["top"].set_color("black")
diff --git a/analysis/plot_results.py b/analysis/plot_results.py
index 9b58f03..1ec28c4 100644
--- a/analysis/plot_results.py
+++ b/analysis/plot_results.py
@@ -307,8 +307,8 @@ def plot_ling_dims(
             y=dim,
             data=lingdf,
             ax=ax,
-            color="gray",
-            edgecolor="k",
+            color=COLORS.get("orange"),
+            edgecolor=COLORS.get("green"),
             width=0.4 if dim == "Resource Availability" else 0.7,
         )
         ax.set_title(dim)