Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Real2Sim Eval Digital Twins #536

Merged
merged 56 commits into from
Sep 13, 2024
Merged

[Feature] Real2Sim Eval Digital Twins #536

merged 56 commits into from
Sep 13, 2024

Conversation

StoneT2000
Copy link
Member

@StoneT2000 StoneT2000 commented Aug 29, 2024

@StoneT2000
Copy link
Member Author

StoneT2000 commented Sep 12, 2024

image

MMRV and correlation coefficients of the MS3 real2sim digital twins. Good enough for use.

from typing import Sequence

import matplotlib.pyplot as plt
import numpy as np
def pearson_correlation(perf_sim: Sequence[float], perf_real: Sequence[float]) -> float:
    """Return the Pearson correlation between sim and real performance numbers.

    Args:
        perf_sim: per-policy performance values measured in simulation.
        perf_real: per-policy performance values measured on the real setup,
            in the same order as ``perf_sim``.

    Returns:
        Correlation coefficient in [-1, 1] as a Python ``float``. Identical
        inputs short-circuit to exactly ``1.0``; otherwise a small epsilon in
        the denominator guards against division by zero when either series
        is constant.
    """
    perf_sim = np.asarray(perf_sim, dtype=float)
    perf_real = np.asarray(perf_real, dtype=float)
    assert perf_sim.shape == perf_real.shape
    # Mean-center both series before correlating.
    perf_sim = perf_sim - np.mean(perf_sim)
    perf_real = perf_real - np.mean(perf_real)
    if np.array_equal(perf_sim, perf_real):
        # Identical centered series are perfectly correlated; returning 1.0
        # directly avoids the epsilon nudging the result just below 1.
        return 1.0
    # Classic Pearson formula; the 1e-8 keeps the denominator nonzero when
    # one of the (centered) series is all zeros.
    return float(
        np.sum(perf_sim * perf_real)
        / (np.sqrt(np.sum(perf_sim**2) * np.sum(perf_real**2)) + 1e-8)
    )
def mean_maximum_rank_violation(
    perf_sim: Sequence[float], perf_real: Sequence[float]
) -> float:
    """Compute the mean maximum rank violation (MMRV) between sim and real.

    For each policy ``i``, take the largest real-performance gap
    ``|real[i] - real[j]|`` over all policies ``j`` whose sim ordering
    relative to ``i`` disagrees with the real ordering, then average those
    per-policy maxima. 0.0 means the sim ranking never contradicts reality.
    """
    sim = np.array(perf_sim)
    real = np.array(perf_real)
    assert sim.shape == real.shape
    n = len(sim)
    per_policy_maxima = []
    for i in range(n):
        # Real-performance gaps to every j whose relative ordering flips
        # between sim and real (strict ">" on both sides, so ties never
        # count as a violation).
        disagreement_gaps = [
            np.abs(real[i] - real[j])
            for j in range(n)
            if (sim[i] > sim[j]) != (real[i] > real[j])
        ]
        per_policy_maxima.append(max(disagreement_gaps, default=0.0))
    return np.mean(per_policy_maxima)
# Data from the table
# Real-robot evaluation results: task name -> policy name -> metrics.
# "success": fraction of episodes where the full task succeeded.
# "grasp": fraction of episodes where the target object was grasped.
real_eval_data = {
    "PutCarrotOnPlateInScene-v1": {
        "octo_base": {"success": 0.25, "grasp": 0.5},
        "octo_small": {"success": 0.083, "grasp": 0.208},
        "rt-1x": {"success": 0, "grasp": 0.167},
    },
    "PutSpoonOnTableClothInScene-v1": {
        "octo_base": {"success": 0.333, "grasp": 0.5},
        "octo_small": {"success": 0.417, "grasp": 0.542},
        "rt-1x": {"success": 0.0, "grasp": 0.042},
    },
    "StackGreenCubeOnYellowCubeInScene-v1": {
        "octo_base": {"success": 0.0, "grasp": 0.292},
        "octo_small": {"success": 0.125, "grasp": 0.583},
        "rt-1x": {"success": 0.0, "grasp": 0.0},
    },
    "PutEggplantInBasketInScene-v1": {
        "octo_base": {"success": 0.233, "grasp": 0.4},
        "octo_small": {"success": 0.433, "grasp": 0.7},
        "rt-1x": {"success": 0.0, "grasp": 0.033},
    }
}
# Simulated (digital-twin) evaluation results, mirroring the structure of
# real_eval_data: task name -> policy name -> {"success", "grasp"} rates.
sim_eval_data = {
    "PutCarrotOnPlateInScene-v1": {
        # "octo_base": {"success": 0.20833, "grasp": 0.4166}, # 1 seed
        "octo_base": {"success": 0.167, "grasp": 0.417}, # 3 seeds / 72 episodes
        # "octo_small": {"success": 0.125, "grasp": 0.29166}, # 1 seed
        "octo_small": {"success": 0.083, "grasp": 0.194}, # 3 seeds / 72 episodes
        "rt-1x": {"success": 0.125, "grasp": 0.208},
    },
    "PutSpoonOnTableClothInScene-v1": {
        # "octo_base": {"success": 0.125, "grasp": 0.333}, # 1 seed
        "octo_base": {"success": 0.069, "grasp": 0.347}, # 3 seeds / 72 episodes
        # "octo_small": {"success": 0.25, "grasp": 0.75}, # 1 seed
        "octo_small": {"success": 0.361, "grasp": 0.681}, # 3 seeds / 72 episodes
        "rt-1x": {"success": 0.125, "grasp": 0.166},
    },
    "StackGreenCubeOnYellowCubeInScene-v1": {
        # "octo_base": {"success": 0.0, "grasp": 0.25}, # 1 seed
        "octo_base": {"success": 0.0, "grasp": 0.194}, # 3 seeds / 72 episodes
        # "octo_small": {"success": 0.042, "grasp": 0.25}, # 1 seed
        "octo_small": {"success": 0.028, "grasp": 0.278}, # 3 seeds / 72 episodes
        "rt-1x": {"success": 0.0, "grasp": 0.042},
    },
    # results from commit d1f0893c58a4aaff787b20c5cd6fdf1a536fd3e5, which might look a bit bugged on occasion...
    "PutEggplantInBasketInScene-v1": {
        "octo_base": {"success": 0.396, "grasp": 0.75},
        "octo_small": {"success": 0.531, "grasp": 0.75},
        "rt-1x": {"success": 0.0, "grasp": 0.0},
    }
}
# Scatter real vs. sim performance: color encodes the task, marker shape
# encodes the model, alpha separates success (translucent) from grasp.
task_colors = ["blue", "red", "green", "purple"]
marker_styles = ["x", "o", "+"]
# Create scatter plot
plt.figure(figsize=(8, 6))
plt.grid(True)
plt.plot([0, 1], [0, 1], linestyle='--', color='gray', label='y=x')
for task_idx, task in enumerate(real_eval_data):
    color = task_colors[task_idx]
    for model_idx, model in enumerate(real_eval_data[task]):
        real_metrics = real_eval_data[task][model]
        sim_metrics = sim_eval_data[task][model]
        marker = marker_styles[model_idx]
        # "+" renders visually smaller than "x"/"o", so draw it larger.
        point_size = 60 if marker == '+' else 40
        plt.scatter(real_metrics["success"], sim_metrics["success"],
                    marker=marker, color=color, alpha=0.5, s=point_size)
        plt.scatter(real_metrics["grasp"], sim_metrics["grasp"],
                    marker=marker, color=color, s=point_size)

# Compute Pearson correlation coefficient and MMRV for each task/metric pair.
# NOTE(fix): the value lists are now rebuilt for every data_type. Previously
# they were initialized once per task while the statistics were computed
# inside the data_type loop, so the "grasp" correlation/MMRV silently mixed
# in the "success" values as well, contradicting the per-metric labels.
task_correlations = {}
task_mmrvs = {}
for task in real_eval_data.keys():
    for data_type in ["success", "grasp"]:
        # One (real, sim) pair per model, for this task and this metric only.
        real_values = [real_eval_data[task][model][data_type] for model in real_eval_data[task]]
        sim_values = [sim_eval_data[task][model][data_type] for model in real_eval_data[task]]
        task_correlation = pearson_correlation(real_values, sim_values)
        task_correlations[f"{task}_{data_type}"] = task_correlation
        task_mmrv = mean_maximum_rank_violation(real_values, sim_values)
        task_mmrvs[f"{task}_{data_type}"] = task_mmrv
        print(f"Pearson correlation coefficient for {task}_{data_type}: {task_correlation:.4f}")
        print(f"Mean maximum rank violation for {task}_{data_type}: {task_mmrv:.4f}")

# Compute average Pearson correlation coefficient (and average MMRV) across
# all task/metric pairs.
overall_correlation = np.mean(list(task_correlations.values()))
print(f"Overall Pearson correlation coefficient: {overall_correlation:.4f}")
overall_mmrv = np.mean(list(task_mmrvs.values()))
print(f"Overall mean maximum rank violation: {overall_mmrv:.4f}")

# Add text annotations for the overall correlation coefficient and MMRV.
for y_position, annotation in [
    (0.95, f"r = {overall_correlation:.4f}"),
    (0.85, f"MMRV = {overall_mmrv:.4f}"),
]:
    plt.text(0.05, y_position, annotation, transform=plt.gca().transAxes,
             verticalalignment='top', fontsize=10,
             bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

# Legend: marker shape = model, color = task, alpha = success vs. grasp.
legend_elements = [
    plt.Line2D([0], [0], marker=marker, color='none', label=model_label,
               markeredgecolor='black', markersize=10)
    for marker, model_label in [('x', 'octo_base'), ('o', 'octo_small'), ('+', 'rt-1x')]
]
for color, success_label, grasp_label in [
    ('blue', 'Put Carrot', 'Grasp Carrot'),
    ('red', 'Put Spoon', 'Grasp Spoon'),
    ('green', 'Stack Green Cube', 'Grasp Green Cube'),
    ('purple', 'Put Eggplant', 'Grasp Eggplant'),
]:
    legend_elements.append(
        plt.Line2D([0], [0], linestyle='none', marker='o', color=color,
                   label=success_label, alpha=0.5))
    legend_elements.append(
        plt.Line2D([0], [0], linestyle='none', marker='o', color=color,
                   label=grasp_label))

# Add the legend to the plot
plt.legend(handles=legend_elements, loc='lower center', bbox_to_anchor=(0.5, -0.4), ncol=3)
# Set labels for x and y axes
plt.xlabel('Real eval')
plt.ylabel('Sim eval')
plt.title('Real eval vs Sim eval (Success and Grasp)')

# Adjust layout to prevent legend from being cut off
plt.tight_layout()


plt.show()

code snippet to generate the plot

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant