Breakout Robust Policy Benchmark v1
objective
{
"name": "Breakout robust policy benchmark v1",
"frozen_policy": "breakout_policy_v3",
"frozen_env": "current Go shadow env after env_gap_audit_v1",
"selection_reason": "breakout_policy_v3 has the best prior Atari six-seed mean; planner_v1 reduced the low-score count but also lowered the mean."
}
benchmark
{
"go": {
"distribution": {
"count": 20,
"scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0,
7.0,
9.0,
8.0,
7.0,
9.0,
7.0,
7.0,
6.0,
7.0,
7.0,
7.0,
6.0,
7.0,
7.0
],
"mean": 7.2,
"median": 7.0,
"min": 6.0,
"max": 9.0,
"stdev": 0.812403840463596,
"low_score_threshold": 3.0,
"low_score_count": 0
},
"rows": [
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 0,
"episode": 0,
"score": 8,
"steps": 505,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep000_seed0.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 1,
"episode": 1,
"score": 7,
"steps": 492,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep001_seed1.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 2,
"episode": 2,
"score": 6,
"steps": 467,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep002_seed2.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 3,
"episode": 3,
"score": 7,
"steps": 483,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep003_seed3.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 4,
"episode": 4,
"score": 7,
"steps": 482,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep004_seed4.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 5,
"episode": 5,
"score": 8,
"steps": 505,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep005_seed5.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 6,
"episode": 6,
"score": 7,
"steps": 485,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep006_seed6.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 7,
"episode": 7,
"score": 9,
"steps": 516,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep007_seed7.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 8,
"episode": 8,
"score": 8,
"steps": 499,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep008_seed8.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 9,
"episode": 9,
"score": 7,
"steps": 488,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep009_seed9.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 10,
"episode": 10,
"score": 9,
"steps": 516,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep010_seed10.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 11,
"episode": 11,
"score": 7,
"steps": 483,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep011_seed11.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 12,
"episode": 12,
"score": 7,
"steps": 484,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep012_seed12.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 13,
"episode": 13,
"score": 6,
"steps": 465,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep013_seed13.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 14,
"episode": 14,
"score": 7,
"steps": 484,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep014_seed14.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 15,
"episode": 15,
"score": 7,
"steps": 484,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep015_seed15.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 16,
"episode": 16,
"score": 7,
"steps": 477,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep016_seed16.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 17,
"episode": 17,
"score": 6,
"steps": 468,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep017_seed17.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 18,
"episode": 18,
"score": 7,
"steps": 484,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep018_seed18.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 19,
"episode": 19,
"score": 7,
"steps": 484,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep019_seed19.jsonl"
}
],
"low_score_seeds": []
},
"atari": {
"distribution": {
"count": 20,
"scores": [
2.0,
13.0,
13.0,
13.0,
0.0,
13.0,
2.0,
13.0,
2.0,
0.0,
0.0,
0.0,
13.0,
2.0,
0.0,
13.0,
2.0,
2.0,
0.0,
0.0
],
"mean": 5.15,
"median": 2.0,
"min": 0.0,
"max": 13.0,
"stdev": 5.816141332533109,
"low_score_threshold": 3.0,
"low_score_count": 13
},
"rows": [
{
"episode": 0,
"seed": 0,
"score": 2.0,
"steps": 1800,
"done": false,
"first_reward_step": 32,
"reward_count": 2
},
{
"episode": 1,
"seed": 1,
"score": 13.0,
"steps": 1800,
"done": false,
"first_reward_step": 40,
"reward_count": 13
},
{
"episode": 2,
"seed": 2,
"score": 13.0,
"steps": 1800,
"done": false,
"first_reward_step": 40,
"reward_count": 13
},
{
"episode": 3,
"seed": 3,
"score": 13.0,
"steps": 1800,
"done": false,
"first_reward_step": 40,
"reward_count": 13
},
{
"episode": 4,
"seed": 4,
"score": 0.0,
"steps": 1800,
"done": false,
"first_reward_step": null,
"reward_count": 0
},
{
"episode": 5,
"seed": 5,
"score": 13.0,
"steps": 1800,
"done": false,
"first_reward_step": 40,
"reward_count": 13
},
{
"episode": 6,
"seed": 6,
"score": 2.0,
"steps": 1800,
"done": false,
"first_reward_step": 32,
"reward_count": 2
},
{
"episode": 7,
"seed": 7,
"score": 13.0,
"steps": 1800,
"done": false,
"first_reward_step": 40,
"reward_count": 13
},
{
"episode": 8,
"seed": 8,
"score": 2.0,
"steps": 1800,
"done": false,
"first_reward_step": 32,
"reward_count": 2
},
{
"episode": 9,
"seed": 9,
"score": 0.0,
"steps": 1800,
"done": false,
"first_reward_step": null,
"reward_count": 0
},
{
"episode": 10,
"seed": 10,
"score": 0.0,
"steps": 1800,
"done": false,
"first_reward_step": null,
"reward_count": 0
},
{
"episode": 11,
"seed": 11,
"score": 0.0,
"steps": 1800,
"done": false,
"first_reward_step": null,
"reward_count": 0
},
{
"episode": 12,
"seed": 12,
"score": 13.0,
"steps": 1800,
"done": false,
"first_reward_step": 40,
"reward_count": 13
},
{
"episode": 13,
"seed": 13,
"score": 2.0,
"steps": 1800,
"done": false,
"first_reward_step": 32,
"reward_count": 2
},
{
"episode": 14,
"seed": 14,
"score": 0.0,
"steps": 1800,
"done": false,
"first_reward_step": null,
"reward_count": 0
},
{
"episode": 15,
"seed": 15,
"score": 13.0,
"steps": 1800,
"done": false,
"first_reward_step": 40,
"reward_count": 13
},
{
"episode": 16,
"seed": 16,
"score": 2.0,
"steps": 1800,
"done": false,
"first_reward_step": 32,
"reward_count": 2
},
{
"episode": 17,
"seed": 17,
"score": 2.0,
"steps": 1800,
"done": false,
"first_reward_step": 32,
"reward_count": 2
},
{
"episode": 18,
"seed": 18,
"score": 0.0,
"steps": 1800,
"done": false,
"first_reward_step": null,
"reward_count": 0
},
{
"episode": 19,
"seed": 19,
"score": 0.0,
"steps": 1800,
"done": false,
"first_reward_step": null,
"reward_count": 0
}
],
"low_score_seeds": [
0,
4,
6,
8,
9,
10,
11,
13,
14,
16,
17,
18,
19
]
},
"paired_by_seed": [
{
"seed": 0,
"go_score": 8.0,
"atari_score": 2.0,
"delta_atari_minus_go": -6.0
},
{
"seed": 1,
"go_score": 7.0,
"atari_score": 13.0,
"delta_atari_minus_go": 6.0
},
{
"seed": 2,
"go_score": 6.0,
"atari_score": 13.0,
"delta_atari_minus_go": 7.0
},
{
"seed": 3,
"go_score": 7.0,
"atari_score": 13.0,
"delta_atari_minus_go": 6.0
},
{
"seed": 4,
"go_score": 7.0,
"atari_score": 0.0,
"delta_atari_minus_go": -7.0
},
{
"seed": 5,
"go_score": 8.0,
"atari_score": 13.0,
"delta_atari_minus_go": 5.0
},
{
"seed": 6,
"go_score": 7.0,
"atari_score": 2.0,
"delta_atari_minus_go": -5.0
},
{
"seed": 7,
"go_score": 9.0,
"atari_score": 13.0,
"delta_atari_minus_go": 4.0
},
{
"seed": 8,
"go_score": 8.0,
"atari_score": 2.0,
"delta_atari_minus_go": -6.0
},
{
"seed": 9,
"go_score": 7.0,
"atari_score": 0.0,
"delta_atari_minus_go": -7.0
},
{
"seed": 10,
"go_score": 9.0,
"atari_score": 0.0,
"delta_atari_minus_go": -9.0
},
{
"seed": 11,
"go_score": 7.0,
"atari_score": 0.0,
"delta_atari_minus_go": -7.0
},
{
"seed": 12,
"go_score": 7.0,
"atari_score": 13.0,
"delta_atari_minus_go": 6.0
},
{
"seed": 13,
"go_score": 6.0,
"atari_score": 2.0,
"delta_atari_minus_go": -4.0
},
{
"seed": 14,
"go_score": 7.0,
"atari_score": 0.0,
"delta_atari_minus_go": -7.0
},
{
"seed": 15,
"go_score": 7.0,
"atari_score": 13.0,
"delta_atari_minus_go": 6.0
},
{
"seed": 16,
"go_score": 7.0,
"atari_score": 2.0,
"delta_atari_minus_go": -5.0
},
{
"seed": 17,
"go_score": 6.0,
"atari_score": 2.0,
"delta_atari_minus_go": -4.0
},
{
"seed": 18,
"go_score": 7.0,
"atari_score": 0.0,
"delta_atari_minus_go": -7.0
},
{
"seed": 19,
"go_score": 7.0,
"atari_score": 0.0,
"delta_atari_minus_go": -7.0
}
]
}
low_score_analysis
{
"threshold": 3.0,
"atari_low_score_seeds": [
0,
4,
6,
8,
9,
10,
11,
13,
14,
16,
17,
18,
19
],
"go_low_score_seeds": [],
"interpretation": "Atari low-score seeds remain frequent across the 20-seed window (13 of 20 seeds scored below the 3.0 threshold, versus 0 of 20 in Go), so the six-seed gains were not robust."
}
r2_samples
{
"high_seed": {
"seed": 1,
"score": 13,
"url": "https://file.aimusic.win/gemma/breakout/robust_policy_benchmark_v1_atari_seed1_high.mp4"
},
"low_seed": {
"seed": 4,
"score": 0,
"url": "https://file.aimusic.win/gemma/breakout/robust_policy_benchmark_v1_atari_seed4_low.mp4"
},
"median_seed": {
"seed": 0,
"score": 2,
"url": "https://file.aimusic.win/gemma/breakout/robust_policy_benchmark_v1_atari_seed0_median.mp4"
}
}
decision
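{
"go_no_go": "no_go",
"can_train_mainly_in_go": false,
"reason": "The Atari 20-seed distribution is too unstable for Go-primary training (mean 5.15, stdev 5.82, 13 of 20 seeds below the 3.0 threshold), while Go scores on the same seeds stay between 6 and 9 and the per-seed Atari-minus-Go deltas range from -9 to +7; continue using Go as a coarse filter with substantial Atari validation.",
"next_recommendation": "Before starting a training loop, address the env gaps from env_gap_audit_v1 or make the training loop explicitly Atari-validated."
}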