Breakout Policy Robustness v1
baseline_policy
{
"name": "breakout_policy_v1",
"source_report": "breakout/calibration/reports/transfer_v4_report.json",
"judgment": {
"verdict": "partial_transfer",
"go_score_mean": 6.333333333333333,
"atari_score_mean": 2.6666666666666665,
"atari_to_go_score_ratio": 0.42105263157894735,
"interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
"next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
}
}
candidate_policy
{
"name": "breakout_policy_v2",
"logic": "One-step RAM velocity lead near the bottom; continue moving through the old deadzone when the ball is descending."
}
candidate_transfer
{
"policy": {
"name": "breakout_policy_v2",
"logic": "One-step RAM velocity lead near the bottom; continue moving through the old deadzone when the ball is descending."
},
"go_shadow_eval": {
"episodes": 6,
"score_sum": 42.0,
"score_mean": 7.0,
"score_min": 5.0,
"score_max": 8.0,
"steps_sum": 2902,
"video_urls": [
"https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep000_seed0.mp4",
"https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep001_seed1.mp4",
"https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep002_seed2.mp4",
"https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep003_seed3.mp4",
"https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep004_seed4.mp4",
"https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep005_seed5.mp4"
],
"rows": [
{
"timestamp": "20260510T112920Z",
"trial_index": 1,
"policy_name": "breakout_policy_v2",
"seed": 0,
"episode": 0,
"score": 8,
"steps": 504,
"done": true,
"replay_path": "breakout/runs-policy-robustness-v1/replays/20260510T112920Z_ep000_seed0.jsonl",
"video_path": "breakout/runs-policy-robustness-v1/videos/20260510T112920Z_ep000_seed0.mp4",
"video_url": "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep000_seed0.mp4"
},
{
"timestamp": "20260510T112920Z",
"trial_index": 1,
"policy_name": "breakout_policy_v2",
"seed": 1,
"episode": 1,
"score": 7,
"steps": 492,
"done": true,
"replay_path": "breakout/runs-policy-robustness-v1/replays/20260510T112920Z_ep001_seed1.jsonl",
"video_path": "breakout/runs-policy-robustness-v1/videos/20260510T112920Z_ep001_seed1.mp4",
"video_url": "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep001_seed1.mp4"
},
{
"timestamp": "20260510T112920Z",
"trial_index": 1,
"policy_name": "breakout_policy_v2",
"seed": 2,
"episode": 2,
"score": 5,
"steps": 440,
"done": true,
"replay_path": "breakout/runs-policy-robustness-v1/replays/20260510T112920Z_ep002_seed2.jsonl",
"video_path": "breakout/runs-policy-robustness-v1/videos/20260510T112920Z_ep002_seed2.mp4",
"video_url": "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep002_seed2.mp4"
},
{
"timestamp": "20260510T112920Z",
"trial_index": 1,
"policy_name": "breakout_policy_v2",
"seed": 3,
"episode": 3,
"score": 7,
"steps": 480,
"done": true,
"replay_path": "breakout/runs-policy-robustness-v1/replays/20260510T112920Z_ep003_seed3.jsonl",
"video_path": "breakout/runs-policy-robustness-v1/videos/20260510T112920Z_ep003_seed3.mp4",
"video_url": "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep003_seed3.mp4"
},
{
"timestamp": "20260510T112920Z",
"trial_index": 1,
"policy_name": "breakout_policy_v2",
"seed": 4,
"episode": 4,
"score": 7,
"steps": 482,
"done": true,
"replay_path": "breakout/runs-policy-robustness-v1/replays/20260510T112920Z_ep004_seed4.jsonl",
"video_path": "breakout/runs-policy-robustness-v1/videos/20260510T112920Z_ep004_seed4.mp4",
"video_url": "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep004_seed4.mp4"
},
{
"timestamp": "20260510T112920Z",
"trial_index": 1,
"policy_name": "breakout_policy_v2",
"seed": 5,
"episode": 5,
"score": 8,
"steps": 504,
"done": true,
"replay_path": "breakout/runs-policy-robustness-v1/replays/20260510T112920Z_ep005_seed5.jsonl",
"video_path": "breakout/runs-policy-robustness-v1/videos/20260510T112920Z_ep005_seed5.mp4",
"video_url": "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep005_seed5.mp4"
}
]
},
"atari_backtest": {
"episodes": 6,
"score_sum": 19.0,
"score_mean": 3.1666666666666665,
"score_min": 1.0,
"score_max": 4.0,
"steps_sum": 10800,
"video_urls": [
"https://file.aimusic.win/gemma/breakout/policy_robustness_v1_atari.mp4"
],
"rows": [
{
"episode": 0,
"score": 1.0,
"steps": 1800,
"first_reward_step": 32,
"reward_count": 1,
"done": false
},
{
"episode": 1,
"score": 4.0,
"steps": 1800,
"first_reward_step": 40,
"reward_count": 4,
"done": false
},
{
"episode": 2,
"score": 4.0,
"steps": 1800,
"first_reward_step": 40,
"reward_count": 4,
"done": false
},
{
"episode": 3,
"score": 4.0,
"steps": 1800,
"first_reward_step": 40,
"reward_count": 4,
"done": false
},
{
"episode": 4,
"score": 2.0,
"steps": 1800,
"first_reward_step": 41,
"reward_count": 2,
"done": false
},
{
"episode": 5,
"score": 4.0,
"steps": 1800,
"first_reward_step": 40,
"reward_count": 4,
"done": false
}
]
},
"judgment": {
"verdict": "partial_transfer",
"go_score_mean": 7.0,
"atari_score_mean": 3.1666666666666665,
"atari_to_go_score_ratio": 0.4523809523809524,
"interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
"next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
}
}
comparison
{
"improved": true,
"atari_mean_delta_v2_minus_v1": 0.5,
"atari_low_score_count_reduction": 3,
"baseline_atari": {
"scores": [
7.0,
2.0,
2.0,
2.0,
1.0,
2.0
],
"mean": 2.6666666666666665,
"median": 2.0,
"stdev": 1.9720265943665387,
"low_score_threshold": 3.0,
"low_score_count": 5
},
"candidate_atari": {
"scores": [
1.0,
4.0,
4.0,
4.0,
2.0,
4.0
],
"mean": 3.1666666666666665,
"median": 4.0,
"stdev": 1.2133516482134197,
"low_score_threshold": 3.0,
"low_score_count": 2
},
"baseline_go": {
"scores": [
7.0,
5.0,
6.0,
7.0,
6.0,
7.0
],
"mean": 6.333333333333333,
"median": 6.5,
"stdev": 0.7453559924999299,
"low_score_threshold": 3.0,
"low_score_count": 0
},
"candidate_go": {
"scores": [
8.0,
7.0,
5.0,
7.0,
7.0,
8.0
],
"mean": 7.0,
"median": 7.0,
"stdev": 1.0,
"low_score_threshold": 3.0,
"low_score_count": 0
},
"diagnosis": "Predictive near-bottom action timing improves Atari robustness across the six-seed window.",
"next_recommendation": "Use breakout_policy_v2 as the next baseline, then search lead/deadzone parameters on Atari validation seeds."
}