Breakout Multi-Seed Transfer Validation v4
policy
{
"name": "breakout_policy_v1",
"logic": "RAM/Go heuristic tracks ball x against paddle x and fires on reset."
}
go_shadow_eval
{
"episodes": 6,
"score_sum": 38.0,
"score_mean": 6.333333333333333,
"score_min": 5.0,
"score_max": 7.0,
"steps_sum": 2828,
"video_urls": [
"https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep000_seed0.mp4",
"https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep001_seed1.mp4",
"https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep002_seed2.mp4",
"https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep003_seed3.mp4",
"https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep004_seed4.mp4",
"https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep005_seed5.mp4"
],
"rows": [
{
"timestamp": "20260510T111932Z",
"trial_index": 1,
"policy_name": "breakout_policy_v1",
"seed": 0,
"episode": 0,
"score": 7,
"steps": 488,
"done": true,
"replay_path": "breakout/runs-transfer-v4/replays/20260510T111932Z_ep000_seed0.jsonl",
"video_path": "breakout/runs-transfer-v4/videos/20260510T111932Z_ep000_seed0.mp4",
"video_url": "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep000_seed0.mp4"
},
{
"timestamp": "20260510T111932Z",
"trial_index": 1,
"policy_name": "breakout_policy_v1",
"seed": 1,
"episode": 1,
"score": 5,
"steps": 440,
"done": true,
"replay_path": "breakout/runs-transfer-v4/replays/20260510T111932Z_ep001_seed1.jsonl",
"video_path": "breakout/runs-transfer-v4/videos/20260510T111932Z_ep001_seed1.mp4",
"video_url": "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep001_seed1.mp4"
},
{
"timestamp": "20260510T111932Z",
"trial_index": 1,
"policy_name": "breakout_policy_v1",
"seed": 2,
"episode": 2,
"score": 6,
"steps": 465,
"done": true,
"replay_path": "breakout/runs-transfer-v4/replays/20260510T111932Z_ep002_seed2.jsonl",
"video_path": "breakout/runs-transfer-v4/videos/20260510T111932Z_ep002_seed2.mp4",
"video_url": "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep002_seed2.mp4"
},
{
"timestamp": "20260510T111932Z",
"trial_index": 1,
"policy_name": "breakout_policy_v1",
"seed": 3,
"episode": 3,
"score": 7,
"steps": 484,
"done": true,
"replay_path": "breakout/runs-transfer-v4/replays/20260510T111932Z_ep003_seed3.jsonl",
"video_path": "breakout/runs-transfer-v4/videos/20260510T111932Z_ep003_seed3.mp4",
"video_url": "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep003_seed3.mp4"
},
{
"timestamp": "20260510T111932Z",
"trial_index": 1,
"policy_name": "breakout_policy_v1",
"seed": 4,
"episode": 4,
"score": 6,
"steps": 463,
"done": true,
"replay_path": "breakout/runs-transfer-v4/replays/20260510T111932Z_ep004_seed4.jsonl",
"video_path": "breakout/runs-transfer-v4/videos/20260510T111932Z_ep004_seed4.mp4",
"video_url": "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep004_seed4.mp4"
},
{
"timestamp": "20260510T111932Z",
"trial_index": 1,
"policy_name": "breakout_policy_v1",
"seed": 5,
"episode": 5,
"score": 7,
"steps": 488,
"done": true,
"replay_path": "breakout/runs-transfer-v4/replays/20260510T111932Z_ep005_seed5.jsonl",
"video_path": "breakout/runs-transfer-v4/videos/20260510T111932Z_ep005_seed5.mp4",
"video_url": "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep005_seed5.mp4"
}
]
}
atari_backtest
{
"episodes": 6,
"score_sum": 16.0,
"score_mean": 2.6666666666666665,
"score_min": 1.0,
"score_max": 7.0,
"steps_sum": 10800,
"video_urls": [
"https://file.aimusic.win/gemma/breakout/transfer_v4_atari.mp4"
],
"rows": [
{
"episode": 0,
"score": 7.0,
"steps": 1800,
"first_reward_step": 40,
"reward_count": 7,
"done": false
},
{
"episode": 1,
"score": 2.0,
"steps": 1800,
"first_reward_step": 40,
"reward_count": 2,
"done": false
},
{
"episode": 2,
"score": 2.0,
"steps": 1800,
"first_reward_step": 40,
"reward_count": 2,
"done": false
},
{
"episode": 3,
"score": 2.0,
"steps": 1800,
"first_reward_step": 40,
"reward_count": 2,
"done": false
},
{
"episode": 4,
"score": 1.0,
"steps": 1800,
"first_reward_step": 32,
"reward_count": 1,
"done": false
},
{
"episode": 5,
"score": 2.0,
"steps": 1800,
"first_reward_step": 40,
"reward_count": 2,
"done": false
}
]
}
judgment
{
"verdict": "partial_transfer",
"go_score_mean": 6.333333333333333,
"atari_score_mean": 2.6666666666666665,
"atari_to_go_score_ratio": 0.42105263157894735,
"interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
"next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
}
historical_ratios
[
{
"report": "breakout/calibration/reports/transfer_v1_report.json",
"ratio": 0.45,
"go_score_mean": 10.0,
"atari_score_mean": 4.5,
"verdict": "partial_transfer",
"go_episodes": 6,
"atari_episodes": 2
},
{
"report": "breakout/calibration/reports/transfer_v2_report.json",
"ratio": 0.5,
"go_score_mean": 9.0,
"atari_score_mean": 4.5,
"verdict": "partial_transfer",
"go_episodes": 2,
"atari_episodes": 2
},
{
"report": "breakout/calibration/reports/transfer_v3_report.json",
"ratio": 0.75,
"go_score_mean": 6.0,
"atari_score_mean": 4.5,
"verdict": "partial_transfer",
"go_episodes": 2,
"atari_episodes": 2
}
]
diagnosis
{
"target_ratio": 0.75,
"v3_ratio": 0.75,
"v4_ratio": 0.42105263157894735,
"ratio_delta_from_v3": -0.32894736842105265,
"dropped_below_target": true,
"likely_reason": "larger_seed_window_exposes_atari_policy_variance",
"next_recommendation": "Treat v3 as seed-window optimistic; next improve policy robustness and only then tune brick-state details.",
"go_score_distribution": {
"scores": [
7.0,
5.0,
6.0,
7.0,
6.0,
7.0
],
"median": 6.5,
"stdev": 0.7453559924999299,
"zero_score_count": 0
},
"atari_score_distribution": {
"scores": [
7.0,
2.0,
2.0,
2.0,
1.0,
2.0
],
"median": 2.0,
"stdev": 1.9720265943665387,
"zero_score_count": 0
}
}