Breakout Multi-Seed Transfer Validation v4

policy

{
  "name": "breakout_policy_v1",
  "logic": "RAM/Go heuristic tracks ball x against paddle x and fires on reset."
}

go_shadow_eval

{
  "episodes": 6,
  "score_sum": 38.0,
  "score_mean": 6.333333333333333,
  "score_min": 5.0,
  "score_max": 7.0,
  "steps_sum": 2828,
  "video_urls": [
    "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep000_seed0.mp4",
    "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep001_seed1.mp4",
    "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep002_seed2.mp4",
    "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep003_seed3.mp4",
    "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep004_seed4.mp4",
    "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep005_seed5.mp4"
  ],
  "rows": [
    {
      "timestamp": "20260510T111932Z",
      "trial_index": 1,
      "policy_name": "breakout_policy_v1",
      "seed": 0,
      "episode": 0,
      "score": 7,
      "steps": 488,
      "done": true,
      "replay_path": "breakout/runs-transfer-v4/replays/20260510T111932Z_ep000_seed0.jsonl",
      "video_path": "breakout/runs-transfer-v4/videos/20260510T111932Z_ep000_seed0.mp4",
      "video_url": "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep000_seed0.mp4"
    },
    {
      "timestamp": "20260510T111932Z",
      "trial_index": 1,
      "policy_name": "breakout_policy_v1",
      "seed": 1,
      "episode": 1,
      "score": 5,
      "steps": 440,
      "done": true,
      "replay_path": "breakout/runs-transfer-v4/replays/20260510T111932Z_ep001_seed1.jsonl",
      "video_path": "breakout/runs-transfer-v4/videos/20260510T111932Z_ep001_seed1.mp4",
      "video_url": "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep001_seed1.mp4"
    },
    {
      "timestamp": "20260510T111932Z",
      "trial_index": 1,
      "policy_name": "breakout_policy_v1",
      "seed": 2,
      "episode": 2,
      "score": 6,
      "steps": 465,
      "done": true,
      "replay_path": "breakout/runs-transfer-v4/replays/20260510T111932Z_ep002_seed2.jsonl",
      "video_path": "breakout/runs-transfer-v4/videos/20260510T111932Z_ep002_seed2.mp4",
      "video_url": "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep002_seed2.mp4"
    },
    {
      "timestamp": "20260510T111932Z",
      "trial_index": 1,
      "policy_name": "breakout_policy_v1",
      "seed": 3,
      "episode": 3,
      "score": 7,
      "steps": 484,
      "done": true,
      "replay_path": "breakout/runs-transfer-v4/replays/20260510T111932Z_ep003_seed3.jsonl",
      "video_path": "breakout/runs-transfer-v4/videos/20260510T111932Z_ep003_seed3.mp4",
      "video_url": "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep003_seed3.mp4"
    },
    {
      "timestamp": "20260510T111932Z",
      "trial_index": 1,
      "policy_name": "breakout_policy_v1",
      "seed": 4,
      "episode": 4,
      "score": 6,
      "steps": 463,
      "done": true,
      "replay_path": "breakout/runs-transfer-v4/replays/20260510T111932Z_ep004_seed4.jsonl",
      "video_path": "breakout/runs-transfer-v4/videos/20260510T111932Z_ep004_seed4.mp4",
      "video_url": "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep004_seed4.mp4"
    },
    {
      "timestamp": "20260510T111932Z",
      "trial_index": 1,
      "policy_name": "breakout_policy_v1",
      "seed": 5,
      "episode": 5,
      "score": 7,
      "steps": 488,
      "done": true,
      "replay_path": "breakout/runs-transfer-v4/replays/20260510T111932Z_ep005_seed5.jsonl",
      "video_path": "breakout/runs-transfer-v4/videos/20260510T111932Z_ep005_seed5.mp4",
      "video_url": "https://file.aimusic.win/gemma/breakout/20260510T111932Z_ep005_seed5.mp4"
    }
  ]
}

atari_backtest

{
  "episodes": 6,
  "score_sum": 16.0,
  "score_mean": 2.6666666666666665,
  "score_min": 1.0,
  "score_max": 7.0,
  "steps_sum": 10800,
  "video_urls": [
    "https://file.aimusic.win/gemma/breakout/transfer_v4_atari.mp4"
  ],
  "rows": [
    {
      "episode": 0,
      "score": 7.0,
      "steps": 1800,
      "first_reward_step": 40,
      "reward_count": 7,
      "done": false
    },
    {
      "episode": 1,
      "score": 2.0,
      "steps": 1800,
      "first_reward_step": 40,
      "reward_count": 2,
      "done": false
    },
    {
      "episode": 2,
      "score": 2.0,
      "steps": 1800,
      "first_reward_step": 40,
      "reward_count": 2,
      "done": false
    },
    {
      "episode": 3,
      "score": 2.0,
      "steps": 1800,
      "first_reward_step": 40,
      "reward_count": 2,
      "done": false
    },
    {
      "episode": 4,
      "score": 1.0,
      "steps": 1800,
      "first_reward_step": 32,
      "reward_count": 1,
      "done": false
    },
    {
      "episode": 5,
      "score": 2.0,
      "steps": 1800,
      "first_reward_step": 40,
      "reward_count": 2,
      "done": false
    }
  ]
}

judgment

{
  "verdict": "partial_transfer",
  "go_score_mean": 6.333333333333333,
  "atari_score_mean": 2.6666666666666665,
  "atari_to_go_score_ratio": 0.42105263157894735,
  "interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
  "next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
}

historical_ratios

[
  {
    "report": "breakout/calibration/reports/transfer_v1_report.json",
    "ratio": 0.45,
    "go_score_mean": 10.0,
    "atari_score_mean": 4.5,
    "verdict": "partial_transfer",
    "go_episodes": 6,
    "atari_episodes": 2
  },
  {
    "report": "breakout/calibration/reports/transfer_v2_report.json",
    "ratio": 0.5,
    "go_score_mean": 9.0,
    "atari_score_mean": 4.5,
    "verdict": "partial_transfer",
    "go_episodes": 2,
    "atari_episodes": 2
  },
  {
    "report": "breakout/calibration/reports/transfer_v3_report.json",
    "ratio": 0.75,
    "go_score_mean": 6.0,
    "atari_score_mean": 4.5,
    "verdict": "partial_transfer",
    "go_episodes": 2,
    "atari_episodes": 2
  }
]

diagnosis

{
  "target_ratio": 0.75,
  "v3_ratio": 0.75,
  "v4_ratio": 0.42105263157894735,
  "ratio_delta_from_v3": -0.32894736842105265,
  "dropped_below_target": true,
  "likely_reason": "larger_seed_window_exposes_atari_policy_variance",
  "next_recommendation": "Treat v3 as seed-window optimistic; next improve policy robustness and only then tune brick-state details.",
  "go_score_distribution": {
    "scores": [
      7.0,
      5.0,
      6.0,
      7.0,
      6.0,
      7.0
    ],
    "median": 6.5,
    "stdev": 0.7453559924999299,
    "zero_score_count": 0
  },
  "atari_score_distribution": {
    "scores": [
      7.0,
      2.0,
      2.0,
      2.0,
      1.0,
      2.0
    ],
    "median": 2.0,
    "stdev": 1.9720265943665387,
    "zero_score_count": 0
  }
}