Breakout Policy Robustness v1

baseline_policy

{
  "name": "breakout_policy_v1",
  "source_report": "breakout/calibration/reports/transfer_v4_report.json",
  "judgment": {
    "verdict": "partial_transfer",
    "go_score_mean": 6.333333333333333,
    "atari_score_mean": 2.6666666666666665,
    "atari_to_go_score_ratio": 0.42105263157894735,
    "interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
    "next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
  }
}

candidate_policy

{
  "name": "breakout_policy_v2",
  "logic": "One-step RAM velocity lead near the bottom; continue moving through the old deadzone when the ball is descending."
}

candidate_transfer

{
  "policy": {
    "name": "breakout_policy_v2",
    "logic": "One-step RAM velocity lead near the bottom; continue moving through the old deadzone when the ball is descending."
  },
  "go_shadow_eval": {
    "episodes": 6,
    "score_sum": 42.0,
    "score_mean": 7.0,
    "score_min": 5.0,
    "score_max": 8.0,
    "steps_sum": 2902,
    "video_urls": [
      "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep000_seed0.mp4",
      "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep001_seed1.mp4",
      "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep002_seed2.mp4",
      "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep003_seed3.mp4",
      "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep004_seed4.mp4",
      "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep005_seed5.mp4"
    ],
    "rows": [
      {
        "timestamp": "20260510T112920Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v2",
        "seed": 0,
        "episode": 0,
        "score": 8,
        "steps": 504,
        "done": true,
        "replay_path": "breakout/runs-policy-robustness-v1/replays/20260510T112920Z_ep000_seed0.jsonl",
        "video_path": "breakout/runs-policy-robustness-v1/videos/20260510T112920Z_ep000_seed0.mp4",
        "video_url": "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep000_seed0.mp4"
      },
      {
        "timestamp": "20260510T112920Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v2",
        "seed": 1,
        "episode": 1,
        "score": 7,
        "steps": 492,
        "done": true,
        "replay_path": "breakout/runs-policy-robustness-v1/replays/20260510T112920Z_ep001_seed1.jsonl",
        "video_path": "breakout/runs-policy-robustness-v1/videos/20260510T112920Z_ep001_seed1.mp4",
        "video_url": "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep001_seed1.mp4"
      },
      {
        "timestamp": "20260510T112920Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v2",
        "seed": 2,
        "episode": 2,
        "score": 5,
        "steps": 440,
        "done": true,
        "replay_path": "breakout/runs-policy-robustness-v1/replays/20260510T112920Z_ep002_seed2.jsonl",
        "video_path": "breakout/runs-policy-robustness-v1/videos/20260510T112920Z_ep002_seed2.mp4",
        "video_url": "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep002_seed2.mp4"
      },
      {
        "timestamp": "20260510T112920Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v2",
        "seed": 3,
        "episode": 3,
        "score": 7,
        "steps": 480,
        "done": true,
        "replay_path": "breakout/runs-policy-robustness-v1/replays/20260510T112920Z_ep003_seed3.jsonl",
        "video_path": "breakout/runs-policy-robustness-v1/videos/20260510T112920Z_ep003_seed3.mp4",
        "video_url": "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep003_seed3.mp4"
      },
      {
        "timestamp": "20260510T112920Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v2",
        "seed": 4,
        "episode": 4,
        "score": 7,
        "steps": 482,
        "done": true,
        "replay_path": "breakout/runs-policy-robustness-v1/replays/20260510T112920Z_ep004_seed4.jsonl",
        "video_path": "breakout/runs-policy-robustness-v1/videos/20260510T112920Z_ep004_seed4.mp4",
        "video_url": "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep004_seed4.mp4"
      },
      {
        "timestamp": "20260510T112920Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v2",
        "seed": 5,
        "episode": 5,
        "score": 8,
        "steps": 504,
        "done": true,
        "replay_path": "breakout/runs-policy-robustness-v1/replays/20260510T112920Z_ep005_seed5.jsonl",
        "video_path": "breakout/runs-policy-robustness-v1/videos/20260510T112920Z_ep005_seed5.mp4",
        "video_url": "https://file.aimusic.win/gemma/breakout/20260510T112920Z_ep005_seed5.mp4"
      }
    ]
  },
  "atari_backtest": {
    "episodes": 6,
    "score_sum": 19.0,
    "score_mean": 3.1666666666666665,
    "score_min": 1.0,
    "score_max": 4.0,
    "steps_sum": 10800,
    "video_urls": [
      "https://file.aimusic.win/gemma/breakout/policy_robustness_v1_atari.mp4"
    ],
    "rows": [
      {
        "episode": 0,
        "score": 1.0,
        "steps": 1800,
        "first_reward_step": 32,
        "reward_count": 1,
        "done": false
      },
      {
        "episode": 1,
        "score": 4.0,
        "steps": 1800,
        "first_reward_step": 40,
        "reward_count": 4,
        "done": false
      },
      {
        "episode": 2,
        "score": 4.0,
        "steps": 1800,
        "first_reward_step": 40,
        "reward_count": 4,
        "done": false
      },
      {
        "episode": 3,
        "score": 4.0,
        "steps": 1800,
        "first_reward_step": 40,
        "reward_count": 4,
        "done": false
      },
      {
        "episode": 4,
        "score": 2.0,
        "steps": 1800,
        "first_reward_step": 41,
        "reward_count": 2,
        "done": false
      },
      {
        "episode": 5,
        "score": 4.0,
        "steps": 1800,
        "first_reward_step": 40,
        "reward_count": 4,
        "done": false
      }
    ]
  },
  "judgment": {
    "verdict": "partial_transfer",
    "go_score_mean": 7.0,
    "atari_score_mean": 3.1666666666666665,
    "atari_to_go_score_ratio": 0.4523809523809524,
    "interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
    "next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
  }
}

comparison

{
  "improved": true,
  "atari_mean_delta_v2_minus_v1": 0.5,
  "atari_low_score_count_reduction": 3,
  "baseline_atari": {
    "scores": [
      7.0,
      2.0,
      2.0,
      2.0,
      1.0,
      2.0
    ],
    "mean": 2.6666666666666665,
    "median": 2.0,
    "stdev": 1.9720265943665387,
    "low_score_threshold": 3.0,
    "low_score_count": 5
  },
  "candidate_atari": {
    "scores": [
      1.0,
      4.0,
      4.0,
      4.0,
      2.0,
      4.0
    ],
    "mean": 3.1666666666666665,
    "median": 4.0,
    "stdev": 1.2133516482134197,
    "low_score_threshold": 3.0,
    "low_score_count": 2
  },
  "baseline_go": {
    "scores": [
      7.0,
      5.0,
      6.0,
      7.0,
      6.0,
      7.0
    ],
    "mean": 6.333333333333333,
    "median": 6.5,
    "stdev": 0.7453559924999299,
    "low_score_threshold": 3.0,
    "low_score_count": 0
  },
  "candidate_go": {
    "scores": [
      8.0,
      7.0,
      5.0,
      7.0,
      7.0,
      8.0
    ],
    "mean": 7.0,
    "median": 7.0,
    "stdev": 1.0,
    "low_score_threshold": 3.0,
    "low_score_count": 0
  },
  "diagnosis": "Predictive near-bottom action timing improves Atari robustness across the six-seed window.",
  "next_recommendation": "Use breakout_policy_v2 as the next baseline, then search lead/deadzone parameters on Atari validation seeds."
}