Breakout Paddle Intercept Planner v1

objective

{
  "name": "Breakout paddle intercept planner v1",
  "target_policy": "breakout_policy_planner_v1"
}

policy_comparison

{
  "breakout_policy_v2": {
    "policy": "breakout_policy_v2",
    "source_report": "policy_robustness_v1_report.json",
    "go": {
      "scores": [
        8.0,
        7.0,
        5.0,
        7.0,
        7.0,
        8.0
      ],
      "mean": 7.0,
      "median": 7.0,
      "min": 5.0,
      "max": 8.0,
      "low_score_threshold": 3.0,
      "low_score_count": 0
    },
    "atari": {
      "scores": [
        1.0,
        4.0,
        4.0,
        4.0,
        2.0,
        4.0
      ],
      "mean": 3.1666666666666665,
      "median": 4.0,
      "min": 1.0,
      "max": 4.0,
      "low_score_threshold": 3.0,
      "low_score_count": 2
    }
  },
  "breakout_policy_v3": {
    "policy": "breakout_policy_v3",
    "source_report": "policy_search_v2_report.json",
    "go": {
      "scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "mean": 7.166666666666667,
      "median": 7.0,
      "min": 6.0,
      "max": 8.0,
      "low_score_threshold": 3.0,
      "low_score_count": 0
    },
    "atari": {
      "scores": [
        2.0,
        13.0,
        13.0,
        13.0,
        0.0,
        13.0
      ],
      "mean": 9.0,
      "median": 13.0,
      "min": 0.0,
      "max": 13.0,
      "low_score_threshold": 3.0,
      "low_score_count": 2
    }
  },
  "breakout_policy_planner_v1": {
    "policy": "breakout_policy_planner_v1",
    "logic": "Predict descending ball landing at paddle height with approximate wall reflection, then fall back to v3 near-bottom timing.",
    "go": {
      "scores": [
        8.0,
        7.0,
        6.0,
        9.0,
        8.0,
        8.0
      ],
      "mean": 7.666666666666667,
      "median": 8.0,
      "min": 6.0,
      "max": 9.0,
      "low_score_threshold": 3.0,
      "low_score_count": 0
    },
    "atari": {
      "scores": [
        9.0,
        6.0,
        6.0,
        6.0,
        1.0,
        6.0
      ],
      "mean": 5.666666666666667,
      "median": 6.0,
      "min": 1.0,
      "max": 9.0,
      "low_score_threshold": 3.0,
      "low_score_count": 1
    },
    "judgment": {
      "verdict": "partial_transfer",
      "go_score_mean": 7.666666666666667,
      "atari_score_mean": 5.666666666666667,
      "atari_to_go_score_ratio": 0.7391304347826088,
      "interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
      "next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
    }
  }
}

failure_analysis

{
  "low_score_episodes": [
    {
      "episode": 4,
      "seed": 4,
      "score": 1.0,
      "steps": 1800,
      "final_ball_x": 164,
      "final_ball_y": 0,
      "final_paddle_x": 177,
      "final_vx": 0,
      "final_vy": 0,
      "final_intercept_error_px": 13,
      "final_action": 3,
      "serve_reset_first_active": {
        "step": 3,
        "action": 3,
        "ball_x": 183,
        "ball_y": 121,
        "paddle_x": 146
      },
      "first_reward_step": 40,
      "diagnosis": "Late miss: the paddle was not aligned with the ball near the terminal state (13 px intercept error); action lag or planner target timing remains a bottleneck."
    }
  ]
}

acceptance

{
  "atari_mean_delta_planner_minus_v3": -3.333333333333333,
  "atari_low_score_count_reduction_planner_minus_v3": 1,
  "improved": true,
  "verdict": "pass",
  "diagnosis": "Planner reduced the Atari low-score seed count versus breakout_policy_v3 (2 to 1), but its lower Atari mean (5.67 vs 9.0) shows that v3 near-bottom timing is still stronger on high-scoring seeds."
}

evidence

{
  "go_episodes": "breakout/runs-planner-policy-v1/episodes.jsonl",
  "atari_trace": "breakout/calibration/traces/planner_policy_v1_atari.jsonl",
  "atari_video_url": "https://file.aimusic.win/gemma/breakout/planner_policy_v1_atari.mp4"
}