Breakout Paddle Intercept Planner v1
objective
{
"name": "Breakout paddle intercept planner v1",
"target_policy": "breakout_policy_planner_v1"
}
policy_comparison
{
"breakout_policy_v2": {
"policy": "breakout_policy_v2",
"source_report": "policy_robustness_v1_report.json",
"go": {
"scores": [
8.0,
7.0,
5.0,
7.0,
7.0,
8.0
],
"mean": 7.0,
"median": 7.0,
"min": 5.0,
"max": 8.0,
"low_score_threshold": 3.0,
"low_score_count": 0
},
"atari": {
"scores": [
1.0,
4.0,
4.0,
4.0,
2.0,
4.0
],
"mean": 3.1666666666666665,
"median": 4.0,
"min": 1.0,
"max": 4.0,
"low_score_threshold": 3.0,
"low_score_count": 2
}
},
"breakout_policy_v3": {
"policy": "breakout_policy_v3",
"source_report": "policy_search_v2_report.json",
"go": {
"scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"mean": 7.166666666666667,
"median": 7.0,
"min": 6.0,
"max": 8.0,
"low_score_threshold": 3.0,
"low_score_count": 0
},
"atari": {
"scores": [
2.0,
13.0,
13.0,
13.0,
0.0,
13.0
],
"mean": 9.0,
"median": 13.0,
"min": 0.0,
"max": 13.0,
"low_score_threshold": 3.0,
"low_score_count": 2
}
},
"breakout_policy_planner_v1": {
"policy": "breakout_policy_planner_v1",
"logic": "Predict descending ball landing at paddle height with approximate wall reflection, then fall back to v3 near-bottom timing.",
"go": {
"scores": [
8.0,
7.0,
6.0,
9.0,
8.0,
8.0
],
"mean": 7.666666666666667,
"median": 8.0,
"min": 6.0,
"max": 9.0,
"low_score_threshold": 3.0,
"low_score_count": 0
},
"atari": {
"scores": [
9.0,
6.0,
6.0,
6.0,
1.0,
6.0
],
"mean": 5.666666666666667,
"median": 6.0,
"min": 1.0,
"max": 9.0,
"low_score_threshold": 3.0,
"low_score_count": 1
},
"judgment": {
"verdict": "partial_transfer",
"go_score_mean": 7.666666666666667,
"atari_score_mean": 5.666666666666667,
"atari_to_go_score_ratio": 0.7391304347826088,
"interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
"next_recommendation": "Run a forced-loss trace to confirm lives/serve RAM, then use the transfer results to tune survival and late-rally dynamics."
}
}
}
failure_analysis
{
"low_score_episodes": [
{
"episode": 4,
"seed": 4,
"score": 1.0,
"steps": 1800,
"final_ball_x": 164,
"final_ball_y": 0,
"final_paddle_x": 177,
"final_vx": 0,
"final_vy": 0,
"final_intercept_error_px": 13,
"final_action": 3,
"serve_reset_first_active": {
"step": 3,
"action": 3,
"ball_x": 183,
"ball_y": 121,
"paddle_x": 146
},
"first_reward_step": 40,
"diagnosis": "Late miss: paddle was not aligned with the ball near terminal state; action lag or planner target timing remains a bottleneck."
}
]
}
acceptance
{
"atari_mean_delta_planner_minus_v3": -3.333333333333333,
"atari_low_score_count_reduction_planner_minus_v3": 1,
"improved": true,
"verdict": "pass",
"diagnosis": "Planner reduced the Atari low-score seed count versus breakout_policy_v3 (from 2 to 1), but its lower Atari mean (5.67 vs 9.0) shows v3 near-bottom timing is still stronger on high-scoring seeds."
}
evidence
{
"go_episodes": "breakout/runs-planner-policy-v1/episodes.jsonl",
"atari_trace": "breakout/calibration/traces/planner_policy_v1_atari.jsonl",
"atari_video_url": "https://file.aimusic.win/gemma/breakout/planner_policy_v1_atari.mp4"
}