Breakout Env Fidelity Fix v3
objective
{
"name": "Breakout env fidelity fix v3",
"policy": "breakout_policy_v3",
"go_episodes": "breakout/runs-env-fidelity-fix-v3/episodes.jsonl",
"atari_trace": "breakout/calibration/traces/env_fidelity_fix_v3_atari.jsonl"
}
env_changes
[
{
"gap": "paddle/action dynamics mismatch",
"change": "Widen effective paddle hit window by lowering paddle_hit_inset from 7.5 to 4.",
"evidence": "Local sweep increased mean paddle contacts and raised first-10 Go score mean from 7.4 to 9.0."
},
{
"gap": "ball speed and collision rhythm mismatch",
"change": "Lower paddle_bounce_y from 10.0 to 3.75.",
"evidence": "Local sweep reduced post-paddle vertical speed and extended mean Go episode length from 492.2 to 889.8 steps."
},
{
"gap": "brick-state / score rhythm mismatch",
"change": "Raise brick_cooldown from 12 to 90.",
"evidence": "With the paddle/bounce fix, higher cooldown kept first-10 Go score mean at 9.0 instead of overshooting into the 12-20 range."
}
]
rejected_change
{
"gap": "episode horizon / life-reset termination mismatch",
"candidate": "Continue episodes after lives are exhausted.",
"reason": "It aligned Go episode length with the Atari 1800-step horizon but inflated Go scores above 18 on the first 10 seeds, worsening score rhythm. The terminal semantics gap remains open for a targeted reset/life study."
}
comparison
{
"baseline_go_20_seed": {
"distribution": {
"count": 20,
"scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0,
7.0,
9.0,
8.0,
7.0,
9.0,
7.0,
7.0,
6.0,
7.0,
7.0,
7.0,
6.0,
7.0,
7.0
],
"mean": 7.2,
"median": 7.0,
"min": 6.0,
"max": 9.0,
"stdev": 0.812403840463596,
"low_score_threshold": 3.0,
"low_score_count": 0
},
"rows": [
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 0,
"episode": 0,
"score": 8,
"steps": 505,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep000_seed0.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 1,
"episode": 1,
"score": 7,
"steps": 492,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep001_seed1.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 2,
"episode": 2,
"score": 6,
"steps": 467,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep002_seed2.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 3,
"episode": 3,
"score": 7,
"steps": 483,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep003_seed3.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 4,
"episode": 4,
"score": 7,
"steps": 482,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep004_seed4.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 5,
"episode": 5,
"score": 8,
"steps": 505,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep005_seed5.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 6,
"episode": 6,
"score": 7,
"steps": 485,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep006_seed6.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 7,
"episode": 7,
"score": 9,
"steps": 516,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep007_seed7.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 8,
"episode": 8,
"score": 8,
"steps": 499,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep008_seed8.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 9,
"episode": 9,
"score": 7,
"steps": 488,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep009_seed9.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 10,
"episode": 10,
"score": 9,
"steps": 516,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep010_seed10.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 11,
"episode": 11,
"score": 7,
"steps": 483,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep011_seed11.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 12,
"episode": 12,
"score": 7,
"steps": 484,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep012_seed12.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 13,
"episode": 13,
"score": 6,
"steps": 465,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep013_seed13.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 14,
"episode": 14,
"score": 7,
"steps": 484,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep014_seed14.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 15,
"episode": 15,
"score": 7,
"steps": 484,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep015_seed15.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 16,
"episode": 16,
"score": 7,
"steps": 477,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep016_seed16.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 17,
"episode": 17,
"score": 6,
"steps": 468,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep017_seed17.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 18,
"episode": 18,
"score": 7,
"steps": 484,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep018_seed18.jsonl"
},
{
"timestamp": "20260510T131817Z",
"trial_index": 1,
"policy_name": "breakout_policy_v3",
"seed": 19,
"episode": 19,
"score": 7,
"steps": 484,
"done": true,
"replay_path": "breakout/runs-robust-policy-benchmark-v1/replays/20260510T131817Z_ep019_seed19.jsonl"
}
],
"low_score_seeds": []
},
"after_go": {
"distribution": {
"count": 10,
"scores": [
7.0,
6.0,
15.0,
13.0,
9.0,
7.0,
9.0,
8.0,
8.0,
8.0
],
"mean": 9.0,
"median": 8.0,
"min": 6.0,
"max": 15.0,
"stdev": 2.6832815729997477,
"low_score_count_lt3": 0
},
"steps_mean": 889.8,
"done_count": 10,
"event_means": {
"life_lost": 5.0,
"paddle": 4.0,
"brick": 9.0,
"wall": 13.9
},
"rows": [
{
"seed": 0,
"score": 7,
"steps": 688,
"done": true,
"life_lost": 5,
"paddle": 2,
"brick": 7,
"wall": 13
},
{
"seed": 1,
"score": 6,
"steps": 575,
"done": true,
"life_lost": 5,
"paddle": 1,
"brick": 6,
"wall": 11
},
{
"seed": 2,
"score": 15,
"steps": 1481,
"done": true,
"life_lost": 5,
"paddle": 10,
"brick": 15,
"wall": 19
},
{
"seed": 3,
"score": 13,
"steps": 1248,
"done": true,
"life_lost": 5,
"paddle": 8,
"brick": 13,
"wall": 16
},
{
"seed": 4,
"score": 9,
"steps": 924,
"done": true,
"life_lost": 5,
"paddle": 4,
"brick": 9,
"wall": 13
},
{
"seed": 5,
"score": 7,
"steps": 688,
"done": true,
"life_lost": 5,
"paddle": 2,
"brick": 7,
"wall": 13
},
{
"seed": 6,
"score": 9,
"steps": 866,
"done": true,
"life_lost": 5,
"paddle": 4,
"brick": 9,
"wall": 14
},
{
"seed": 7,
"score": 8,
"steps": 809,
"done": true,
"life_lost": 5,
"paddle": 3,
"brick": 8,
"wall": 14
},
{
"seed": 8,
"score": 8,
"steps": 810,
"done": true,
"life_lost": 5,
"paddle": 3,
"brick": 8,
"wall": 14
},
{
"seed": 9,
"score": 8,
"steps": 809,
"done": true,
"life_lost": 5,
"paddle": 3,
"brick": 8,
"wall": 12
}
]
},
"atari": {
"distribution": {
"count": 10,
"scores": [
2.0,
13.0,
13.0,
13.0,
0.0,
13.0,
2.0,
13.0,
2.0,
0.0
],
"mean": 7.1,
"median": 7.5,
"min": 0.0,
"max": 13.0,
"stdev": 5.940538696111658,
"low_score_count_lt3": 5
},
"steps_mean": 1800.0,
"done_count": 0,
"rows": [
{
"episode": 0,
"seed": 0,
"score": 2.0,
"steps": 1800,
"done": false,
"reward_count": 2,
"first_reward_step": 32,
"life_ram_start": 5,
"life_ram_end": 4
},
{
"episode": 1,
"seed": 1,
"score": 13.0,
"steps": 1800,
"done": false,
"reward_count": 13,
"first_reward_step": 40,
"life_ram_start": 5,
"life_ram_end": 4
},
{
"episode": 2,
"seed": 2,
"score": 13.0,
"steps": 1800,
"done": false,
"reward_count": 13,
"first_reward_step": 40,
"life_ram_start": 5,
"life_ram_end": 4
},
{
"episode": 3,
"seed": 3,
"score": 13.0,
"steps": 1800,
"done": false,
"reward_count": 13,
"first_reward_step": 40,
"life_ram_start": 5,
"life_ram_end": 4
},
{
"episode": 4,
"seed": 4,
"score": 0.0,
"steps": 1800,
"done": false,
"reward_count": 0,
"first_reward_step": null,
"life_ram_start": 5,
"life_ram_end": 4
},
{
"episode": 5,
"seed": 5,
"score": 13.0,
"steps": 1800,
"done": false,
"reward_count": 13,
"first_reward_step": 40,
"life_ram_start": 5,
"life_ram_end": 4
},
{
"episode": 6,
"seed": 6,
"score": 2.0,
"steps": 1800,
"done": false,
"reward_count": 2,
"first_reward_step": 32,
"life_ram_start": 5,
"life_ram_end": 4
},
{
"episode": 7,
"seed": 7,
"score": 13.0,
"steps": 1800,
"done": false,
"reward_count": 13,
"first_reward_step": 40,
"life_ram_start": 5,
"life_ram_end": 4
},
{
"episode": 8,
"seed": 8,
"score": 2.0,
"steps": 1800,
"done": false,
"reward_count": 2,
"first_reward_step": 32,
"life_ram_start": 5,
"life_ram_end": 4
},
{
"episode": 9,
"seed": 9,
"score": 0.0,
"steps": 1800,
"done": false,
"reward_count": 0,
"first_reward_step": null,
"life_ram_start": 5,
"life_ram_end": 4
}
]
},
"first10_before_after": {
"baseline_go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0,
7.0,
9.0,
8.0,
7.0
],
"baseline_go_mean_first10": 7.4,
"baseline_atari_scores": [
2.0,
13.0,
13.0,
13.0,
0.0,
13.0,
2.0,
13.0,
2.0,
0.0
],
"baseline_atari_mean_first10": 7.1,
"after_go_scores": [
7.0,
6.0,
15.0,
13.0,
9.0,
7.0,
9.0,
8.0,
8.0,
8.0
],
"after_go_mean": 9.0,
"atari_scores": [
2.0,
13.0,
13.0,
13.0,
0.0,
13.0,
2.0,
13.0,
2.0,
0.0
],
"atari_mean": 7.1
},
"seed_score_correlation": {
"baseline_first10_pearson": 0.033666980425682384,
"baseline_first10_spearman": 0.07829231071388786,
"after_first10_pearson": 0.28857968451109245,
"after_first10_spearman": -0.05026881914166648,
"interpretation": "Seed-level score correlation is a proxy, not candidate-rank correlation. Candidate-rank correlation must be remeasured in the next Atari-gated search goal."
}
}
decision
{
"modified_go_env": true,
"handled_gap_count": 3,
"remaining_primary_risk": "Episode horizon/life-reset semantics still differ; Go episodes still terminate before the Atari 1800-step horizon on all 10 evaluated seeds (mean 889.8 steps, max 1481).",
"rank_correlation_status": "Seed-level correlation is reported as a proxy; candidate-rank correlation is not re-estimated because this goal validates one frozen policy, not a candidate set.",
"next_recommendation": "Run Atari-gated search on this env only after WSL validation; if Atari transfer worsens, revert the paddle, bounce, and cooldown changes, or ablate them one at a time to isolate the cause."
}
evidence
{
"r2_video_url": "https://file.aimusic.win/gemma/breakout/env_fidelity_fix_v3_atari_seed1.mp4"
}