Breakout Atari-Gated Search v2
objective
{
"name": "Breakout Atari-gated search v2",
"target_policy": "breakout_policy_v4",
"baseline_atari_20_seed_mean": 5.15,
"baseline_atari_low_score_count_lt3": 13
}
search
{
"go_searched_candidates": 216,
"go_ranking_key": "low_score_count asc, score_min desc, score_mean desc",
"atari_validated_candidates": 8,
"atari_gate_seeds_per_candidate": 6,
"atari_ranking_key": "atari_low_score_count asc, atari_score_mean desc, atari_score_min desc",
"selected_policy": {
"policy_name": "breakout_policy_v4",
"selection_source": "Atari-gated search v2",
"selection_rule": "prefer candidates with Atari gate mean >= baseline, then low_score_count asc, mean desc, min desc",
"baseline_atari_mean": 5.15,
"searched_candidates": 216,
"atari_validated_candidates": 8,
"selected_candidate": {
"candidate_id": "c144",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 1.0,
"policy_near_lead": 2.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 0.0,
"policy_panic_y": 175
},
"go_scores": [
14.0,
10.0,
18.0,
14.0,
10.0,
14.0
],
"go_score_mean": 13.333333333333334,
"atari_scores": [
2.0,
7.0,
7.0,
7.0,
1.0,
7.0
],
"atari_score_mean": 5.166666666666667,
"atari_score_min": 1.0,
"atari_score_max": 7.0,
"atari_low_score_count": 2,
"trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c144.jsonl"
},
"selected_go_candidate": {
"candidate_id": "c144",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 1.0,
"policy_near_lead": 2.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 0.0,
"policy_panic_y": 175
},
"scores": [
14.0,
10.0,
18.0,
14.0,
10.0,
14.0
],
"score_mean": 13.333333333333334,
"score_min": 10.0,
"score_max": 18.0,
"low_score_count": 0
},
"config_path": "breakout/calibration/atari_gated_search_v2_policy_config.json",
"rerun_policy_args": {
"policy": "breakout_policy_v4",
"config": "breakout/calibration/atari_gated_search_v2_policy_config.json"
}
},
"atari_gate_ranked": [
{
"candidate_id": "c196",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 3.0,
"policy_near_lead": 2.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 0.0,
"policy_panic_y": 175
},
"go_scores": [
12.0,
18.0,
12.0,
16.0,
12.0,
12.0
],
"go_score_mean": 13.666666666666666,
"atari_scores": [
3.0,
4.0,
4.0,
4.0,
0.0,
4.0
],
"atari_score_mean": 3.1666666666666665,
"atari_score_min": 0.0,
"atari_score_max": 4.0,
"atari_low_score_count": 1,
"trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c196.jsonl"
},
{
"candidate_id": "c144",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 1.0,
"policy_near_lead": 2.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 0.0,
"policy_panic_y": 175
},
"go_scores": [
14.0,
10.0,
18.0,
14.0,
10.0,
14.0
],
"go_score_mean": 13.333333333333334,
"atari_scores": [
2.0,
7.0,
7.0,
7.0,
1.0,
7.0
],
"atari_score_mean": 5.166666666666667,
"atari_score_min": 1.0,
"atari_score_max": 7.0,
"atari_low_score_count": 2,
"trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c144.jsonl"
},
{
"candidate_id": "c145",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 1.0,
"policy_near_lead": 2.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 0.0,
"policy_panic_y": 185
},
"go_scores": [
14.0,
10.0,
18.0,
14.0,
10.0,
14.0
],
"go_score_mean": 13.333333333333334,
"atari_scores": [
2.0,
7.0,
7.0,
7.0,
1.0,
7.0
],
"atari_score_mean": 5.166666666666667,
"atari_score_min": 1.0,
"atari_score_max": 7.0,
"atari_low_score_count": 2,
"trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c145.jsonl"
},
{
"candidate_id": "c148",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 1.0,
"policy_near_lead": 2.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 0.0,
"policy_panic_y": 175
},
"go_scores": [
16.0,
18.0,
12.0,
14.0,
18.0,
16.0
],
"go_score_mean": 15.666666666666666,
"atari_scores": [
1.0,
7.0,
7.0,
7.0,
1.0,
7.0
],
"atari_score_mean": 5.0,
"atari_score_min": 1.0,
"atari_score_max": 7.0,
"atari_low_score_count": 2,
"trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c148.jsonl"
},
{
"candidate_id": "c192",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 3.0,
"policy_near_lead": 2.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 0.0,
"policy_panic_y": 175
},
"go_scores": [
14.0,
10.0,
18.0,
14.0,
10.0,
14.0
],
"go_score_mean": 13.333333333333334,
"atari_scores": [
2.0,
5.0,
5.0,
5.0,
0.0,
5.0
],
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 0.0,
"atari_score_max": 5.0,
"atari_low_score_count": 2,
"trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c192.jsonl"
},
{
"candidate_id": "c193",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 3.0,
"policy_near_lead": 2.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 0.0,
"policy_panic_y": 185
},
"go_scores": [
14.0,
10.0,
18.0,
14.0,
10.0,
14.0
],
"go_score_mean": 13.333333333333334,
"atari_scores": [
2.0,
5.0,
5.0,
5.0,
0.0,
5.0
],
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 0.0,
"atari_score_max": 5.0,
"atari_low_score_count": 2,
"trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c193.jsonl"
},
{
"candidate_id": "c077",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 150,
"policy_far_lead": 1.0,
"policy_near_lead": 2.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 0.0,
"policy_panic_y": 185
},
"go_scores": [
17.0,
13.0,
10.0,
11.0,
12.0,
17.0
],
"go_score_mean": 13.333333333333334,
"atari_scores": [
6.0,
2.0,
2.0,
2.0,
2.0,
2.0
],
"atari_score_mean": 2.6666666666666665,
"atari_score_min": 2.0,
"atari_score_max": 6.0,
"atari_low_score_count": 5,
"trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c077.jsonl"
},
{
"candidate_id": "c076",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 150,
"policy_far_lead": 1.0,
"policy_near_lead": 2.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 0.0,
"policy_panic_y": 175
},
"go_scores": [
17.0,
13.0,
10.0,
11.0,
12.0,
17.0
],
"go_score_mean": 13.333333333333334,
"atari_scores": [
2.0,
2.0,
2.0,
2.0,
1.0,
2.0
],
"atari_score_mean": 1.8333333333333333,
"atari_score_min": 1.0,
"atari_score_max": 2.0,
"atari_low_score_count": 6,
"trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c076.jsonl"
}
]
}
rank_correlation
{
"sample_size": 8,
"pearson_go_mean_vs_atari_gate_mean": 0.3684150467847217,
"spearman_go_rank_vs_atari_gate_rank": 0.047327796889219946,
"interpretation": "Atari gate is the selection authority; Go ranking is retained as the coarse prefilter."
}
final_validation
{
"go_20_seed": {
"distribution": {
"count": 20,
"scores": [
14.0,
10.0,
18.0,
14.0,
10.0,
14.0,
14.0,
16.0,
14.0,
11.0,
16.0,
16.0,
11.0,
15.0,
12.0,
17.0,
12.0,
14.0,
12.0,
12.0
],
"mean": 13.6,
"median": 14.0,
"min": 10.0,
"max": 18.0,
"stdev": 2.244994432064365,
"low_score_count_lt3": 0
},
"steps_mean": 1374.8,
"done_count": 20,
"rows": [
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 0,
"episode": 0,
"score": 14,
"steps": 1376,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep000_seed0.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 1,
"episode": 1,
"score": 10,
"steps": 1031,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep001_seed1.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 2,
"episode": 2,
"score": 18,
"steps": 1800,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep002_seed2.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 3,
"episode": 3,
"score": 14,
"steps": 1395,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep003_seed3.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 4,
"episode": 4,
"score": 10,
"steps": 1016,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep004_seed4.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 5,
"episode": 5,
"score": 14,
"steps": 1376,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep005_seed5.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 6,
"episode": 6,
"score": 14,
"steps": 1398,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep006_seed6.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 7,
"episode": 7,
"score": 16,
"steps": 1652,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep007_seed7.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 8,
"episode": 8,
"score": 14,
"steps": 1373,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep008_seed8.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 9,
"episode": 9,
"score": 11,
"steps": 1157,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep009_seed9.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 10,
"episode": 10,
"score": 16,
"steps": 1652,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep010_seed10.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 11,
"episode": 11,
"score": 16,
"steps": 1620,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep011_seed11.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 12,
"episode": 12,
"score": 11,
"steps": 1150,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep012_seed12.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 13,
"episode": 13,
"score": 15,
"steps": 1469,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep013_seed13.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 14,
"episode": 14,
"score": 12,
"steps": 1190,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep014_seed14.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 15,
"episode": 15,
"score": 17,
"steps": 1699,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep015_seed15.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 16,
"episode": 16,
"score": 12,
"steps": 1191,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep016_seed16.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 17,
"episode": 17,
"score": 14,
"steps": 1477,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep017_seed17.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 18,
"episode": 18,
"score": 12,
"steps": 1190,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep018_seed18.jsonl"
},
{
"timestamp": "20260510T165312Z",
"trial_index": 1,
"policy_name": "breakout_policy_v4",
"seed": 19,
"episode": 19,
"score": 12,
"steps": 1284,
"done": true,
"replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep019_seed19.jsonl"
}
]
},
"atari_20_seed": {
"distribution": {
"count": 20,
"scores": [
2.0,
7.0,
7.0,
7.0,
1.0,
7.0,
2.0,
7.0,
2.0,
1.0,
5.0,
5.0,
7.0,
2.0,
1.0,
7.0,
2.0,
2.0,
5.0,
1.0
],
"mean": 4.0,
"median": 3.5,
"min": 1.0,
"max": 7.0,
"stdev": 2.5099800796022267,
"low_score_count_lt3": 10
},
"steps_mean": 1800.0,
"done_count": 0,
"rows": [
{
"episode": 0,
"score": 2.0,
"steps": 1800,
"done": false
},
{
"episode": 1,
"score": 7.0,
"steps": 1800,
"done": false
},
{
"episode": 2,
"score": 7.0,
"steps": 1800,
"done": false
},
{
"episode": 3,
"score": 7.0,
"steps": 1800,
"done": false
},
{
"episode": 4,
"score": 1.0,
"steps": 1800,
"done": false
},
{
"episode": 5,
"score": 7.0,
"steps": 1800,
"done": false
},
{
"episode": 6,
"score": 2.0,
"steps": 1800,
"done": false
},
{
"episode": 7,
"score": 7.0,
"steps": 1800,
"done": false
},
{
"episode": 8,
"score": 2.0,
"steps": 1800,
"done": false
},
{
"episode": 9,
"score": 1.0,
"steps": 1800,
"done": false
},
{
"episode": 10,
"score": 5.0,
"steps": 1800,
"done": false
},
{
"episode": 11,
"score": 5.0,
"steps": 1800,
"done": false
},
{
"episode": 12,
"score": 7.0,
"steps": 1800,
"done": false
},
{
"episode": 13,
"score": 2.0,
"steps": 1800,
"done": false
},
{
"episode": 14,
"score": 1.0,
"steps": 1800,
"done": false
},
{
"episode": 15,
"score": 7.0,
"steps": 1800,
"done": false
},
{
"episode": 16,
"score": 2.0,
"steps": 1800,
"done": false
},
{
"episode": 17,
"score": 2.0,
"steps": 1800,
"done": false
},
{
"episode": 18,
"score": 5.0,
"steps": 1800,
"done": false
},
{
"episode": 19,
"score": 1.0,
"steps": 1800,
"done": false
}
]
},
"env_fidelity_fix_v3_reference": {
"go_10_seed": {
"count": 10,
"scores": [
7.0,
6.0,
15.0,
13.0,
9.0,
7.0,
9.0,
8.0,
8.0,
8.0
],
"mean": 9.0,
"median": 8.0,
"min": 6.0,
"max": 15.0,
"stdev": 2.6832815729997477,
"low_score_count_lt3": 0
},
"atari_10_seed": {
"count": 10,
"scores": [
2.0,
13.0,
13.0,
13.0,
0.0,
13.0,
2.0,
13.0,
2.0,
0.0
],
"mean": 7.1,
"median": 7.5,
"min": 0.0,
"max": 13.0,
"stdev": 5.940538696111658,
"low_score_count_lt3": 5
}
}
}
decision
{
"verdict": "pass",
"atari_mean_delta_vs_5_15": -1.1500000000000004,
"atari_low_score_count_delta_vs_13": -3,
"diagnosis": "Atari-gated v4 lowers the robust 20-seed low-score count, but mean score remains below the robust benchmark."
}
rerun_commands
[
"bash breakout/scripts/run_atari_gated_search_v2_wsl.sh"
]
evidence
{
"r2_video_url": "https://file.aimusic.win/gemma/breakout/atari_gated_search_v2_atari_seed1.mp4",
"r2_artifacts_url": "https://file.aimusic.win/gemma/breakout/atari_gated_search_v2_wsl_artifacts.tgz"
}