Breakout Atari-Validated Policy Search Harness v1
objective
{
"name": "Breakout Atari-validated policy search harness v1",
"script": "breakout/scripts/search_policies.py",
"policy_family": "breakout_policy_v3 parameter configs"
}
search
{
"go_searched_candidates": 216,
"atari_validated_candidates": 8,
"go_ranking_key": "low_score_count asc, score_min desc, score_mean desc",
"atari_ranking_key": "atari_low_score_count asc, atari_score_mean desc, atari_score_min desc",
"validated_candidates": [
{
"candidate_id": "c182",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 175
},
"go_score_mean": 7.666666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 1.0,
"atari_low_score_count": 2,
"go_scores": [
9.0,
7.0,
6.0,
8.0,
7.0,
9.0
],
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
1.0,
5.0
]
},
{
"candidate_id": "c039",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.5,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.5,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
0.0,
5.0
]
},
{
"candidate_id": "c111",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 150,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.5,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.5,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
0.0,
5.0
]
},
{
"candidate_id": "c183",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.5,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 1.0,
"atari_low_score_count": 2,
"go_scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
1.0,
5.0
]
},
{
"candidate_id": "c011",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 1.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.166666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"atari_scores": [
2.0,
5.0,
5.0,
5.0,
0.0,
5.0
]
},
{
"candidate_id": "c015",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 1.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.166666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 9.0,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"atari_scores": [
2.0,
13.0,
13.0,
13.0,
0.0,
13.0
]
},
{
"candidate_id": "c035",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.166666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"atari_scores": [
2.0,
5.0,
5.0,
5.0,
0.0,
5.0
]
},
{
"candidate_id": "c059",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 3.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.166666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.0,
"atari_score_min": 1.0,
"atari_low_score_count": 2,
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"atari_scores": [
1.0,
4.0,
4.0,
4.0,
1.0,
4.0
]
}
],
"go_ranked_candidates": [
{
"candidate_id": "c182",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 175
},
"go_score_mean": 7.666666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 1.0,
"atari_low_score_count": 2,
"go_scores": [
9.0,
7.0,
6.0,
8.0,
7.0,
9.0
],
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
1.0,
5.0
]
},
{
"candidate_id": "c039",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.5,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.5,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
0.0,
5.0
]
},
{
"candidate_id": "c111",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 150,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.5,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.5,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
0.0,
5.0
]
},
{
"candidate_id": "c183",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.5,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 1.0,
"atari_low_score_count": 2,
"go_scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
1.0,
5.0
]
},
{
"candidate_id": "c011",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 1.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.166666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"atari_scores": [
2.0,
5.0,
5.0,
5.0,
0.0,
5.0
]
},
{
"candidate_id": "c015",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 1.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.166666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 9.0,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"atari_scores": [
2.0,
13.0,
13.0,
13.0,
0.0,
13.0
]
},
{
"candidate_id": "c035",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.166666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"atari_scores": [
2.0,
5.0,
5.0,
5.0,
0.0,
5.0
]
},
{
"candidate_id": "c059",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 3.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.166666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.0,
"atari_score_min": 1.0,
"atari_low_score_count": 2,
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"atari_scores": [
1.0,
4.0,
4.0,
4.0,
1.0,
4.0
]
}
],
"atari_ranked_candidates": [
{
"candidate_id": "c015",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 1.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.166666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 9.0,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"atari_scores": [
2.0,
13.0,
13.0,
13.0,
0.0,
13.0
]
},
{
"candidate_id": "c182",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 175
},
"go_score_mean": 7.666666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 1.0,
"atari_low_score_count": 2,
"go_scores": [
9.0,
7.0,
6.0,
8.0,
7.0,
9.0
],
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
1.0,
5.0
]
},
{
"candidate_id": "c183",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.5,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 1.0,
"atari_low_score_count": 2,
"go_scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
1.0,
5.0
]
},
{
"candidate_id": "c011",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 1.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.166666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"atari_scores": [
2.0,
5.0,
5.0,
5.0,
0.0,
5.0
]
},
{
"candidate_id": "c035",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.166666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"atari_scores": [
2.0,
5.0,
5.0,
5.0,
0.0,
5.0
]
},
{
"candidate_id": "c039",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.5,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.5,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
0.0,
5.0
]
},
{
"candidate_id": "c111",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 150,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.5,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.5,
"atari_score_min": 0.0,
"atari_low_score_count": 2,
"go_scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
0.0,
5.0
]
},
{
"candidate_id": "c059",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 3.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_score_mean": 7.166666666666667,
"go_score_min": 6.0,
"go_low_score_count": 0,
"atari_score_mean": 3.0,
"atari_score_min": 1.0,
"atari_low_score_count": 2,
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"atari_scores": [
1.0,
4.0,
4.0,
4.0,
1.0,
4.0
]
}
]
}
correlation
{
"sample_size": 8,
"pearson_go_mean_vs_atari_mean": -0.32584242221528265,
"spearman_go_rank_vs_atari_rank": -0.13989092759813318,
"go_shadow_effective_as_ranker": false,
"interpretation": "Go shadow score is not a reliable ranker in this candidate window; use it as a coarse prefilter and keep Atari validation in the loop."
}
evidence
{
"go_candidates": "breakout/calibration/reports/policy_search_v2_go_candidates.json",
"atari_candidates": "breakout/calibration/reports/policy_search_v2_atari_probe_candidates.json",
"r2_video_url": "https://file.aimusic.win/gemma/breakout/search_harness_v1_atari_best.mp4"
}