Breakout Policy Parameter Search v2
objective
{
"name": "Breakout policy parameter search v2",
"target_policy": "breakout_policy_v3",
"selected_candidate_id": "c015",
"selected_params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 1.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
}
}
search
{
"go_searched_candidates": 216,
"go_ranking_key": "low_score_count asc, score_min desc, score_mean desc",
"top_go_candidates": [
{
"candidate_id": "c182",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 175
},
"scores": [
9.0,
7.0,
6.0,
8.0,
7.0,
9.0
],
"score_mean": 7.666666666666667,
"score_min": 6.0,
"score_max": 9.0,
"low_score_count": 0
},
{
"candidate_id": "c039",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"score_mean": 7.5,
"score_min": 6.0,
"score_max": 9.0,
"low_score_count": 0
},
{
"candidate_id": "c111",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 150,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"score_mean": 7.5,
"score_min": 6.0,
"score_max": 9.0,
"low_score_count": 0
},
{
"candidate_id": "c183",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"score_mean": 7.5,
"score_min": 6.0,
"score_max": 9.0,
"low_score_count": 0
},
{
"candidate_id": "c011",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 1.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"score_mean": 7.166666666666667,
"score_min": 6.0,
"score_max": 8.0,
"low_score_count": 0
},
{
"candidate_id": "c015",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 1.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"score_mean": 7.166666666666667,
"score_min": 6.0,
"score_max": 8.0,
"low_score_count": 0
},
{
"candidate_id": "c035",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"score_mean": 7.166666666666667,
"score_min": 6.0,
"score_max": 8.0,
"low_score_count": 0
},
{
"candidate_id": "c059",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 3.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"score_mean": 7.166666666666667,
"score_min": 6.0,
"score_max": 8.0,
"low_score_count": 0
},
{
"candidate_id": "c063",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 3.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"score_mean": 7.166666666666667,
"score_min": 6.0,
"score_max": 8.0,
"low_score_count": 0
},
{
"candidate_id": "c083",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 150,
"policy_far_lead": 1.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"score_mean": 7.166666666666667,
"score_min": 6.0,
"score_max": 8.0,
"low_score_count": 0
}
],
"atari_validated_candidates": 8,
"atari_ranking_key": "atari_low_score_count asc, atari_score_mean desc, atari_score_min desc",
"top_atari_candidates": [
{
"candidate_id": "c015",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 1.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"go_score_mean": 7.166666666666667,
"atari_scores": [
2.0,
13.0,
13.0,
13.0,
0.0,
13.0
],
"atari_score_mean": 9.0,
"atari_score_min": 0.0,
"atari_score_max": 13.0,
"atari_low_score_count": 2,
"trace_path": "/tmp/policy_search_v2_c015.jsonl"
},
{
"candidate_id": "c182",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 175
},
"go_scores": [
9.0,
7.0,
6.0,
8.0,
7.0,
9.0
],
"go_score_mean": 7.666666666666667,
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
1.0,
5.0
],
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 1.0,
"atari_score_max": 5.0,
"atari_low_score_count": 2,
"trace_path": "/tmp/policy_search_v2_c182.jsonl"
},
{
"candidate_id": "c183",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 165,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"go_score_mean": 7.5,
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
1.0,
5.0
],
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 1.0,
"atari_score_max": 5.0,
"atari_low_score_count": 2,
"trace_path": "/tmp/policy_search_v2_c183.jsonl"
},
{
"candidate_id": "c011",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 1.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"go_score_mean": 7.166666666666667,
"atari_scores": [
2.0,
5.0,
5.0,
5.0,
0.0,
5.0
],
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 0.0,
"atari_score_max": 5.0,
"atari_low_score_count": 2,
"trace_path": "/tmp/policy_search_v2_c011.jsonl"
},
{
"candidate_id": "c035",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"go_score_mean": 7.166666666666667,
"atari_scores": [
2.0,
5.0,
5.0,
5.0,
0.0,
5.0
],
"atari_score_mean": 3.6666666666666665,
"atari_score_min": 0.0,
"atari_score_max": 5.0,
"atari_low_score_count": 2,
"trace_path": "/tmp/policy_search_v2_c035.jsonl"
},
{
"candidate_id": "c039",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"go_score_mean": 7.5,
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
0.0,
5.0
],
"atari_score_mean": 3.5,
"atari_score_min": 0.0,
"atari_score_max": 5.0,
"atari_low_score_count": 2,
"trace_path": "/tmp/policy_search_v2_c039.jsonl"
},
{
"candidate_id": "c111",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 150,
"policy_far_lead": 2.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 2.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_scores": [
9.0,
7.0,
6.0,
7.0,
7.0,
9.0
],
"go_score_mean": 7.5,
"atari_scores": [
1.0,
5.0,
5.0,
5.0,
0.0,
5.0
],
"atari_score_mean": 3.5,
"atari_score_min": 0.0,
"atari_score_max": 5.0,
"atari_low_score_count": 2,
"trace_path": "/tmp/policy_search_v2_c111.jsonl"
},
{
"candidate_id": "c059",
"params": {
"policy_far_cap": 10.0,
"policy_near_cap": 18.0,
"policy_base_deadzone": 2.0,
"policy_near_y": 140,
"policy_far_lead": 3.0,
"policy_near_lead": 3.0,
"policy_far_deadzone": 0.0,
"policy_near_deadzone": 1.0,
"policy_panic_y": 185
},
"go_scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"go_score_mean": 7.166666666666667,
"atari_scores": [
1.0,
4.0,
4.0,
4.0,
1.0,
4.0
],
"atari_score_mean": 3.0,
"atari_score_min": 1.0,
"atari_score_max": 4.0,
"atari_low_score_count": 2,
"trace_path": "/tmp/policy_search_v2_c059.jsonl"
}
]
}
policy_comparison
{
"breakout_policy_v1": {
"policy": "breakout_policy_v1",
"source_report": "transfer_v4_report.json",
"go": {
"scores": [
7.0,
5.0,
6.0,
7.0,
6.0,
7.0
],
"mean": 6.333333333333333,
"median": 6.5,
"min": 5.0,
"max": 7.0,
"low_score_threshold": 3.0,
"low_score_count": 0
},
"atari": {
"scores": [
7.0,
2.0,
2.0,
2.0,
1.0,
2.0
],
"mean": 2.6666666666666665,
"median": 2.0,
"min": 1.0,
"max": 7.0,
"low_score_threshold": 3.0,
"low_score_count": 5
},
"judgment": {
"verdict": "partial_transfer",
"go_score_mean": 6.333333333333333,
"atari_score_mean": 2.6666666666666665,
"atari_to_go_score_ratio": 0.42105263157894735,
"interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
"next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
}
},
"breakout_policy_v2": {
"policy": "breakout_policy_v2",
"source_report": "policy_robustness_v1_report.json",
"go": {
"scores": [
8.0,
7.0,
5.0,
7.0,
7.0,
8.0
],
"mean": 7.0,
"median": 7.0,
"min": 5.0,
"max": 8.0,
"low_score_threshold": 3.0,
"low_score_count": 0
},
"atari": {
"scores": [
1.0,
4.0,
4.0,
4.0,
2.0,
4.0
],
"mean": 3.1666666666666665,
"median": 4.0,
"min": 1.0,
"max": 4.0,
"low_score_threshold": 3.0,
"low_score_count": 2
},
"judgment": {
"verdict": "partial_transfer",
"go_score_mean": 7.0,
"atari_score_mean": 3.1666666666666665,
"atari_to_go_score_ratio": 0.4523809523809524,
"interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
"next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
}
},
"breakout_policy_v3": {
"policy": "breakout_policy_v3",
"selected_candidate_id": "c015",
"logic": "RAM predictive policy with searched near-bottom lead/deadzone parameters.",
"go": {
"scores": [
8.0,
7.0,
6.0,
7.0,
7.0,
8.0
],
"mean": 7.166666666666667,
"median": 7.0,
"min": 6.0,
"max": 8.0,
"low_score_threshold": 3.0,
"low_score_count": 0
},
"atari": {
"scores": [
2.0,
13.0,
13.0,
13.0,
0.0,
13.0
],
"mean": 9.0,
"median": 13.0,
"min": 0.0,
"max": 13.0,
"low_score_threshold": 3.0,
"low_score_count": 2
},
"judgment": {
"verdict": "partial_transfer",
"go_score_mean": 7.166666666666667,
"atari_score_mean": 9.0,
"atari_to_go_score_ratio": 1.255813953488372,
"interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
"next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
}
}
}
acceptance
{
"atari_mean_delta_v3_minus_v2": 5.833333333333334,
"atari_low_score_count_reduction_v3_minus_v2": 0,
"improved": true,
"verdict": "pass",
"diagnosis": "Parameter search found a v3 setting that improves Atari six-seed mean over breakout_policy_v2."
}
evidence
{
"go_episodes": "breakout/runs-policy-search-v2/episodes.jsonl",
"atari_trace": "breakout/calibration/traces/policy_search_v2_atari.jsonl",
"atari_video_url": "https://file.aimusic.win/gemma/breakout/policy_search_v2_atari.mp4"
}