Breakout Policy Parameter Search v2

objective

{
  "name": "Breakout policy parameter search v2",
  "target_policy": "breakout_policy_v3",
  "selected_candidate_id": "c015",
  "selected_params": {
    "policy_far_cap": 10.0,
    "policy_near_cap": 18.0,
    "policy_base_deadzone": 2.0,
    "policy_near_y": 140,
    "policy_far_lead": 1.0,
    "policy_near_lead": 3.0,
    "policy_far_deadzone": 2.0,
    "policy_near_deadzone": 1.0,
    "policy_panic_y": 185
  }
}

search

{
  "go_searched_candidates": 216,
  "go_ranking_key": "low_score_count asc, score_min desc, score_mean desc",
  "top_go_candidates": [
    {
      "candidate_id": "c182",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 175
      },
      "scores": [
        9.0,
        7.0,
        6.0,
        8.0,
        7.0,
        9.0
      ],
      "score_mean": 7.666666666666667,
      "score_min": 6.0,
      "score_max": 9.0,
      "low_score_count": 0
    },
    {
      "candidate_id": "c039",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "score_mean": 7.5,
      "score_min": 6.0,
      "score_max": 9.0,
      "low_score_count": 0
    },
    {
      "candidate_id": "c111",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 150,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "score_mean": 7.5,
      "score_min": 6.0,
      "score_max": 9.0,
      "low_score_count": 0
    },
    {
      "candidate_id": "c183",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "score_mean": 7.5,
      "score_min": 6.0,
      "score_max": 9.0,
      "low_score_count": 0
    },
    {
      "candidate_id": "c011",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 1.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "score_mean": 7.166666666666667,
      "score_min": 6.0,
      "score_max": 8.0,
      "low_score_count": 0
    },
    {
      "candidate_id": "c015",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 1.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "score_mean": 7.166666666666667,
      "score_min": 6.0,
      "score_max": 8.0,
      "low_score_count": 0
    },
    {
      "candidate_id": "c035",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "score_mean": 7.166666666666667,
      "score_min": 6.0,
      "score_max": 8.0,
      "low_score_count": 0
    },
    {
      "candidate_id": "c059",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 3.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "score_mean": 7.166666666666667,
      "score_min": 6.0,
      "score_max": 8.0,
      "low_score_count": 0
    },
    {
      "candidate_id": "c063",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 3.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "score_mean": 7.166666666666667,
      "score_min": 6.0,
      "score_max": 8.0,
      "low_score_count": 0
    },
    {
      "candidate_id": "c083",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 150,
        "policy_far_lead": 1.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "score_mean": 7.166666666666667,
      "score_min": 6.0,
      "score_max": 8.0,
      "low_score_count": 0
    }
  ],
  "atari_validated_candidates": 8,
  "atari_ranking_key": "atari_low_score_count asc, atari_score_mean desc, atari_score_min desc",
  "top_atari_candidates": [
    {
      "candidate_id": "c015",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 1.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "go_score_mean": 7.166666666666667,
      "atari_scores": [
        2.0,
        13.0,
        13.0,
        13.0,
        0.0,
        13.0
      ],
      "atari_score_mean": 9.0,
      "atari_score_min": 0.0,
      "atari_score_max": 13.0,
      "atari_low_score_count": 2,
      "trace_path": "/tmp/policy_search_v2_c015.jsonl"
    },
    {
      "candidate_id": "c182",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 175
      },
      "go_scores": [
        9.0,
        7.0,
        6.0,
        8.0,
        7.0,
        9.0
      ],
      "go_score_mean": 7.666666666666667,
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        1.0,
        5.0
      ],
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 1.0,
      "atari_score_max": 5.0,
      "atari_low_score_count": 2,
      "trace_path": "/tmp/policy_search_v2_c182.jsonl"
    },
    {
      "candidate_id": "c183",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "go_score_mean": 7.5,
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        1.0,
        5.0
      ],
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 1.0,
      "atari_score_max": 5.0,
      "atari_low_score_count": 2,
      "trace_path": "/tmp/policy_search_v2_c183.jsonl"
    },
    {
      "candidate_id": "c011",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 1.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "go_score_mean": 7.166666666666667,
      "atari_scores": [
        2.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ],
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 0.0,
      "atari_score_max": 5.0,
      "atari_low_score_count": 2,
      "trace_path": "/tmp/policy_search_v2_c011.jsonl"
    },
    {
      "candidate_id": "c035",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "go_score_mean": 7.166666666666667,
      "atari_scores": [
        2.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ],
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 0.0,
      "atari_score_max": 5.0,
      "atari_low_score_count": 2,
      "trace_path": "/tmp/policy_search_v2_c035.jsonl"
    },
    {
      "candidate_id": "c039",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "go_score_mean": 7.5,
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ],
      "atari_score_mean": 3.5,
      "atari_score_min": 0.0,
      "atari_score_max": 5.0,
      "atari_low_score_count": 2,
      "trace_path": "/tmp/policy_search_v2_c039.jsonl"
    },
    {
      "candidate_id": "c111",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 150,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "go_score_mean": 7.5,
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ],
      "atari_score_mean": 3.5,
      "atari_score_min": 0.0,
      "atari_score_max": 5.0,
      "atari_low_score_count": 2,
      "trace_path": "/tmp/policy_search_v2_c111.jsonl"
    },
    {
      "candidate_id": "c059",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 3.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "go_score_mean": 7.166666666666667,
      "atari_scores": [
        1.0,
        4.0,
        4.0,
        4.0,
        1.0,
        4.0
      ],
      "atari_score_mean": 3.0,
      "atari_score_min": 1.0,
      "atari_score_max": 4.0,
      "atari_low_score_count": 2,
      "trace_path": "/tmp/policy_search_v2_c059.jsonl"
    }
  ]
}

policy_comparison

{
  "breakout_policy_v1": {
    "policy": "breakout_policy_v1",
    "source_report": "transfer_v4_report.json",
    "go": {
      "scores": [
        7.0,
        5.0,
        6.0,
        7.0,
        6.0,
        7.0
      ],
      "mean": 6.333333333333333,
      "median": 6.5,
      "min": 5.0,
      "max": 7.0,
      "low_score_threshold": 3.0,
      "low_score_count": 0
    },
    "atari": {
      "scores": [
        7.0,
        2.0,
        2.0,
        2.0,
        1.0,
        2.0
      ],
      "mean": 2.6666666666666665,
      "median": 2.0,
      "min": 1.0,
      "max": 7.0,
      "low_score_threshold": 3.0,
      "low_score_count": 5
    },
    "judgment": {
      "verdict": "partial_transfer",
      "go_score_mean": 6.333333333333333,
      "atari_score_mean": 2.6666666666666665,
      "atari_to_go_score_ratio": 0.42105263157894735,
      "interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
      "next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
    }
  },
  "breakout_policy_v2": {
    "policy": "breakout_policy_v2",
    "source_report": "policy_robustness_v1_report.json",
    "go": {
      "scores": [
        8.0,
        7.0,
        5.0,
        7.0,
        7.0,
        8.0
      ],
      "mean": 7.0,
      "median": 7.0,
      "min": 5.0,
      "max": 8.0,
      "low_score_threshold": 3.0,
      "low_score_count": 0
    },
    "atari": {
      "scores": [
        1.0,
        4.0,
        4.0,
        4.0,
        2.0,
        4.0
      ],
      "mean": 3.1666666666666665,
      "median": 4.0,
      "min": 1.0,
      "max": 4.0,
      "low_score_threshold": 3.0,
      "low_score_count": 2
    },
    "judgment": {
      "verdict": "partial_transfer",
      "go_score_mean": 7.0,
      "atari_score_mean": 3.1666666666666665,
      "atari_to_go_score_ratio": 0.4523809523809524,
      "interpretation": "The policy earns positive Atari reward, so the shadow-env optimization transfers partially.",
      "next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
    }
  },
  "breakout_policy_v3": {
    "policy": "breakout_policy_v3",
    "selected_candidate_id": "c015",
    "logic": "RAM predictive policy with searched near-bottom lead/deadzone parameters.",
    "go": {
      "scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "mean": 7.166666666666667,
      "median": 7.0,
      "min": 6.0,
      "max": 8.0,
      "low_score_threshold": 3.0,
      "low_score_count": 0
    },
    "atari": {
      "scores": [
        2.0,
        13.0,
        13.0,
        13.0,
        0.0,
        13.0
      ],
      "mean": 9.0,
      "median": 13.0,
      "min": 0.0,
      "max": 13.0,
      "low_score_threshold": 3.0,
      "low_score_count": 2
    },
    "judgment": {
      "verdict": "partial_transfer",
      "go_score_mean": 7.166666666666667,
      "atari_score_mean": 9.0,
      "atari_to_go_score_ratio": 1.255813953488372,
      "interpretation": "The policy earns positive Atari reward (mean 9.0 versus a go mean of 7.17), but two of the six Atari seeds (scores 2.0 and 0.0) remain below the low-score threshold of 3.0, so the shadow-env optimization transfers only partially.",
      "next_recommendation": "Run forced-loss trace to confirm lives/serve RAM, then use transfer results to tune survival and late-rally dynamics."
    }
  }
}

acceptance

{
  "atari_mean_delta_v3_minus_v2": 5.833333333333334,
  "atari_low_score_count_reduction_v3_minus_v2": 0,
  "improved": true,
  "verdict": "pass",
  "diagnosis": "Parameter search found a v3 setting (candidate c015) that improves the Atari six-seed mean over breakout_policy_v2 (9.0 vs. 3.17, a gain of about 5.83); the Atari low-score count is unchanged at 2 of 6 seeds."
}

evidence

{
  "go_episodes": "breakout/runs-policy-search-v2/episodes.jsonl",
  "atari_trace": "breakout/calibration/traces/policy_search_v2_atari.jsonl",
  "atari_video_url": "https://file.aimusic.win/gemma/breakout/policy_search_v2_atari.mp4"
}