Breakout Atari-Gated Search v2

objective

{
  "name": "Breakout Atari-gated search v2",
  "target_policy": "breakout_policy_v4",
  "baseline_atari_20_seed_mean": 5.15,
  "baseline_atari_low_score_count_lt3": 13
}

search

{
  "go_searched_candidates": 216,
  "go_ranking_key": "low_score_count asc, score_min desc, score_mean desc",
  "atari_validated_candidates": 8,
  "atari_gate_seeds_per_candidate": 6,
  "atari_ranking_key": "atari_low_score_count asc, atari_score_mean desc, atari_score_min desc",
  "selected_policy": {
    "policy_name": "breakout_policy_v4",
    "selection_source": "Atari-gated search v2",
    "selection_rule": "prefer candidates with Atari gate mean >= baseline, then low_score_count asc, mean desc, min desc",
    "baseline_atari_mean": 5.15,
    "searched_candidates": 216,
    "atari_validated_candidates": 8,
    "selected_candidate": {
      "candidate_id": "c144",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 1.0,
        "policy_near_lead": 2.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 0.0,
        "policy_panic_y": 175
      },
      "go_scores": [
        14.0,
        10.0,
        18.0,
        14.0,
        10.0,
        14.0
      ],
      "go_score_mean": 13.333333333333334,
      "atari_scores": [
        2.0,
        7.0,
        7.0,
        7.0,
        1.0,
        7.0
      ],
      "atari_score_mean": 5.166666666666667,
      "atari_score_min": 1.0,
      "atari_score_max": 7.0,
      "atari_low_score_count": 2,
      "trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c144.jsonl"
    },
    "selected_go_candidate": {
      "candidate_id": "c144",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 1.0,
        "policy_near_lead": 2.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 0.0,
        "policy_panic_y": 175
      },
      "scores": [
        14.0,
        10.0,
        18.0,
        14.0,
        10.0,
        14.0
      ],
      "score_mean": 13.333333333333334,
      "score_min": 10.0,
      "score_max": 18.0,
      "low_score_count": 0
    },
    "config_path": "breakout/calibration/atari_gated_search_v2_policy_config.json",
    "rerun_policy_args": {
      "policy": "breakout_policy_v4",
      "config": "breakout/calibration/atari_gated_search_v2_policy_config.json"
    }
  },
  "atari_gate_ranked": [
    {
      "candidate_id": "c196",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 3.0,
        "policy_near_lead": 2.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 0.0,
        "policy_panic_y": 175
      },
      "go_scores": [
        12.0,
        18.0,
        12.0,
        16.0,
        12.0,
        12.0
      ],
      "go_score_mean": 13.666666666666666,
      "atari_scores": [
        3.0,
        4.0,
        4.0,
        4.0,
        0.0,
        4.0
      ],
      "atari_score_mean": 3.1666666666666665,
      "atari_score_min": 0.0,
      "atari_score_max": 4.0,
      "atari_low_score_count": 1,
      "trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c196.jsonl"
    },
    {
      "candidate_id": "c144",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 1.0,
        "policy_near_lead": 2.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 0.0,
        "policy_panic_y": 175
      },
      "go_scores": [
        14.0,
        10.0,
        18.0,
        14.0,
        10.0,
        14.0
      ],
      "go_score_mean": 13.333333333333334,
      "atari_scores": [
        2.0,
        7.0,
        7.0,
        7.0,
        1.0,
        7.0
      ],
      "atari_score_mean": 5.166666666666667,
      "atari_score_min": 1.0,
      "atari_score_max": 7.0,
      "atari_low_score_count": 2,
      "trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c144.jsonl"
    },
    {
      "candidate_id": "c145",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 1.0,
        "policy_near_lead": 2.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 0.0,
        "policy_panic_y": 185
      },
      "go_scores": [
        14.0,
        10.0,
        18.0,
        14.0,
        10.0,
        14.0
      ],
      "go_score_mean": 13.333333333333334,
      "atari_scores": [
        2.0,
        7.0,
        7.0,
        7.0,
        1.0,
        7.0
      ],
      "atari_score_mean": 5.166666666666667,
      "atari_score_min": 1.0,
      "atari_score_max": 7.0,
      "atari_low_score_count": 2,
      "trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c145.jsonl"
    },
    {
      "candidate_id": "c148",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 1.0,
        "policy_near_lead": 2.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 0.0,
        "policy_panic_y": 175
      },
      "go_scores": [
        16.0,
        18.0,
        12.0,
        14.0,
        18.0,
        16.0
      ],
      "go_score_mean": 15.666666666666666,
      "atari_scores": [
        1.0,
        7.0,
        7.0,
        7.0,
        1.0,
        7.0
      ],
      "atari_score_mean": 5.0,
      "atari_score_min": 1.0,
      "atari_score_max": 7.0,
      "atari_low_score_count": 2,
      "trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c148.jsonl"
    },
    {
      "candidate_id": "c192",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 3.0,
        "policy_near_lead": 2.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 0.0,
        "policy_panic_y": 175
      },
      "go_scores": [
        14.0,
        10.0,
        18.0,
        14.0,
        10.0,
        14.0
      ],
      "go_score_mean": 13.333333333333334,
      "atari_scores": [
        2.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ],
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 0.0,
      "atari_score_max": 5.0,
      "atari_low_score_count": 2,
      "trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c192.jsonl"
    },
    {
      "candidate_id": "c193",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 3.0,
        "policy_near_lead": 2.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 0.0,
        "policy_panic_y": 185
      },
      "go_scores": [
        14.0,
        10.0,
        18.0,
        14.0,
        10.0,
        14.0
      ],
      "go_score_mean": 13.333333333333334,
      "atari_scores": [
        2.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ],
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 0.0,
      "atari_score_max": 5.0,
      "atari_low_score_count": 2,
      "trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c193.jsonl"
    },
    {
      "candidate_id": "c077",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 150,
        "policy_far_lead": 1.0,
        "policy_near_lead": 2.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 0.0,
        "policy_panic_y": 185
      },
      "go_scores": [
        17.0,
        13.0,
        10.0,
        11.0,
        12.0,
        17.0
      ],
      "go_score_mean": 13.333333333333334,
      "atari_scores": [
        6.0,
        2.0,
        2.0,
        2.0,
        2.0,
        2.0
      ],
      "atari_score_mean": 2.6666666666666665,
      "atari_score_min": 2.0,
      "atari_score_max": 6.0,
      "atari_low_score_count": 5,
      "trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c077.jsonl"
    },
    {
      "candidate_id": "c076",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 150,
        "policy_far_lead": 1.0,
        "policy_near_lead": 2.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 0.0,
        "policy_panic_y": 175
      },
      "go_scores": [
        17.0,
        13.0,
        10.0,
        11.0,
        12.0,
        17.0
      ],
      "go_score_mean": 13.333333333333334,
      "atari_scores": [
        2.0,
        2.0,
        2.0,
        2.0,
        1.0,
        2.0
      ],
      "atari_score_mean": 1.8333333333333333,
      "atari_score_min": 1.0,
      "atari_score_max": 2.0,
      "atari_low_score_count": 6,
      "trace_path": "breakout/calibration/traces/atari_gated_search_v2_gate/policy_search_v2_c076.jsonl"
    }
  ]
}

rank_correlation

{
  "sample_size": 8,
  "pearson_go_mean_vs_atari_gate_mean": 0.3684150467847217,
  "spearman_go_rank_vs_atari_gate_rank": 0.047327796889219946,
  "interpretation": "Atari gate is the selection authority; Go ranking is retained as the coarse prefilter."
}

final_validation

{
  "go_20_seed": {
    "distribution": {
      "count": 20,
      "scores": [
        14.0,
        10.0,
        18.0,
        14.0,
        10.0,
        14.0,
        14.0,
        16.0,
        14.0,
        11.0,
        16.0,
        16.0,
        11.0,
        15.0,
        12.0,
        17.0,
        12.0,
        14.0,
        12.0,
        12.0
      ],
      "mean": 13.6,
      "median": 14.0,
      "min": 10.0,
      "max": 18.0,
      "stdev": 2.244994432064365,
      "low_score_count_lt3": 0
    },
    "steps_mean": 1374.8,
    "done_count": 20,
    "rows": [
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 0,
        "episode": 0,
        "score": 14,
        "steps": 1376,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep000_seed0.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 1,
        "episode": 1,
        "score": 10,
        "steps": 1031,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep001_seed1.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 2,
        "episode": 2,
        "score": 18,
        "steps": 1800,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep002_seed2.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 3,
        "episode": 3,
        "score": 14,
        "steps": 1395,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep003_seed3.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 4,
        "episode": 4,
        "score": 10,
        "steps": 1016,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep004_seed4.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 5,
        "episode": 5,
        "score": 14,
        "steps": 1376,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep005_seed5.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 6,
        "episode": 6,
        "score": 14,
        "steps": 1398,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep006_seed6.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 7,
        "episode": 7,
        "score": 16,
        "steps": 1652,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep007_seed7.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 8,
        "episode": 8,
        "score": 14,
        "steps": 1373,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep008_seed8.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 9,
        "episode": 9,
        "score": 11,
        "steps": 1157,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep009_seed9.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 10,
        "episode": 10,
        "score": 16,
        "steps": 1652,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep010_seed10.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 11,
        "episode": 11,
        "score": 16,
        "steps": 1620,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep011_seed11.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 12,
        "episode": 12,
        "score": 11,
        "steps": 1150,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep012_seed12.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 13,
        "episode": 13,
        "score": 15,
        "steps": 1469,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep013_seed13.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 14,
        "episode": 14,
        "score": 12,
        "steps": 1190,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep014_seed14.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 15,
        "episode": 15,
        "score": 17,
        "steps": 1699,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep015_seed15.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 16,
        "episode": 16,
        "score": 12,
        "steps": 1191,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep016_seed16.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 17,
        "episode": 17,
        "score": 14,
        "steps": 1477,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep017_seed17.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 18,
        "episode": 18,
        "score": 12,
        "steps": 1190,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep018_seed18.jsonl"
      },
      {
        "timestamp": "20260510T165312Z",
        "trial_index": 1,
        "policy_name": "breakout_policy_v4",
        "seed": 19,
        "episode": 19,
        "score": 12,
        "steps": 1284,
        "done": true,
        "replay_path": "breakout/runs-atari-gated-search-v2/replays/20260510T165312Z_ep019_seed19.jsonl"
      }
    ]
  },
  "atari_20_seed": {
    "distribution": {
      "count": 20,
      "scores": [
        2.0,
        7.0,
        7.0,
        7.0,
        1.0,
        7.0,
        2.0,
        7.0,
        2.0,
        1.0,
        5.0,
        5.0,
        7.0,
        2.0,
        1.0,
        7.0,
        2.0,
        2.0,
        5.0,
        1.0
      ],
      "mean": 4.0,
      "median": 3.5,
      "min": 1.0,
      "max": 7.0,
      "stdev": 2.5099800796022267,
      "low_score_count_lt3": 10
    },
    "steps_mean": 1800.0,
    "done_count": 0,
    "rows": [
      {
        "episode": 0,
        "score": 2.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 1,
        "score": 7.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 2,
        "score": 7.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 3,
        "score": 7.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 4,
        "score": 1.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 5,
        "score": 7.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 6,
        "score": 2.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 7,
        "score": 7.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 8,
        "score": 2.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 9,
        "score": 1.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 10,
        "score": 5.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 11,
        "score": 5.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 12,
        "score": 7.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 13,
        "score": 2.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 14,
        "score": 1.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 15,
        "score": 7.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 16,
        "score": 2.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 17,
        "score": 2.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 18,
        "score": 5.0,
        "steps": 1800,
        "done": false
      },
      {
        "episode": 19,
        "score": 1.0,
        "steps": 1800,
        "done": false
      }
    ]
  },
  "env_fidelity_fix_v3_reference": {
    "go_10_seed": {
      "count": 10,
      "scores": [
        7.0,
        6.0,
        15.0,
        13.0,
        9.0,
        7.0,
        9.0,
        8.0,
        8.0,
        8.0
      ],
      "mean": 9.0,
      "median": 8.0,
      "min": 6.0,
      "max": 15.0,
      "stdev": 2.6832815729997477,
      "low_score_count_lt3": 0
    },
    "atari_10_seed": {
      "count": 10,
      "scores": [
        2.0,
        13.0,
        13.0,
        13.0,
        0.0,
        13.0,
        2.0,
        13.0,
        2.0,
        0.0
      ],
      "mean": 7.1,
      "median": 7.5,
      "min": 0.0,
      "max": 13.0,
      "stdev": 5.940538696111658,
      "low_score_count_lt3": 5
    }
  }
}

decision

{
  "verdict": "pass",
  "atari_mean_delta_vs_5_15": -1.1500000000000004,
  "atari_low_score_count_delta_vs_13": -3,
  "diagnosis": "Atari-gated v4 lowers the robust 20-seed low-score count, but mean score remains below the robust benchmark."
}

rerun_commands

[
  "bash breakout/scripts/run_atari_gated_search_v2_wsl.sh"
]

evidence

{
  "r2_video_url": "https://file.aimusic.win/gemma/breakout/atari_gated_search_v2_atari_seed1.mp4",
  "r2_artifacts_url": "https://file.aimusic.win/gemma/breakout/atari_gated_search_v2_wsl_artifacts.tgz"
}