Breakout Atari-Validated Policy Search Harness v1

objective

{
  "name": "Breakout Atari-validated policy search harness v1",
  "script": "breakout/scripts/search_policies.py",
  "policy_family": "breakout_policy_v3 parameter configs"
}

search

{
  "go_searched_candidates": 216,
  "atari_validated_candidates": 8,
  "go_ranking_key": "low_score_count asc, score_min desc, score_mean desc",
  "atari_ranking_key": "atari_low_score_count asc, atari_score_mean desc, atari_score_min desc",
  "validated_candidates": [
    {
      "candidate_id": "c182",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 175
      },
      "go_score_mean": 7.666666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 1.0,
      "atari_low_score_count": 2,
      "go_scores": [
        9.0,
        7.0,
        6.0,
        8.0,
        7.0,
        9.0
      ],
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        1.0,
        5.0
      ]
    },
    {
      "candidate_id": "c039",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.5,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.5,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ]
    },
    {
      "candidate_id": "c111",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 150,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.5,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.5,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ]
    },
    {
      "candidate_id": "c183",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.5,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 1.0,
      "atari_low_score_count": 2,
      "go_scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        1.0,
        5.0
      ]
    },
    {
      "candidate_id": "c011",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 1.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.166666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "atari_scores": [
        2.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ]
    },
    {
      "candidate_id": "c015",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 1.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.166666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 9.0,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "atari_scores": [
        2.0,
        13.0,
        13.0,
        13.0,
        0.0,
        13.0
      ]
    },
    {
      "candidate_id": "c035",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.166666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "atari_scores": [
        2.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ]
    },
    {
      "candidate_id": "c059",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 3.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.166666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.0,
      "atari_score_min": 1.0,
      "atari_low_score_count": 2,
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "atari_scores": [
        1.0,
        4.0,
        4.0,
        4.0,
        1.0,
        4.0
      ]
    }
  ],
  "go_ranked_candidates": [
    {
      "candidate_id": "c182",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 175
      },
      "go_score_mean": 7.666666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 1.0,
      "atari_low_score_count": 2,
      "go_scores": [
        9.0,
        7.0,
        6.0,
        8.0,
        7.0,
        9.0
      ],
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        1.0,
        5.0
      ]
    },
    {
      "candidate_id": "c039",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.5,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.5,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ]
    },
    {
      "candidate_id": "c111",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 150,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.5,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.5,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ]
    },
    {
      "candidate_id": "c183",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.5,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 1.0,
      "atari_low_score_count": 2,
      "go_scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        1.0,
        5.0
      ]
    },
    {
      "candidate_id": "c011",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 1.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.166666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "atari_scores": [
        2.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ]
    },
    {
      "candidate_id": "c015",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 1.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.166666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 9.0,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "atari_scores": [
        2.0,
        13.0,
        13.0,
        13.0,
        0.0,
        13.0
      ]
    },
    {
      "candidate_id": "c035",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.166666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "atari_scores": [
        2.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ]
    },
    {
      "candidate_id": "c059",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 3.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.166666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.0,
      "atari_score_min": 1.0,
      "atari_low_score_count": 2,
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "atari_scores": [
        1.0,
        4.0,
        4.0,
        4.0,
        1.0,
        4.0
      ]
    }
  ],
  "atari_ranked_candidates": [
    {
      "candidate_id": "c015",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 1.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.166666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 9.0,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "atari_scores": [
        2.0,
        13.0,
        13.0,
        13.0,
        0.0,
        13.0
      ]
    },
    {
      "candidate_id": "c182",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 175
      },
      "go_score_mean": 7.666666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 1.0,
      "atari_low_score_count": 2,
      "go_scores": [
        9.0,
        7.0,
        6.0,
        8.0,
        7.0,
        9.0
      ],
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        1.0,
        5.0
      ]
    },
    {
      "candidate_id": "c183",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 165,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.5,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 1.0,
      "atari_low_score_count": 2,
      "go_scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        1.0,
        5.0
      ]
    },
    {
      "candidate_id": "c011",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 1.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.166666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "atari_scores": [
        2.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ]
    },
    {
      "candidate_id": "c035",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.166666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.6666666666666665,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "atari_scores": [
        2.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ]
    },
    {
      "candidate_id": "c039",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.5,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.5,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ]
    },
    {
      "candidate_id": "c111",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 150,
        "policy_far_lead": 2.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 2.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.5,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.5,
      "atari_score_min": 0.0,
      "atari_low_score_count": 2,
      "go_scores": [
        9.0,
        7.0,
        6.0,
        7.0,
        7.0,
        9.0
      ],
      "atari_scores": [
        1.0,
        5.0,
        5.0,
        5.0,
        0.0,
        5.0
      ]
    },
    {
      "candidate_id": "c059",
      "params": {
        "policy_far_cap": 10.0,
        "policy_near_cap": 18.0,
        "policy_base_deadzone": 2.0,
        "policy_near_y": 140,
        "policy_far_lead": 3.0,
        "policy_near_lead": 3.0,
        "policy_far_deadzone": 0.0,
        "policy_near_deadzone": 1.0,
        "policy_panic_y": 185
      },
      "go_score_mean": 7.166666666666667,
      "go_score_min": 6.0,
      "go_low_score_count": 0,
      "atari_score_mean": 3.0,
      "atari_score_min": 1.0,
      "atari_low_score_count": 2,
      "go_scores": [
        8.0,
        7.0,
        6.0,
        7.0,
        7.0,
        8.0
      ],
      "atari_scores": [
        1.0,
        4.0,
        4.0,
        4.0,
        1.0,
        4.0
      ]
    }
  ]
}

correlation

{
  "sample_size": 8,
  "pearson_go_mean_vs_atari_mean": -0.32584242221528265,
  "spearman_go_rank_vs_atari_rank": -0.13989092759813318,
  "go_shadow_effective_as_ranker": false,
  "interpretation": "Go shadow score is not a reliable ranker in this candidate window; use it as a coarse prefilter and keep Atari validation in the loop."
}

evidence

{
  "go_candidates": "breakout/calibration/reports/policy_search_v2_go_candidates.json",
  "atari_candidates": "breakout/calibration/reports/policy_search_v2_atari_probe_candidates.json",
  "r2_video_url": "https://file.aimusic.win/gemma/breakout/search_harness_v1_atari_best.mp4"
}