Breakout Brick/Score Fidelity v3

atari_reward_evidence

{
  "episodes": 2,
  "reward_count": 9,
  "score_sum": 9.0,
  "per_episode_spacing": {
    "0": {
      "steps": [
        40,
        89,
        138,
        174,
        199,
        222,
        246
      ],
      "count": 7,
      "min_gap": 23,
      "median_gap": 30.5,
      "dense_gap_le_12_count": 0,
      "gaps": [
        49,
        49,
        36,
        25,
        23,
        24
      ]
    },
    "1": {
      "steps": [
        40,
        79
      ],
      "count": 2,
      "min_gap": 39,
      "median_gap": 39.0,
      "dense_gap_le_12_count": 0,
      "gaps": [
        39
      ]
    }
  },
  "top_reward_window_ram": [
    {
      "byte": 70,
      "reward_window_changes": 9,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            44,
            66,
            44
          ]
        },
        {
          "episode": 0,
          "step": 89,
          "values": [
            66,
            44,
            22
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            44,
            22,
            0
          ]
        },
        {
          "episode": 0,
          "step": 174,
          "values": [
            184,
            160,
            138
          ]
        },
        {
          "episode": 0,
          "step": 199,
          "values": [
            68,
            46,
            46
          ]
        },
        {
          "episode": 0,
          "step": 222,
          "values": [
            160,
            138,
            138
          ]
        },
        {
          "episode": 0,
          "step": 246,
          "values": [
            68,
            46,
            68
          ]
        },
        {
          "episode": 1,
          "step": 40,
          "values": [
            158,
            180,
            180
          ]
        }
      ]
    },
    {
      "byte": 72,
      "reward_window_changes": 9,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            190,
            179,
            182
          ]
        },
        {
          "episode": 0,
          "step": 89,
          "values": [
            179,
            182,
            191
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            182,
            191,
            191
          ]
        },
        {
          "episode": 0,
          "step": 174,
          "values": [
            62,
            64,
            85
          ]
        },
        {
          "episode": 0,
          "step": 199,
          "values": [
            156,
            177,
            188
          ]
        },
        {
          "episode": 0,
          "step": 222,
          "values": [
            85,
            86,
            96
          ]
        },
        {
          "episode": 0,
          "step": 246,
          "values": [
            156,
            177,
            175
          ]
        },
        {
          "episode": 1,
          "step": 40,
          "values": [
            66,
            65,
            55
          ]
        }
      ]
    },
    {
      "byte": 77,
      "reward_window_changes": 9,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            0,
            1,
            1
          ]
        },
        {
          "episode": 0,
          "step": 89,
          "values": [
            1,
            2,
            2
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            2,
            3,
            3
          ]
        },
        {
          "episode": 0,
          "step": 174,
          "values": [
            3,
            4,
            4
          ]
        },
        {
          "episode": 0,
          "step": 199,
          "values": [
            4,
            5,
            5
          ]
        },
        {
          "episode": 0,
          "step": 222,
          "values": [
            5,
            6,
            6
          ]
        },
        {
          "episode": 0,
          "step": 246,
          "values": [
            6,
            7,
            7
          ]
        },
        {
          "episode": 1,
          "step": 40,
          "values": [
            0,
            1,
            1
          ]
        }
      ]
    },
    {
      "byte": 84,
      "reward_window_changes": 9,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            0,
            5,
            5
          ]
        },
        {
          "episode": 0,
          "step": 89,
          "values": [
            5,
            10,
            10
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            10,
            15,
            15
          ]
        },
        {
          "episode": 0,
          "step": 174,
          "values": [
            15,
            20,
            20
          ]
        },
        {
          "episode": 0,
          "step": 199,
          "values": [
            20,
            25,
            25
          ]
        },
        {
          "episode": 0,
          "step": 222,
          "values": [
            25,
            30,
            30
          ]
        },
        {
          "episode": 0,
          "step": 246,
          "values": [
            30,
            35,
            35
          ]
        },
        {
          "episode": 1,
          "step": 40,
          "values": [
            0,
            5,
            5
          ]
        }
      ]
    },
    {
      "byte": 90,
      "reward_window_changes": 9,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            173,
            177,
            181
          ]
        },
        {
          "episode": 0,
          "step": 89,
          "values": [
            113,
            117,
            121
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            53,
            57,
            61
          ]
        },
        {
          "episode": 0,
          "step": 174,
          "values": [
            197,
            201,
            205
          ]
        },
        {
          "episode": 0,
          "step": 199,
          "values": [
            41,
            45,
            49
          ]
        },
        {
          "episode": 0,
          "step": 222,
          "values": [
            133,
            137,
            141
          ]
        },
        {
          "episode": 0,
          "step": 246,
          "values": [
            229,
            233,
            237
          ]
        },
        {
          "episode": 1,
          "step": 40,
          "values": [
            174,
            178,
            182
          ]
        }
      ]
    },
    {
      "byte": 95,
      "reward_window_changes": 9,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            128,
            5,
            1
          ]
        },
        {
          "episode": 0,
          "step": 89,
          "values": [
            128,
            5,
            1
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            128,
            3,
            0
          ]
        },
        {
          "episode": 0,
          "step": 174,
          "values": [
            64,
            5,
            1
          ]
        },
        {
          "episode": 0,
          "step": 199,
          "values": [
            128,
            3,
            0
          ]
        },
        {
          "episode": 0,
          "step": 222,
          "values": [
            128,
            5,
            1
          ]
        },
        {
          "episode": 0,
          "step": 246,
          "values": [
            128,
            3,
            0
          ]
        },
        {
          "episode": 1,
          "step": 40,
          "values": [
            69,
            3,
            0
          ]
        }
      ]
    },
    {
      "byte": 99,
      "reward_window_changes": 9,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            186,
            192,
            198
          ]
        },
        {
          "episode": 0,
          "step": 89,
          "values": [
            192,
            198,
            195
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            198,
            195,
            189
          ]
        },
        {
          "episode": 0,
          "step": 174,
          "values": [
            74,
            76,
            78
          ]
        },
        {
          "episode": 0,
          "step": 199,
          "values": [
            172,
            178,
            184
          ]
        },
        {
          "episode": 0,
          "step": 222,
          "values": [
            91,
            85,
            79
          ]
        },
        {
          "episode": 0,
          "step": 246,
          "values": [
            163,
            169,
            175
          ]
        },
        {
          "episode": 1,
          "step": 40,
          "values": [
            58,
            64,
            70
          ]
        }
      ]
    },
    {
      "byte": 101,
      "reward_window_changes": 9,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            87,
            85,
            89
          ]
        },
        {
          "episode": 0,
          "step": 89,
          "values": [
            87,
            85,
            89
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            79,
            81,
            85
          ]
        },
        {
          "episode": 0,
          "step": 174,
          "values": [
            89,
            85,
            93
          ]
        },
        {
          "episode": 0,
          "step": 199,
          "values": [
            85,
            89,
            97
          ]
        },
        {
          "episode": 0,
          "step": 222,
          "values": [
            89,
            85,
            93
          ]
        },
        {
          "episode": 0,
          "step": 246,
          "values": [
            85,
            89,
            97
          ]
        },
        {
          "episode": 1,
          "step": 40,
          "values": [
            85,
            87,
            91
          ]
        }
      ]
    },
    {
      "byte": 103,
      "reward_window_changes": 9,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            255,
            1,
            1
          ]
        },
        {
          "episode": 0,
          "step": 89,
          "values": [
            255,
            1,
            1
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            255,
            1,
            1
          ]
        },
        {
          "episode": 0,
          "step": 174,
          "values": [
            254,
            2,
            2
          ]
        },
        {
          "episode": 0,
          "step": 199,
          "values": [
            254,
            2,
            2
          ]
        },
        {
          "episode": 0,
          "step": 222,
          "values": [
            254,
            2,
            2
          ]
        },
        {
          "episode": 0,
          "step": 246,
          "values": [
            254,
            2,
            2
          ]
        },
        {
          "episode": 1,
          "step": 40,
          "values": [
            255,
            1,
            1
          ]
        }
      ]
    },
    {
      "byte": 107,
      "reward_window_changes": 9,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            65,
            129,
            129
          ]
        },
        {
          "episode": 0,
          "step": 89,
          "values": [
            66,
            130,
            130
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            67,
            131,
            131
          ]
        },
        {
          "episode": 0,
          "step": 174,
          "values": [
            68,
            132,
            132
          ]
        },
        {
          "episode": 0,
          "step": 199,
          "values": [
            69,
            133,
            133
          ]
        },
        {
          "episode": 0,
          "step": 222,
          "values": [
            70,
            134,
            134
          ]
        },
        {
          "episode": 0,
          "step": 246,
          "values": [
            71,
            135,
            135
          ]
        },
        {
          "episode": 1,
          "step": 40,
          "values": [
            65,
            129,
            129
          ]
        }
      ]
    },
    {
      "byte": 122,
      "reward_window_changes": 5,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            246,
            150,
            150
          ]
        },
        {
          "episode": 0,
          "step": 89,
          "values": [
            246,
            150,
            150
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            246,
            150,
            150
          ]
        },
        {
          "episode": 0,
          "step": 174,
          "values": [
            246,
            150,
            150
          ]
        },
        {
          "episode": 1,
          "step": 40,
          "values": [
            246,
            150,
            150
          ]
        }
      ]
    },
    {
      "byte": 96,
      "reward_window_changes": 4,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            0,
            28,
            28
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            28,
            22,
            22
          ]
        },
        {
          "episode": 0,
          "step": 174,
          "values": [
            22,
            28,
            28
          ]
        },
        {
          "episode": 1,
          "step": 40,
          "values": [
            0,
            28,
            28
          ]
        }
      ]
    },
    {
      "byte": 0,
      "reward_window_changes": 2,
      "examples": [
        {
          "episode": 0,
          "step": 40,
          "values": [
            63,
            51,
            51
          ]
        },
        {
          "episode": 0,
          "step": 89,
          "values": [
            51,
            3,
            3
          ]
        }
      ]
    },
    {
      "byte": 6,
      "reward_window_changes": 2,
      "examples": [
        {
          "episode": 0,
          "step": 199,
          "values": [
            255,
            252,
            252
          ]
        },
        {
          "episode": 0,
          "step": 246,
          "values": [
            252,
            240,
            240
          ]
        }
      ]
    },
    {
      "byte": 24,
      "reward_window_changes": 2,
      "examples": [
        {
          "episode": 0,
          "step": 174,
          "values": [
            255,
            207,
            207
          ]
        },
        {
          "episode": 0,
          "step": 222,
          "values": [
            207,
            195,
            195
          ]
        }
      ]
    },
    {
      "byte": 105,
      "reward_window_changes": 2,
      "examples": [
        {
          "episode": 0,
          "step": 89,
          "values": [
            254,
            254,
            1
          ]
        },
        {
          "episode": 0,
          "step": 138,
          "values": [
            254,
            1,
            1
          ]
        }
      ]
    },
    {
      "byte": 1,
      "reward_window_changes": 1,
      "examples": [
        {
          "episode": 0,
          "step": 138,
          "values": [
            63,
            15,
            15
          ]
        }
      ]
    },
    {
      "byte": 12,
      "reward_window_changes": 1,
      "examples": [
        {
          "episode": 1,
          "step": 79,
          "values": [
            255,
            207,
            207
          ]
        }
      ]
    },
    {
      "byte": 30,
      "reward_window_changes": 1,
      "examples": [
        {
          "episode": 1,
          "step": 40,
          "values": [
            192,
            0,
            0
          ]
        }
      ]
    }
  ],
  "visual_reward_examples": [
    {
      "episode": 0,
      "step": 40,
      "reward": 1.0,
      "ball_before": [
        137.5,
        97.5
      ],
      "ball_at_reward": [
        143.5,
        95.5
      ],
      "ball_after": [
        149.5,
        99.5
      ],
      "brick_area_before": 7800.0,
      "brick_area_at_reward": 7752.0,
      "brick_area_after": 7752.0
    },
    {
      "episode": 0,
      "step": 89,
      "reward": 1.0,
      "ball_before": [
        143.5,
        97.5
      ],
      "ball_at_reward": [
        149.5,
        95.5
      ],
      "ball_after": [
        146.5,
        99.5
      ],
      "brick_area_before": 7752.0,
      "brick_area_at_reward": 7704.0,
      "brick_area_after": 7704.0
    },
    {
      "episode": 0,
      "step": 138,
      "reward": 1.0,
      "ball_before": null,
      "ball_at_reward": [
        146.5,
        91.0
      ],
      "ball_after": [
        140.5,
        95.5
      ],
      "brick_area_before": 7704.0,
      "brick_area_at_reward": 7658.0,
      "brick_area_after": 7656.0
    },
    {
      "episode": 0,
      "step": 174,
      "reward": 1.0,
      "ball_before": [
        25.5,
        99.5
      ],
      "ball_at_reward": [
        27.5,
        95.5
      ],
      "ball_after": [
        29.5,
        103.5
      ],
      "brick_area_before": 7656.0,
      "brick_area_at_reward": 7608.0,
      "brick_area_after": 7608.0
    },
    {
      "episode": 0,
      "step": 199,
      "reward": 1.0,
      "ball_before": [
        123.5,
        95.5
      ],
      "ball_at_reward": [
        129.5,
        99.5
      ],
      "ball_after": [
        135.5,
        107.5
      ],
      "brick_area_before": 7608.0,
      "brick_area_at_reward": 7560.0,
      "brick_area_after": 7560.0
    },
    {
      "episode": 0,
      "step": 222,
      "reward": 1.0,
      "ball_before": [
        42.5,
        99.5
      ],
      "ball_at_reward": [
        36.5,
        95.5
      ],
      "ball_after": [
        30.5,
        103.5
      ],
      "brick_area_before": 7560.0,
      "brick_area_at_reward": 7512.0,
      "brick_area_after": 7512.0
    },
    {
      "episode": 0,
      "step": 246,
      "reward": 1.0,
      "ball_before": [
        114.5,
        95.5
      ],
      "ball_at_reward": [
        120.5,
        99.5
      ],
      "ball_after": [
        126.5,
        107.5
      ],
      "brick_area_before": 7512.0,
      "brick_area_at_reward": 7464.0,
      "brick_area_after": 7464.0
    },
    {
      "episode": 1,
      "step": 40,
      "reward": 1.0,
      "ball_before": [
        9.5,
        95.5
      ],
      "ball_at_reward": [
        15.5,
        97.5
      ],
      "ball_after": [
        21.5,
        101.5
      ],
      "brick_area_before": 7800.0,
      "brick_area_at_reward": 7752.0,
      "brick_area_after": 7752.0
    },
    {
      "episode": 1,
      "step": 79,
      "reward": 1.0,
      "ball_before": null,
      "ball_at_reward": [
        80.5,
        100.5
      ],
      "ball_after": [
        76.5,
        106.5
      ],
      "brick_area_before": 7752.0,
      "brick_area_at_reward": 7704.0,
      "brick_area_after": 7704.0
    }
  ]
}

go_reward_evidence

{
  "episodes": 2,
  "reward_count": 12,
  "score_sum": 12.0,
  "per_episode_spacing": {
    "0": {
      "steps": [
        40,
        128,
        179,
        236,
        290,
        346,
        438
      ],
      "count": 7,
      "min_gap": 51,
      "median_gap": 56.5,
      "dense_gap_le_12_count": 0,
      "gaps": [
        88,
        51,
        57,
        54,
        56,
        92
      ]
    },
    "1": {
      "steps": [
        40,
        128,
        216,
        304,
        392
      ],
      "count": 5,
      "min_gap": 88,
      "median_gap": 88.0,
      "dense_gap_le_12_count": 0,
      "gaps": [
        88,
        88,
        88,
        88
      ]
    }
  },
  "max_rewards_in_12_step_window": 1,
  "brick_examples": [
    {
      "episode": 0,
      "step": 40,
      "reward": 1,
      "ball_x": 56,
      "ball_y": 79.75,
      "ball_vx": 3.5,
      "ball_vy": 2.75,
      "brick_events": [
        {
          "type": "brick",
          "row": 5,
          "col": 4
        }
      ]
    },
    {
      "episode": 0,
      "step": 128,
      "reward": 1,
      "ball_x": 35,
      "ball_y": 79.75,
      "ball_vx": 3.5,
      "ball_vy": 2.75,
      "brick_events": [
        {
          "type": "brick",
          "row": 5,
          "col": 2
        }
      ]
    },
    {
      "episode": 0,
      "step": 179,
      "reward": 1,
      "ball_x": 131.25,
      "ball_y": 78,
      "ball_vx": -0.75,
      "ball_vy": 10,
      "brick_events": [
        {
          "type": "brick",
          "row": 5,
          "col": 11
        }
      ]
    },
    {
      "episode": 0,
      "step": 236,
      "reward": 1,
      "ball_x": 48.5,
      "ball_y": 71.5,
      "ball_vx": -3.5,
      "ball_vy": 2.75,
      "brick_events": [
        {
          "type": "brick",
          "row": 4,
          "col": 3
        }
      ]
    },
    {
      "episode": 0,
      "step": 290,
      "reward": 1,
      "ball_x": 93.25,
      "ball_y": 78,
      "ball_vx": -0.75,
      "ball_vy": 10,
      "brick_events": [
        {
          "type": "brick",
          "row": 5,
          "col": 7
        }
      ]
    },
    {
      "episode": 0,
      "step": 346,
      "reward": 1,
      "ball_x": 59.5,
      "ball_y": 74.25,
      "ball_vx": 3.5,
      "ball_vy": 2.75,
      "brick_events": [
        {
          "type": "brick",
          "row": 5,
          "col": 5
        }
      ]
    },
    {
      "episode": 0,
      "step": 438,
      "reward": 1,
      "ball_x": 80,
      "ball_y": 74.25,
      "ball_vx": -3.5,
      "ball_vy": 2.75,
      "brick_events": [
        {
          "type": "brick",
          "row": 5,
          "col": 6
        }
      ]
    },
    {
      "episode": 1,
      "step": 40,
      "reward": 1,
      "ball_x": 101,
      "ball_y": 79.75,
      "ball_vx": -3.5,
      "ball_vy": 2.75,
      "brick_events": [
        {
          "type": "brick",
          "row": 5,
          "col": 8
        }
      ]
    },
    {
      "episode": 1,
      "step": 128,
      "reward": 1,
      "ball_x": 115,
      "ball_y": 79.75,
      "ball_vx": -3.5,
      "ball_vy": 2.75,
      "brick_events": [
        {
          "type": "brick",
          "row": 5,
          "col": 9
        }
      ]
    },
    {
      "episode": 1,
      "step": 216,
      "reward": 1,
      "ball_x": 136,
      "ball_y": 79.75,
      "ball_vx": -3.5,
      "ball_vy": 2.75,
      "brick_events": [
        {
          "type": "brick",
          "row": 5,
          "col": 11
        }
      ]
    },
    {
      "episode": 1,
      "step": 304,
      "reward": 1,
      "ball_x": 146.5,
      "ball_y": 79.75,
      "ball_vx": -3.5,
      "ball_vy": 2.75,
      "brick_events": [
        {
          "type": "brick",
          "row": 5,
          "col": 12
        }
      ]
    },
    {
      "episode": 1,
      "step": 392,
      "reward": 1,
      "ball_x": 157,
      "ball_y": 79.75,
      "ball_vx": -3.5,
      "ball_vy": 2.75,
      "brick_events": [
        {
          "type": "brick",
          "row": 5,
          "col": 13
        }
      ]
    }
  ]
}

transfer_delta

{
  "before_ratio": 0.5,
  "after_ratio": 0.75,
  "ratio_delta": 0.25,
  "before_go_mean": 9.0,
  "after_go_mean": 6.0,
  "before_atari_mean": 4.5,
  "after_atari_mean": 4.5,
  "after_verdict": "partial_transfer"
}

changes

[
  "Added brick_cooldown=12 to prevent one brick-region traversal from causing dense multi-brick reward bursts.",
  "The cooldown targets Go seed0's previous 7 rewards within 12 steps while preserving first_reward_step=40."
]

video_urls

[
  "https://file.aimusic.win/gemma/breakout/brick_score_v3_atari.mp4"
]