diff --git a/config/affine_lock.ini b/config/affine_lock.ini new file mode 100644 index 0000000000..2352e98a8a --- /dev/null +++ b/config/affine_lock.ini @@ -0,0 +1,153 @@ +[base] +env_name = affine_lock + +[vec] +total_agents = 4096 +num_buffers = 2 +num_threads = 16 + +[policy] +hidden_size = 256 +num_layers = 3 + +[env] +seed = 42 +start_depth = 2 +max_depth = 16 +step_grace = 0 + +[train] +total_timesteps = 200_000_000 +horizon = 64 +minibatch_size = 8192 +learning_rate = 0.012 +ent_coef = 0.2 +gamma = 0.8 +gae_lambda = 0.995 +replay_ratio = 3.0 +clip_coef = 0.83 +vf_coef = 4.75 +vf_clip_coef = 0.8 +max_grad_norm = 3.0 +beta1 = 0.5 +beta2 = 0.9915 +eps = 0.0001 +vtrace_rho_clip = 1.4 +vtrace_c_clip = 3.75 +prio_alpha = 0.055 +prio_beta0 = 0.161 + +[sweep] +metric = perf +goal = maximize +max_runs = 50 +use_gpu = False + +[sweep.train.total_timesteps] +distribution = log_normal +min = 100_000_000 +max = 200_000_000 +mean = 200_000_000 +scale = time + +[sweep.vec.total_agents] +distribution = uniform_pow2 +min = 4096 +max = 16384 +mean = 4096 +scale = auto + +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 64 +max = 512 +mean = 256 +scale = auto + +[sweep.policy.num_layers] +distribution = uniform +min = 1 +max = 4 +mean = 3 +scale = auto + +[sweep.vec.num_buffers] +distribution = uniform +min = 1 +max = 4 +mean = 2 +scale = auto + +[sweep.train.horizon] +distribution = uniform_pow2 +min = 32 +max = 128 +mean = 64 +scale = auto + +[sweep.train.minibatch_size] +distribution = uniform_pow2 +min = 8192 +max = 131072 +mean = 8192 +scale = auto + +[sweep.train.replay_ratio] +distribution = uniform +min = 1.0 +max = 4.0 +mean = 3.0 +scale = auto + +[sweep.train.learning_rate] +mean = 0.012 + +[sweep.train.ent_coef] +mean = 0.2 + +[sweep.train.gamma] +mean = 0.8 + +[sweep.train.gae_lambda] +mean = 0.995 + +[sweep.train.vtrace_rho_clip] +mean = 1.4 + +[sweep.train.vtrace_c_clip] +mean = 3.75 + +[sweep.train.clip_coef] +mean = 0.83 + 
+[sweep.train.vf_clip_coef] +distribution = uniform +min = 0.001 +max = 5.0 +mean = 0.8 +scale = auto + +[sweep.train.vf_coef] +distribution = uniform +min = 0.1 +max = 8.0 +mean = 4.75 +scale = auto + +[sweep.train.max_grad_norm] +mean = 3.0 + +[sweep.train.beta1] +mean = 0.5 + +[sweep.train.beta2] +mean = 0.9915 + +[sweep.train.eps] +mean = 0.0001 + +[sweep.train.prio_alpha] +mean = 0.055 + +[sweep.train.prio_beta0] +mean = 0.161 diff --git a/ocean/affine_lock/README.md b/ocean/affine_lock/README.md new file mode 100644 index 0000000000..5fcb2b530f --- /dev/null +++ b/ocean/affine_lock/README.md @@ -0,0 +1,237 @@ +# affine_lock + +`affine_lock` is a single-agent 16-bit state-matching environment. Each episode +starts from a current bit state and a target bit state. The agent applies one of +eight reversible bit transforms until the current state equals the target. + +The committed training path uses the generated visible-target table: + +```text +ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin +``` + +That table is loaded at reset time and provides exact start/target pairs for the +curriculum depths configured in `config/affine_lock.ini`. + +## Runtime Action Set + +The runtime environment uses the committed 8-action set. The generator and +manifest identify this exact transform set as `affine_lock_8action_v1`: + +| Id | Name | Effect | +| ---: | --- | --- | +| `0` | `shift_left` | rotate bit positions left | +| `1` | `shift_right` | rotate bit positions right | +| `2` | `invert_right_7` | flip bits `9..15` | +| `3` | `swap_adjacent_bits` | swap each adjacent bit pair | +| `4` | `swap_adjacent_pairs` | swap each adjacent two-bit pair | +| `5` | `swap_nibbles_each_byte` | swap low/high nibbles within each byte | +| `6` | `reverse_each_nibble` | reverse bit order within each nibble | +| `7` | `reverse_each_byte` | reverse bit order within each byte | + +The Puffer binding exposes one discrete action slot with +`AFFINE_LOCK_NUM_ACTIONS = 8`. 
+ +## Resets + +Resets always sample from the committed visible-target table. To train or test +on different target distributions, generate a new table with the tool below and +point `AFFINE_LOCK_VISIBLE_TARGET_TABLE_PATH` at it when building. + +## Committed Target Table + +The committed table stores sampled visible start/target pairs at depths `2`, +`4`, `5`, `6`, and `8`, plus every known true depth-16 pair for this action +set. + +| Depth | True visible pairs | Stored records | +| ---: | ---: | ---: | +| `2` | `2,216,496` | `65,536` | +| `4` | `34,379,722` | `65,536` | +| `5` | `115,388,932` | `65,536` | +| `6` | `331,789,220` | `65,536` | +| `8` | `1,125,374,770` | `65,536` | +| `16` | `100,548` | `100,548` | + +The table format can store any depth sections, but this generator currently +targets the fixed depth list `{2, 4, 5, 6, 8, 16}`. The runtime `seed` controls +the episode sequence sampled from a loaded table. The generator's +`--sample-seed` controls which sampled depth-2/4/5/6/8 records are written into +a custom table. Depth 16 is stored in full for the committed 8-action set, so +changing `--sample-seed` does not change the depth-16 records. + +## Regenerating the Target Table + +If the generated binary artifact is omitted from a checkout, regenerate the +default table from the repo root: + +```bash +gcc -std=c11 -O3 -DNDEBUG -fopenmp \ + -I. -Iocean/affine_lock \ + ocean/affine_lock/tools/generate_8action_visible_targets.c \ + -lm -o /tmp/affine_lock_generate_visible_targets + +/tmp/affine_lock_generate_visible_targets +``` + +The no-argument generator run writes the default `.bin` and `.json` files under +`ocean/affine_lock/generated/`. The default sample seed is `0`, which preserves +the committed benchmark table. Changing the committed `.bin` changes the +training data and can change full-run `perf`, so regenerate and benchmark before +committing a replacement table. 
+ +### Using a Custom 8-Action Table + +The same generator can create larger or seed-varied tables for the committed +8-action environment without changing the runtime action set: + +```bash +/tmp/affine_lock_generate_visible_targets \ + --sample-seed 42 \ + --sample-per-depth 131072 \ + --store-all-depth 16 \ + --output-bin /tmp/affine_lock_8action_visible_targets_seed42.bin \ + --output-json /tmp/affine_lock_8action_visible_targets_seed42.json +``` + +Increasing `--sample-per-depth` raises the number of stored records for sampled +depths. `--store-all-depth D` stores every exact pair for a supported target +depth. For the committed 8-action set, depth 16 is stored in full by default. +Using the same `--sample-seed` and options produces the same table; using a +different seed produces a different sampled d2/d4/d5/d6/d8 table while leaving +stored-all depths unchanged. + +To train against a custom 8-action table, either write it to the default path or +build with an explicit table path: + +```bash +EXTRA_CFLAGS='-DAFFINE_LOCK_VISIBLE_TARGET_TABLE_PATH="/tmp/affine_lock_8action_visible_targets_seed42.bin"' \ + ./build.sh affine_lock +``` + +The loader checks that the table action-set hash matches the runtime action +set. For seed-varied or larger 8-action tables, no runtime code changes are +needed as long as the table contains the curriculum depths requested by the +runtime. + +The generator currently uses one `--sample-per-depth` value for all sampled +depths. If a future benchmark wants asymmetric budgets such as fewer d2/d4 +records and more d6/d8 records, update the generator sampling options and +manifest/tests together, then regenerate and benchmark the replacement table. 
+ +To generate train/test table variants, keep the same depth/count settings and +change only `--sample-seed` and the output paths: + +```bash +/tmp/affine_lock_generate_visible_targets \ + --sample-seed 42 \ + --sample-per-depth 65536 \ + --store-all-depth 16 \ + --output-bin /tmp/affine_lock_train_seed42.bin \ + --output-json /tmp/affine_lock_train_seed42.json + +/tmp/affine_lock_generate_visible_targets \ + --sample-seed 69 \ + --sample-per-depth 65536 \ + --store-all-depth 16 \ + --output-bin /tmp/affine_lock_test_seed69.bin \ + --output-json /tmp/affine_lock_test_seed69.json +``` + +### Dropping the Committed Binary + +The `.bin` is committed so the env works immediately and benchmark runs are +byte-for-byte reproducible. If the binary is removed from a branch, users must +run the no-argument generator before building/training: + +```bash +/tmp/affine_lock_generate_visible_targets +./build.sh affine_lock +python -m pufferlib.pufferl train affine_lock +``` + +This recreates the default table at the path expected by the runtime. The +matching `.json` manifest records the depth counts, checksum, action-set hash, +and generator options. + +## Experimental 4-Action Generator Set + +The generator also includes an experimental `affine_lock_4action_v1` action set: + +```text +shift_right +mirror +invert_right_7 +swap_adjacent_bits +``` + +This is generator-only. The committed runtime environment does not train on this +action set. It is kept as a small, explicit alternate because a four-action +policy can be easier to learn, and this graph has far more unique depth-16 +pairs than the committed 8-action table. To make it a runtime environment, +update the env action table, `AFFINE_LOCK_NUM_ACTIONS`, the visible-table +action-set hash/path, generated table artifact, and any policy/config +expectations that assume eight actions. 
+ +The current true visible-pair counts for this generator action set are: + +| Depth | True visible pairs | +| ---: | ---: | +| `2` | `772,080` | +| `4` | `6,055,652` | +| `5` | `16,234,512` | +| `6` | `42,176,998` | +| `8` | `234,409,780` | +| `16` | `2,434,606` | + +Example generation command: + +```bash +/tmp/affine_lock_generate_visible_targets \ + --action-set affine_lock_4action_v1 \ + --sample-per-depth 65536 \ + --store-all-depth 16 \ + --output-bin /tmp/affine_lock_4action_visible_targets.bin \ + --output-json /tmp/affine_lock_4action_visible_targets.json +``` + +### Making 4-Action a Runtime Env + +The 4-action table is not plug-compatible with the committed 8-action runtime. +To make a real 4-action runtime variant: + +1. Change `AFFINE_LOCK_NUM_ACTIONS` to `4`. +2. Change the runtime action enum/table in `affine_lock.h` to match the + generator's `affine_lock_4action_v1` order. +3. Point `AFFINE_LOCK_VISIBLE_TARGET_TABLE_PATH` at a 4-action table. +4. Update the expected action-set hash in `affine_lock_visible_targets.h` to + the 4-action manifest's `action_set_hash`. +5. Remove runtime helpers and render labels that only exist for the old + 8-action table. +6. Update policy/config/test assumptions that expect eight actions. In + particular, the old all-actions-have-one-step-inverses test is + 8-action-specific because `shift_right` no longer has `shift_left` as an + action. Replace it with checks that match the new action cycles and refresh + the deterministic golden checksum. +7. Rebuild, run `ocean/affine_lock/tests/run_all.sh`, and rerun a full + benchmark train. + +## Adding New Depths Later + +Adding another depth such as `7`, `10`, or `12` is intentionally not part of the +committed runtime path, but the file format can represent it. A future change +would need to: + +1. Add the depth to `TARGET_DEPTHS` in + `tools/generate_8action_visible_targets.c`. +2. Regenerate the `.bin` and `.json`. +3. 
Add the depth to `AFFINE_LOCK_CURRICULUM_DEPTHS` and update + `AFFINE_LOCK_CURRICULUM_DEPTH_COUNT`. +4. Add matching `Log.depth_D_rate` and `Log.depth_D_solve_rate` fields plus + `my_log` exports if the depth should appear in training logs. +5. Update config/docs/tests to expect the new depth and record count. +6. Rerun the affine tests and a full training benchmark. + +The loader itself does not require a format change for additional depth +sections. If a new table omits a runtime-requested curriculum depth, reset will +abort because there is no valid record pool for that depth. diff --git a/ocean/affine_lock/affine_lock.c b/ocean/affine_lock/affine_lock.c new file mode 100644 index 0000000000..13b999db94 --- /dev/null +++ b/ocean/affine_lock/affine_lock.c @@ -0,0 +1,90 @@ +#include +#include +#include + +#include "affine_lock.h" + +static AffineLock* g_env = NULL; +static AffineLockShared* g_shared = NULL; + +static void demo_cleanup(void) { + if (g_env != NULL) { + free(g_env->observations); + free(g_env->actions); + free(g_env->rewards); + free(g_env->terminals); + c_close(g_env); + g_env = NULL; + } + if (g_shared != NULL) { + affine_lock_free_shared(g_shared); + free(g_shared); + g_shared = NULL; + } +} + +static int key_to_action(void) { + static const int keys[AFFINE_LOCK_NUM_ACTIONS] = { + KEY_ONE, KEY_TWO, KEY_THREE, KEY_FOUR, + KEY_FIVE, KEY_SIX, KEY_SEVEN, KEY_EIGHT, + }; + + for (int i = 0; i < AFFINE_LOCK_NUM_ACTIONS; i++) { + if (IsKeyPressed(keys[i])) { + return i; + } + } + return -1; +} + +int main(void) { + g_shared = (AffineLockShared*)calloc(1, sizeof(AffineLockShared)); + if (g_shared == NULL || + affine_lock_init_shared(g_shared, 2, 16, 2) != 0) { + fprintf(stderr, "failed to initialize affine_lock demo\n"); + demo_cleanup(); + return 1; + } + if (affine_lock_prepare_visible_targets(g_shared) != 0) { + fprintf(stderr, "failed to configure affine_lock demo\n"); + demo_cleanup(); + return 1; + } + + AffineLock env; + memset(&env, 0, 
sizeof(env)); + g_env = &env; + atexit(demo_cleanup); + + // Standalone demo buffers match the FloatTensor/float vecenv contract. + env.observations = (float*)calloc(AFFINE_LOCK_OBS_SIZE, sizeof(float)); + env.actions = (float*)calloc(AFFINE_LOCK_NUM_ATNS, sizeof(float)); + env.rewards = (float*)calloc(1, sizeof(float)); + env.terminals = (float*)calloc(1, sizeof(float)); + if (env.observations == NULL || env.actions == NULL || + env.rewards == NULL || env.terminals == NULL) { + fprintf(stderr, "failed to allocate affine_lock demo buffers\n"); + return 1; + } + + affine_lock_init_env(&env, g_shared, (unsigned int)time(NULL)); + c_reset(&env); + c_render(&env); + + while (!WindowShouldClose()) { + if (IsWindowReady() && IsKeyPressed(KEY_R)) { + c_reset(&env); + } + int action = key_to_action(); + + if (action >= 0) { + env.actions[0] = (float)action; + c_step(&env); + } + + c_render(&env); + } + + demo_cleanup(); + return 0; +} diff --git a/ocean/affine_lock/affine_lock.h b/ocean/affine_lock/affine_lock.h new file mode 100644 index 0000000000..82be93890f --- /dev/null +++ b/ocean/affine_lock/affine_lock.h @@ -0,0 +1,626 @@ +#pragma once + +#include +#include +#include +#include +#include + +#ifndef AFFINE_LOCK_NO_RENDER +#include "raylib.h" +#endif + +#include "affine_lock_visible_targets.h" + +#define AFFINE_LOCK_BITS 16 +#define AFFINE_LOCK_TIMER_INDEX (2 * AFFINE_LOCK_BITS) +#define AFFINE_LOCK_OBS_SIZE (AFFINE_LOCK_TIMER_INDEX + 1) +// PufferLib uses one action slot for this single-discrete-action env. 
// State shared by every AffineLock env created from the same configuration.
// Filled by affine_lock_init_shared() and released by
// affine_lock_free_shared().
typedef struct AffineLockShared {
    // Curriculum depth given to a fresh env and restored after an unsolved
    // episode.
    int start_depth;
    // Upper bound applied when the curriculum advances; also the denominator
    // of the per-episode solve credit.
    int max_depth;
    // Extra steps allowed on top of the target distance before an episode
    // times out.
    int step_grace;
    // Number of distinct bit states: 1 << AFFINE_LOCK_BITS.
    int num_states;
    // Low AFFINE_LOCK_BITS bits set; applied to states and targets.
    uint32_t mask;
    // Heap-allocated transition table with num_states *
    // AFFINE_LOCK_NUM_ACTIONS entries, indexed as
    // state * AFFINE_LOCK_NUM_ACTIONS + action.
    uint32_t* next;
    // Set to 1 once affine_lock_prepare_visible_targets() has loaded the
    // table, so later calls return immediately.
    int visible_target_table_loaded;
    // Start/target records read from AFFINE_LOCK_VISIBLE_TARGET_TABLE_PATH.
    AffineLockVisibleTargetTable visible_target_table;
    // For every byte value, the eight observation floats of its bits
    // (bit 0 first): 1.0f for a set bit, -1.0f for a clear bit.
    float observation_bit_patterns[256][8];
} AffineLockShared;
// Rotate the 16-bit word one position toward bit 0; bit 0 wraps around to
// the top bit.
static uint32_t affine_lock_shift_left(uint32_t state) {
    uint32_t wrapped = (state & 1u) << (AFFINE_LOCK_BITS - 1);
    uint32_t lowered = state >> 1;
    return lowered | wrapped;
}

// Rotate the 16-bit word one position away from bit 0; the top bit wraps
// around to bit 0.
static uint32_t affine_lock_shift_right(uint32_t state) {
    const uint32_t word_mask = (1u << AFFINE_LOCK_BITS) - 1u;
    uint32_t carry = (state >> (AFFINE_LOCK_BITS - 1)) & 1u;
    uint32_t raised = (state << 1) & word_mask;
    return raised | carry;
}

// Exchange the two bits of every aligned bit pair.
static uint32_t affine_lock_swap_adjacent_bits(uint32_t state) {
    uint32_t even_bits = state & 0x5555u;
    uint32_t odd_bits = state & 0xaaaau;
    return (even_bits << 1) | (odd_bits >> 1);
}

// Exchange the two 2-bit groups of every nibble.
static uint32_t affine_lock_swap_adjacent_pairs(uint32_t state) {
    uint32_t low_pairs = state & 0x3333u;
    uint32_t high_pairs = state & 0xccccu;
    return (low_pairs << 2) | (high_pairs >> 2);
}

// Exchange the low and high nibble of each byte.
static uint32_t affine_lock_swap_nibbles_each_byte(uint32_t state) {
    uint32_t low_nibbles = state & 0x0f0fu;
    uint32_t high_nibbles = state & 0xf0f0u;
    return (low_nibbles << 4) | (high_nibbles >> 4);
}

// Reverse the bit order inside each nibble: swapping neighbouring bits and
// then neighbouring pairs mirrors all four positions.
static uint32_t affine_lock_reverse_each_nibble(uint32_t state) {
    uint32_t bits_swapped = affine_lock_swap_adjacent_bits(state);
    return affine_lock_swap_adjacent_pairs(bits_swapped);
}

// Reverse the bit order inside each byte: mirror every nibble, then swap the
// two nibbles of each byte.
static uint32_t affine_lock_reverse_each_byte(uint32_t state) {
    uint32_t nibbles_mirrored = affine_lock_reverse_each_nibble(state);
    return affine_lock_swap_nibbles_each_byte(nibbles_mirrored);
}
int affine_lock_init_shared( + AffineLockShared* shared, + int start_depth, + int max_depth, + int step_grace) { + memset(shared, 0, sizeof(*shared)); + + shared->start_depth = start_depth; + shared->max_depth = max_depth; + shared->step_grace = step_grace; + shared->num_states = 1 << AFFINE_LOCK_BITS; + shared->mask = (1u << AFFINE_LOCK_BITS) - 1u; + affine_lock_init_observation_bit_patterns(shared); + + size_t transition_count = + (size_t)shared->num_states * AFFINE_LOCK_NUM_ACTIONS; + shared->next = (uint32_t*)calloc(transition_count, sizeof(uint32_t)); + if (shared->next == NULL) { + fprintf(stderr, "affine_lock: failed to allocate action table\n"); + return -1; + } + + for (uint32_t state = 0; state < (uint32_t)shared->num_states; state++) { + for (int action = 0; action < AFFINE_LOCK_NUM_ACTIONS; action++) { + uint32_t next = state; + switch (action) { + case AFFINE_LOCK_ACTION_SHIFT_LEFT: + next = affine_lock_shift_left(state); + break; + case AFFINE_LOCK_ACTION_SHIFT_RIGHT: + next = affine_lock_shift_right(state); + break; + case AFFINE_LOCK_ACTION_INVERT_RIGHT_7: + next = state ^ 0xfe00u; + break; + case AFFINE_LOCK_ACTION_SWAP_ADJACENT_BITS: + next = affine_lock_swap_adjacent_bits(state); + break; + case AFFINE_LOCK_ACTION_SWAP_ADJACENT_PAIRS: + next = affine_lock_swap_adjacent_pairs(state); + break; + case AFFINE_LOCK_ACTION_SWAP_NIBBLES_EACH_BYTE: + next = affine_lock_swap_nibbles_each_byte(state); + break; + case AFFINE_LOCK_ACTION_REVERSE_EACH_NIBBLE: + next = affine_lock_reverse_each_nibble(state); + break; + case AFFINE_LOCK_ACTION_REVERSE_EACH_BYTE: + next = affine_lock_reverse_each_byte(state); + break; + } + shared->next[state * AFFINE_LOCK_NUM_ACTIONS + action] = + next & shared->mask; + } + } + + return 0; +} + +static int affine_lock_prepare_visible_targets(AffineLockShared* shared) { + if (shared->visible_target_table_loaded) { + return 0; + } + + char error[256]; + if (affine_lock_visible_targets_load( + 
AFFINE_LOCK_VISIBLE_TARGET_TABLE_PATH, + AFFINE_LOCK_VISIBLE_TARGET_8ACTION_V1_HASH, + &shared->visible_target_table, + error, + sizeof(error)) != 0) { + fprintf(stderr, "affine_lock: %s\n", error); + return -1; + } + + shared->visible_target_table_loaded = 1; + return 0; +} + +static void affine_lock_free_shared(AffineLockShared* shared) { + if (shared == NULL) { + return; + } + free(shared->next); + affine_lock_visible_targets_free(&shared->visible_target_table); + memset(shared, 0, sizeof(*shared)); +} + +static uint32_t affine_lock_apply_action( + const AffineLockShared* shared, uint32_t rel, int action) { + return shared->next[(rel & shared->mask) * AFFINE_LOCK_NUM_ACTIONS + action]; +} + +static uint32_t affine_lock_random_u32(AffineLock* env) { + env->rng = env->rng * 1664525u + 1013904223u; + return env->rng; +} + +// Keep RNG fully local to each env so sweep runs differ only by hyperparams. +// The mixer avoids weak low bits from the LCG when sampling bounded actions or +// bit states. Do not replace this with global rand()/srand(). 
// Convert the float action slot into an action id.
//
// Accepts only finite, integral values in [0, AFFINE_LOCK_NUM_ACTIONS - 1].
// On success stores the id in *action_out and returns 1; otherwise returns 0
// and leaves *action_out untouched.
static int affine_lock_parse_action(float raw_action, int* action_out) {
    const float highest = (float)(AFFINE_LOCK_NUM_ACTIONS - 1);
    int in_range = isfinite(raw_action) &&
        raw_action >= 0.0f &&
        raw_action <= highest;

    if (in_range) {
        int candidate = (int)raw_action;
        // A round trip through int rejects fractional values such as 2.5.
        if ((float)candidate == raw_action) {
            *action_out = candidate;
            return 1;
        }
    }
    return 0;
}
target solution action\n"); + abort(); + } + env->solution_actions[i] = action; + } +} + +static void affine_lock_generate_visible_target_table_target(AffineLock* env) { + AffineLockShared* shared = env->shared; + int requested_depth = env->scramble_depth; + + if (affine_lock_prepare_visible_targets(shared) != 0) { + fprintf(stderr, "affine_lock: failed to load visible target table\n"); + abort(); + } + + const AffineLockVisibleTargetDepth* depth = + affine_lock_visible_target_depth(shared, requested_depth); + if (depth == NULL || depth->stored_count == 0) { + fprintf(stderr, + "affine_lock: visible target table has no records for depth %d\n", + requested_depth); + abort(); + } + + int choice = affine_lock_random_bounded(env, (int)depth->stored_count); + uint32_t record_index = depth->first_record + (uint32_t)choice; + if (record_index >= shared->visible_target_table.record_count) { + fprintf(stderr, "affine_lock: invalid visible target record index\n"); + abort(); + } + const AffineLockVisibleTargetRecord* record = + &shared->visible_target_table.records[record_index]; + if ((int)record->depth != requested_depth || + record->solution_length != record->depth) { + fprintf(stderr, "affine_lock: invalid visible target record\n"); + abort(); + } + + env->state = (uint32_t)record->start & shared->mask; + affine_lock_clear_generated_path(env); + env->target = record->target & shared->mask; + env->target_distance = (int)record->depth; + affine_lock_store_visible_solution_path(env, record); +} + +static void affine_lock_reset_state(AffineLock* env) { + AffineLockShared* shared = env->shared; + env->scramble_depth = env->curriculum_depth; + env->max_steps = env->scramble_depth + shared->step_grace; + env->step_count = 0; + env->episode_return = 0.0f; + env->target_distance = -1; + + affine_lock_generate_visible_target_table_target(env); + env->max_steps = env->target_distance + shared->step_grace; +} + +static void affine_lock_init_env( + AffineLock* env, AffineLockShared* 
shared, unsigned int seed) { + env->shared = shared; + env->rng = seed; + env->num_agents = 1; + env->curriculum_depth = shared->start_depth; + env->scramble_depth = shared->start_depth; + env->target_distance = -1; + env->max_steps = shared->start_depth + shared->step_grace; + env->step_count = 0; + env->episode_return = 0.0f; +} + +static void affine_lock_add_log( + AffineLock* env, + int solved, + int invalid) { + AffineLockShared* shared = env->shared; + int log_depth = affine_lock_log_depth(env); + int at_max_depth = log_depth == shared->max_depth; + float solve_credit = solved ? + affine_lock_solve_credit(shared, log_depth) : 0.0f; + env->log.perf += solve_credit; + env->log.score += solve_credit; + env->log.solve_rate += solved ? 1.0f : 0.0f; + env->log.max_depth_solve += + (solved && at_max_depth) ? 1.0f : 0.0f; + env->log.episode_return += env->episode_return; + env->log.episode_length += (float)env->step_count; + env->log.solve_steps += solved ? (float)env->step_count : 0.0f; + env->log.timeout_rate += (!solved && !invalid) ? 1.0f : 0.0f; + env->log.invalid_rate += invalid ? 1.0f : 0.0f; + env->log.solve_efficiency += solved && log_depth > 0 ? + (float)env->step_count / (float)log_depth : 0.0f; + env->log.target_distance += (float)env->target_distance; + env->log.solved_target_distance += + (solved && env->target_distance >= 0) ? (float)env->target_distance : 0.0f; + env->log.depth_2_rate += log_depth == 2 ? 1.0f : 0.0f; + env->log.depth_2_solve_rate += + (solved && log_depth == 2) ? 1.0f : 0.0f; + env->log.depth_4_rate += log_depth == 4 ? 1.0f : 0.0f; + env->log.depth_4_solve_rate += + (solved && log_depth == 4) ? 1.0f : 0.0f; + env->log.depth_5_rate += log_depth == 5 ? 1.0f : 0.0f; + env->log.depth_5_solve_rate += + (solved && log_depth == 5) ? 1.0f : 0.0f; + env->log.depth_6_rate += log_depth == 6 ? 1.0f : 0.0f; + env->log.depth_6_solve_rate += + (solved && log_depth == 6) ? 1.0f : 0.0f; + env->log.depth_8_rate += log_depth == 8 ? 
1.0f : 0.0f; + env->log.depth_8_solve_rate += + (solved && log_depth == 8) ? 1.0f : 0.0f; + env->log.depth_16_rate += log_depth == 16 ? 1.0f : 0.0f; + env->log.depth_16_solve_rate += + (solved && log_depth == 16) ? 1.0f : 0.0f; + env->log.n += 1.0f; +} + +static void affine_lock_compute_observations(AffineLock* env) { + float (*patterns)[8] = env->shared->observation_bit_patterns; + uint32_t state = env->state; + uint32_t target = env->target; + memcpy(&env->observations[0], patterns[state & 0xffu], 8 * sizeof(float)); + memcpy(&env->observations[8], patterns[(state >> 8) & 0xffu], 8 * sizeof(float)); + memcpy(&env->observations[16], patterns[target & 0xffu], 8 * sizeof(float)); + memcpy(&env->observations[24], patterns[(target >> 8) & 0xffu], 8 * sizeof(float)); + env->observations[AFFINE_LOCK_TIMER_INDEX] = env->max_steps > 0 ? + (float)env->step_count / (float)env->max_steps : 0.0f; +} + +static void compute_observations(AffineLock* env) { + affine_lock_compute_observations(env); +} + +static void c_reset(AffineLock* env) { + env->rewards[0] = 0.0f; + env->terminals[0] = 0.0f; + affine_lock_reset_state(env); + compute_observations(env); +} + +static int affine_lock_next_curriculum_depth( + const AffineLockShared* shared, + int current_depth) { + for (int i = 0; i < AFFINE_LOCK_CURRICULUM_DEPTH_COUNT; i++) { + int depth = AFFINE_LOCK_CURRICULUM_DEPTHS[i]; + if (depth > current_depth) { + return depth < shared->max_depth ? 
depth : shared->max_depth; + } + } + return shared->max_depth; +} + +static void affine_lock_advance_curriculum(AffineLock* env, int solved) { + AffineLockShared* shared = env->shared; + if (!solved) { + env->curriculum_depth = shared->start_depth; + return; + } + + env->curriculum_depth = affine_lock_next_curriculum_depth( + shared, env->scramble_depth); +} + +static void affine_lock_finish_episode( + AffineLock* env, + int solved, + int invalid) { + affine_lock_add_log(env, solved, invalid); + affine_lock_advance_curriculum(env, solved); + affine_lock_reset_state(env); +} + +static void c_step(AffineLock* env) { + AffineLockShared* shared = env->shared; + int action = -1; + int valid_action = affine_lock_parse_action(env->actions[0], &action); + float reward = AFFINE_LOCK_STEP_REWARD; + int terminal = 0; + int solved = 0; + int invalid = 0; + + env->terminals[0] = 0.0f; + env->step_count += 1; + + if (!valid_action) { + reward = -1.0f; + terminal = 1; + invalid = 1; + } else { + env->state = affine_lock_apply_action(shared, env->state, action); + if (env->state == env->target) { + reward = 1.0f; + terminal = 1; + solved = 1; + } else if (env->step_count >= env->max_steps) { + reward = -1.0f; + terminal = 1; + } + } + env->rewards[0] = reward; + env->episode_return += reward; + + if (terminal) { + env->terminals[0] = 1.0f; + affine_lock_finish_episode(env, solved, invalid); + } + + compute_observations(env); +} + +static void c_close(AffineLock* env) { + if (env->client == NULL) { + return; + } +#ifndef AFFINE_LOCK_NO_RENDER + if (IsWindowReady()) { + CloseWindow(); + } +#endif + free(env->client); + env->client = NULL; +} + +#ifndef AFFINE_LOCK_NO_RENDER +static Client* affine_lock_make_client(void) { + Client* client = (Client*)calloc(1, sizeof(Client)); + client->screen_width = 780; + client->screen_height = 360; + InitWindow(client->screen_width, client->screen_height, "PufferLib AffineLock"); + SetTargetFPS(30); + return client; +} + +static Color 
affine_lock_bit_fill(int on) { + return on ? (Color){80, 210, 140, 255} : (Color){38, 48, 58, 255}; +} + +static void affine_lock_draw_bit_row( + AffineLock* env, const char* label, uint32_t value, int y) { + DrawText(label, 30, y + 9, 20, RAYWHITE); + for (int bit = 0; bit < AFFINE_LOCK_BITS; bit++) { + int x = 145 + bit * 34; + int on = (value >> bit) & 1u; + int mismatch = ((env->state ^ env->target) >> bit) & 1u; + Color fill = affine_lock_bit_fill(on); + Color border = mismatch ? + (Color){238, 88, 88, 255} : (Color){182, 196, 205, 255}; + DrawRectangle(x, y, 24, 34, fill); + DrawRectangleLinesEx((Rectangle){(float)x, (float)y, 24.0f, 34.0f}, + mismatch ? 3.0f : 1.0f, border); + DrawText(TextFormat("%d", bit), x + 5, y + 40, 10, + (Color){128, 140, 150, 255}); + } +} + +static void c_render(AffineLock* env) { + if (IsWindowReady() && (WindowShouldClose() || IsKeyPressed(KEY_ESCAPE))) { + c_close(env); + exit(0); + } + + if (env->client == NULL) { + env->client = affine_lock_make_client(); + } + + uint32_t rel = (env->state ^ env->target) & env->shared->mask; + float display_reward = env->rewards[0]; + int display_terminal = env->terminals[0] != 0.0f; + int display_solved = display_terminal && display_reward > 0.0f; + const char* status = "running"; + Color status_color = (Color){190, 198, 206, 255}; + if (display_terminal) { + status = display_solved ? "solved" : "failed"; + status_color = display_solved ? 
+ (Color){80, 210, 140, 255} : (Color){238, 88, 88, 255}; + } + + BeginDrawing(); + ClearBackground((Color){12, 15, 18, 255}); + DrawText("Affine Lock", 30, 24, 28, RAYWHITE); + DrawText(TextFormat("depth %d/%d step %d/%d last reward %.2f", + env->scramble_depth, env->shared->max_depth, + env->step_count, env->max_steps, display_reward), + 30, 62, 20, (Color){180, 190, 200, 255}); + DrawText(TextFormat("status %s mismatches 0x%04x", + status, rel), 30, 90, 20, status_color); + + affine_lock_draw_bit_row(env, "current", env->state, 138); + affine_lock_draw_bit_row(env, "target", env->target, 220); + + DrawText("1 shiftL 2 shiftR 3 inv7 4 bit-swap 5 pair-swap", + 30, 300, 16, (Color){160, 170, 178, 255}); + DrawText("6 nib-swap 7 rev-nib 8 rev-byte R reset", + 30, 322, 16, (Color){160, 170, 178, 255}); + EndDrawing(); +} +#else +static void c_render(AffineLock* env) { + (void)env; +} +#endif diff --git a/ocean/affine_lock/affine_lock_visible_targets.h b/ocean/affine_lock/affine_lock_visible_targets.h new file mode 100644 index 0000000000..6388f7e3bc --- /dev/null +++ b/ocean/affine_lock/affine_lock_visible_targets.h @@ -0,0 +1,323 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#define AFFINE_LOCK_VISIBLE_TARGET_FORMAT_VERSION 1u +#define AFFINE_LOCK_VISIBLE_TARGET_RECORD_SIZE 16u +#define AFFINE_LOCK_VISIBLE_TARGET_8ACTION_V1_HASH 0x6e11e18fdafc0baaull + +typedef struct AffineLockVisibleTargetDepth { + uint32_t depth; + uint32_t first_record; + uint32_t stored_count; + uint64_t exact_pair_count; +} AffineLockVisibleTargetDepth; + +typedef struct AffineLockVisibleTargetRecord { + uint16_t start; + uint16_t target; + uint64_t packed_actions; + uint8_t solution_length; + uint8_t depth; +} AffineLockVisibleTargetRecord; + +typedef struct AffineLockVisibleTargetTable { + uint32_t version; + uint32_t header_size; + uint32_t record_size; + uint32_t bits; + uint32_t num_actions; + uint32_t depth_count; + uint32_t record_count; + uint64_t 
checksum; + uint64_t action_set_hash; + AffineLockVisibleTargetDepth* depths; + AffineLockVisibleTargetRecord* records; +} AffineLockVisibleTargetTable; + +static uint64_t affine_lock_visible_targets_mix_u64( + uint64_t hash, + uint64_t value) { + hash ^= value; + hash *= 1099511628211ull; + return hash; +} + +static void affine_lock_visible_targets_set_error( + char* error, + size_t error_size, + const char* format, + ...) { + if (error == NULL || error_size == 0) { + return; + } + va_list args; + va_start(args, format); + vsnprintf(error, error_size, format, args); + va_end(args); +} + +static int affine_lock_visible_targets_read_exact( + FILE* file, + void* out, + size_t size) { + return fread(out, 1, size, file) == size ? 0 : -1; +} + +static int affine_lock_visible_targets_read_u16( + FILE* file, + uint16_t* out) { + unsigned char bytes[2]; + if (affine_lock_visible_targets_read_exact(file, bytes, sizeof(bytes)) != 0) { + return -1; + } + *out = (uint16_t)bytes[0] | ((uint16_t)bytes[1] << 8); + return 0; +} + +static int affine_lock_visible_targets_read_u32( + FILE* file, + uint32_t* out) { + unsigned char bytes[4]; + if (affine_lock_visible_targets_read_exact(file, bytes, sizeof(bytes)) != 0) { + return -1; + } + *out = (uint32_t)bytes[0] | + ((uint32_t)bytes[1] << 8) | + ((uint32_t)bytes[2] << 16) | + ((uint32_t)bytes[3] << 24); + return 0; +} + +static int affine_lock_visible_targets_read_u64( + FILE* file, + uint64_t* out) { + unsigned char bytes[8]; + if (affine_lock_visible_targets_read_exact(file, bytes, sizeof(bytes)) != 0) { + return -1; + } + uint64_t value = 0; + for (int i = 0; i < 8; i++) { + value |= (uint64_t)bytes[i] << (8 * i); + } + *out = value; + return 0; +} + +static void affine_lock_visible_targets_free( + AffineLockVisibleTargetTable* table) { + if (table == NULL) { + return; + } + free(table->depths); + free(table->records); + memset(table, 0, sizeof(*table)); +} + +static uint64_t affine_lock_visible_targets_checksum( + const 
AffineLockVisibleTargetTable* table) { + uint64_t hash = 1469598103934665603ull; + hash = affine_lock_visible_targets_mix_u64(hash, table->action_set_hash); + for (uint32_t depth_index = 0; depth_index < table->depth_count; + depth_index++) { + const AffineLockVisibleTargetDepth* depth = &table->depths[depth_index]; + hash = affine_lock_visible_targets_mix_u64(hash, depth->depth); + hash = affine_lock_visible_targets_mix_u64(hash, depth->exact_pair_count); + hash = affine_lock_visible_targets_mix_u64(hash, depth->stored_count); + for (uint32_t i = 0; i < depth->stored_count; i++) { + uint32_t record_index = depth->first_record + i; + const AffineLockVisibleTargetRecord* record = + &table->records[record_index]; + hash = affine_lock_visible_targets_mix_u64(hash, record->start); + hash = affine_lock_visible_targets_mix_u64(hash, record->target); + hash = affine_lock_visible_targets_mix_u64( + hash, record->packed_actions); + hash = affine_lock_visible_targets_mix_u64( + hash, record->solution_length); + hash = affine_lock_visible_targets_mix_u64(hash, record->depth); + } + } + return hash; +} + +static int affine_lock_visible_targets_load( + const char* path, + uint64_t expected_action_set_hash, + AffineLockVisibleTargetTable* table, + char* error, + size_t error_size) { + static const unsigned char expected_magic[8] = { + 'A', 'L', '7', 'T', 'G', 'T', '1', '\0' + }; + memset(table, 0, sizeof(*table)); + + FILE* file = fopen(path, "rb"); + if (file == NULL) { + affine_lock_visible_targets_set_error( + error, error_size, "failed to open %s: %s", path, strerror(errno)); + return -1; + } + + unsigned char magic[8]; + if (affine_lock_visible_targets_read_exact(file, magic, sizeof(magic)) != 0 || + affine_lock_visible_targets_read_u32(file, &table->version) != 0 || + affine_lock_visible_targets_read_u32(file, &table->header_size) != 0 || + affine_lock_visible_targets_read_u32(file, &table->record_size) != 0 || + affine_lock_visible_targets_read_u32(file, &table->bits) != 
0 || + affine_lock_visible_targets_read_u32(file, &table->num_actions) != 0 || + affine_lock_visible_targets_read_u32(file, &table->depth_count) != 0 || + affine_lock_visible_targets_read_u32(file, &table->record_count) != 0 || + affine_lock_visible_targets_read_u64(file, &table->checksum) != 0 || + affine_lock_visible_targets_read_u64(file, &table->action_set_hash) != 0) { + affine_lock_visible_targets_set_error( + error, error_size, "truncated visible target header"); + fclose(file); + return -1; + } + + if (memcmp(magic, expected_magic, sizeof(magic)) != 0) { + affine_lock_visible_targets_set_error( + error, error_size, "invalid visible target magic"); + fclose(file); + return -1; + } + if (table->version != AFFINE_LOCK_VISIBLE_TARGET_FORMAT_VERSION || + table->record_size != AFFINE_LOCK_VISIBLE_TARGET_RECORD_SIZE || + table->bits != 16 || + table->num_actions == 0 || + table->num_actions > 8 || + table->depth_count == 0 || + table->depth_count > 16) { + affine_lock_visible_targets_set_error( + error, error_size, "unsupported visible target table header"); + fclose(file); + return -1; + } + uint32_t expected_header_size = 52u + table->depth_count * 24u; + if (table->header_size != expected_header_size) { + affine_lock_visible_targets_set_error( + error, error_size, "unexpected visible target header size"); + fclose(file); + return -1; + } + if (expected_action_set_hash != 0 && + table->action_set_hash != expected_action_set_hash) { + affine_lock_visible_targets_set_error( + error, error_size, "visible target action set hash mismatch"); + fclose(file); + return -1; + } + + table->depths = (AffineLockVisibleTargetDepth*)calloc( + table->depth_count, sizeof(AffineLockVisibleTargetDepth)); + table->records = (AffineLockVisibleTargetRecord*)calloc( + table->record_count, sizeof(AffineLockVisibleTargetRecord)); + if (table->depths == NULL || table->records == NULL) { + affine_lock_visible_targets_set_error( + error, error_size, "failed to allocate visible target 
table"); + fclose(file); + affine_lock_visible_targets_free(table); + return -1; + } + + uint64_t depth_record_total = 0; + for (uint32_t i = 0; i < table->depth_count; i++) { + AffineLockVisibleTargetDepth* depth = &table->depths[i]; + uint32_t reserved = 0; + if (affine_lock_visible_targets_read_u32(file, &depth->depth) != 0 || + affine_lock_visible_targets_read_u32( + file, &depth->first_record) != 0 || + affine_lock_visible_targets_read_u32( + file, &depth->stored_count) != 0 || + affine_lock_visible_targets_read_u32(file, &reserved) != 0 || + affine_lock_visible_targets_read_u64( + file, &depth->exact_pair_count) != 0) { + affine_lock_visible_targets_set_error( + error, error_size, "truncated visible target depth table"); + fclose(file); + affine_lock_visible_targets_free(table); + return -1; + } + if (reserved != 0 || + depth->first_record > table->record_count || + depth->stored_count > table->record_count || + depth->first_record + depth->stored_count > + table->record_count) { + affine_lock_visible_targets_set_error( + error, error_size, "invalid visible target depth table"); + fclose(file); + affine_lock_visible_targets_free(table); + return -1; + } + depth_record_total += depth->stored_count; + } + if (depth_record_total != table->record_count) { + affine_lock_visible_targets_set_error( + error, error_size, "visible target depth counts do not sum"); + fclose(file); + affine_lock_visible_targets_free(table); + return -1; + } + + for (uint32_t i = 0; i < table->record_count; i++) { + AffineLockVisibleTargetRecord* record = &table->records[i]; + uint16_t reserved = 0; + if (affine_lock_visible_targets_read_u16(file, &record->start) != 0 || + affine_lock_visible_targets_read_u16(file, &record->target) != 0 || + affine_lock_visible_targets_read_u64( + file, &record->packed_actions) != 0) { + affine_lock_visible_targets_set_error( + error, error_size, "truncated visible target record"); + fclose(file); + affine_lock_visible_targets_free(table); + return -1; + 
} + int solution_length = fgetc(file); + int depth = fgetc(file); + if (solution_length == EOF || depth == EOF || + affine_lock_visible_targets_read_u16(file, &reserved) != 0) { + affine_lock_visible_targets_set_error( + error, error_size, "truncated visible target record"); + fclose(file); + affine_lock_visible_targets_free(table); + return -1; + } + record->solution_length = (uint8_t)solution_length; + record->depth = (uint8_t)depth; + if (reserved != 0 || record->solution_length != record->depth) { + affine_lock_visible_targets_set_error( + error, error_size, "invalid visible target record"); + fclose(file); + affine_lock_visible_targets_free(table); + return -1; + } + } + + int extra = fgetc(file); + if (extra != EOF) { + affine_lock_visible_targets_set_error( + error, error_size, "visible target file has trailing bytes"); + fclose(file); + affine_lock_visible_targets_free(table); + return -1; + } + fclose(file); + + uint64_t computed_checksum = + affine_lock_visible_targets_checksum(table); + if (computed_checksum != table->checksum) { + affine_lock_visible_targets_set_error( + error, error_size, + "visible target checksum mismatch: got 0x%016llx expected 0x%016llx", + (unsigned long long)computed_checksum, + (unsigned long long)table->checksum); + affine_lock_visible_targets_free(table); + return -1; + } + return 0; +} diff --git a/ocean/affine_lock/binding.c b/ocean/affine_lock/binding.c new file mode 100644 index 0000000000..e51be3d83e --- /dev/null +++ b/ocean/affine_lock/binding.c @@ -0,0 +1,143 @@ +#include "affine_lock.h" + +#define OBS_SIZE AFFINE_LOCK_OBS_SIZE +#define NUM_ATNS AFFINE_LOCK_NUM_ATNS +#define ACT_SIZES {AFFINE_LOCK_NUM_ACTIONS} +#define OBS_TENSOR_T FloatTensor + +#define MY_VEC_INIT +#define MY_VEC_CLOSE +#define Env AffineLock +#include "vecenv.h" + +static uint32_t affine_lock_mix_seed(uint32_t value) { + value ^= value >> 16; + value *= 0x7feb352du; + value ^= value >> 15; + value *= 0x846ca68bu; + value ^= value >> 16; + return 
value; +} + +static unsigned int affine_lock_env_seed(int base_seed, int env_id) { + uint32_t value = 0x811c9dc5u; + value = (value ^ (uint32_t)base_seed) * 0x01000193u; + value = (value ^ (uint32_t)env_id) * 0x01000193u; + return affine_lock_mix_seed(value); +} + +Env* my_vec_init(int* num_envs_out, int* buffer_env_starts, int* buffer_env_counts, + Dict* vec_kwargs, Dict* env_kwargs) { + int total_agents = (int)dict_get(vec_kwargs, "total_agents")->value; + int num_buffers = (int)dict_get(vec_kwargs, "num_buffers")->value; + int agents_per_buffer = total_agents / num_buffers; + int base_seed = (int)dict_get(env_kwargs, "seed")->value; + + int start_depth = (int)dict_get(env_kwargs, "start_depth")->value; + int max_depth = (int)dict_get(env_kwargs, "max_depth")->value; + int step_grace = (int)dict_get(env_kwargs, "step_grace")->value; + + AffineLockShared* shared = + (AffineLockShared*)calloc(1, sizeof(AffineLockShared)); + if (shared == NULL || affine_lock_init_shared( + shared, start_depth, max_depth, step_grace) != 0) { + fprintf(stderr, "affine_lock: failed to initialize shared state\n"); + free(shared); + abort(); + } + if (affine_lock_prepare_visible_targets(shared) != 0) { + affine_lock_free_shared(shared); + free(shared); + abort(); + } + + Env* envs = (Env*)calloc((size_t)total_agents, sizeof(Env)); + if (envs == NULL) { + fprintf(stderr, "affine_lock: failed to allocate envs\n"); + affine_lock_free_shared(shared); + free(shared); + abort(); + } + + int buf = 0; + int buf_agents = 0; + buffer_env_starts[0] = 0; + buffer_env_counts[0] = 0; + + for (int i = 0; i < total_agents; i++) { + Env* env = &envs[i]; + affine_lock_init_env(env, shared, affine_lock_env_seed(base_seed, i)); + + buf_agents += env->num_agents; + buffer_env_counts[buf]++; + if (buf_agents >= agents_per_buffer && buf < num_buffers - 1) { + buf++; + buffer_env_starts[buf] = i + 1; + buffer_env_counts[buf] = 0; + buf_agents = 0; + } + } + + *num_envs_out = total_agents; + return envs; +} + 
+void my_vec_close(Env* envs) { + if (envs == NULL || envs[0].shared == NULL) { + return; + } + AffineLockShared* shared = envs[0].shared; + affine_lock_free_shared(shared); + free(shared); +} + +void my_init(Env* env, Dict* kwargs) { + (void)env; + (void)kwargs; +} + +static float conditional_rate(float numerator, float denominator) { + return denominator > 0.0f ? numerator / denominator : 0.0f; +} + +void my_log(Log* log, Dict* out) { + float conditional_solve_steps = + log->solve_rate > 0.0f ? log->solve_steps / log->solve_rate : 0.0f; + float conditional_solve_efficiency = + log->solve_rate > 0.0f ? + log->solve_efficiency / log->solve_rate : 0.0f; + float solved_min_win_moves = + log->solve_rate > 0.0f ? + log->solved_target_distance / log->solve_rate : 0.0f; + float depth_2_solve_rate = + conditional_rate(log->depth_2_solve_rate, log->depth_2_rate); + float depth_4_solve_rate = + conditional_rate(log->depth_4_solve_rate, log->depth_4_rate); + float depth_5_solve_rate = + conditional_rate(log->depth_5_solve_rate, log->depth_5_rate); + float depth_6_solve_rate = + conditional_rate(log->depth_6_solve_rate, log->depth_6_rate); + float depth_8_solve_rate = + conditional_rate(log->depth_8_solve_rate, log->depth_8_rate); + float depth_16_solve_rate = + conditional_rate(log->depth_16_solve_rate, log->depth_16_rate); + + dict_set(out, "perf", log->perf); + dict_set(out, "score", log->score); + dict_set(out, "solve_rate", log->solve_rate); + dict_set(out, "max_depth_solve", log->max_depth_solve); + dict_set(out, "episode_return", log->episode_return); + dict_set(out, "episode_length", log->episode_length); + dict_set(out, "timeout_rate", log->timeout_rate); + dict_set(out, "invalid_rate", log->invalid_rate); + dict_set(out, "min_win_moves", log->target_distance); + dict_set(out, "solved_min_win_moves", solved_min_win_moves); + dict_set(out, "conditional_solve_steps", conditional_solve_steps); + dict_set(out, "conditional_solve_efficiency", 
conditional_solve_efficiency); + dict_set(out, "depth_2_solve_rate", depth_2_solve_rate); + dict_set(out, "depth_4_solve_rate", depth_4_solve_rate); + dict_set(out, "depth_5_solve_rate", depth_5_solve_rate); + dict_set(out, "depth_6_solve_rate", depth_6_solve_rate); + dict_set(out, "depth_8_solve_rate", depth_8_solve_rate); + dict_set(out, "depth_16_solve_rate", depth_16_solve_rate); +} diff --git a/ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin b/ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin new file mode 100644 index 0000000000..092eb20932 Binary files /dev/null and b/ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin differ diff --git a/ocean/affine_lock/generated/affine_lock_8action_visible_targets.json b/ocean/affine_lock/generated/affine_lock_8action_visible_targets.json new file mode 100644 index 0000000000..1cd2840cbf --- /dev/null +++ b/ocean/affine_lock/generated/affine_lock_8action_visible_targets.json @@ -0,0 +1,60 @@ +{ + "action_id_to_name": [ + "shift_left", + "shift_right", + "invert_right_7", + "swap_adjacent_bits", + "swap_adjacent_pairs", + "swap_nibbles_each_byte", + "reverse_each_nibble", + "reverse_each_byte" + ], + "action_set": "affine_lock_8action_v1", + "action_set_hash": "0x6e11e18fdafc0baa", + "binary_path": "ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin", + "bits": 16, + "checksum": "0xce70379011f6386d", + "depth_records": [ + {"depth": 2, "exact_pair_count": 2216496, "first_record": 0, "stored_count": 65536}, + {"depth": 4, "exact_pair_count": 34379722, "first_record": 65536, "stored_count": 65536}, + {"depth": 5, "exact_pair_count": 115388932, "first_record": 131072, "stored_count": 65536}, + {"depth": 6, "exact_pair_count": 331789220, "first_record": 196608, "stored_count": 65536}, + {"depth": 8, "exact_pair_count": 1125374770, "first_record": 262144, "stored_count": 65536}, + {"depth": 16, "exact_pair_count": 100548, "first_record": 327680, 
"stored_count": 100548} + ], + "depths": [2, 4, 5, 6, 8, 16], + "disconnected_starts": 0, + "format": "affine_lock_visible_targets_bin", + "header_size": 196, + "max_distance": 20, + "num_actions": 8, + "record_count": 428228, + "record_size": 16, + "sample_per_depth": 65536, + "sample_seed": 0, + "stored_all_depths": [16], + "version": 1, + "visible_distance_histogram": { + "0": 65536, + "1": 517796, + "2": 2216496, + "3": 9482736, + "4": 34379722, + "5": 115388932, + "6": 331789220, + "7": 736430258, + "8": 1125374770, + "9": 1053662288, + "10": 582197010, + "11": 216704714, + "12": 65093582, + "13": 17020854, + "14": 3830040, + "15": 701474, + "16": 100548, + "17": 10352, + "18": 924, + "19": 40, + "20": 4 + } +} diff --git a/ocean/affine_lock/tests/run_all.sh b/ocean/affine_lock/tests/run_all.sh new file mode 100755 index 0000000000..814afa4a4e --- /dev/null +++ b/ocean/affine_lock/tests/run_all.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "$0")/../../.." 
&& pwd)" +OUT="${TMPDIR:-/tmp}/affine_lock_tests" +LOG_OUT="${TMPDIR:-/tmp}/affine_lock_log_export_tests" +C99_OUT="${TMPDIR:-/tmp}/affine_lock_c99_compile" +CC_BIN="${CC:-clang}" + +python3 "$ROOT/ocean/affine_lock/tests/test_metadata_smoke.py" +bash "$ROOT/ocean/affine_lock/tests/test_8action_visible_targets_smoke.sh" + +"$CC_BIN" \ + -std=c99 -pedantic -Wall -Wextra -Werror -Wno-unused-function \ + -O0 -I"$ROOT" -I"$ROOT/src" -I"$ROOT/ocean/affine_lock" -I"$ROOT/vendor" \ + "$ROOT/ocean/affine_lock/tests/test_affine_lock.c" \ + -lm -o "$C99_OUT" + +"$CC_BIN" \ + -std=c11 -Wall -Wextra -Werror -Wno-unused-function \ + -O0 -g -fsanitize=address,undefined \ + -I"$ROOT" -I"$ROOT/src" -I"$ROOT/ocean/affine_lock" -I"$ROOT/vendor" \ + "$ROOT/ocean/affine_lock/tests/test_affine_lock.c" \ + -lm -o "$OUT" + +"$CC_BIN" \ + -std=c11 -Wall -Wextra -Werror -Wno-unused-function -Wno-unused-parameter \ + -D_POSIX_C_SOURCE=200809L \ + -O0 -g -ffunction-sections -fdata-sections -fsanitize=address,undefined \ + -I"$ROOT" -I"$ROOT/src" -I"$ROOT/ocean/affine_lock" -I"$ROOT/vendor" \ + "$ROOT/ocean/affine_lock/tests/test_affine_lock_log_export.c" \ + -Wl,--gc-sections -lm -o "$LOG_OUT" + +ASAN_OPTIONS="${ASAN_OPTIONS:-detect_leaks=0}" "$OUT" +ASAN_OPTIONS="${ASAN_OPTIONS:-detect_leaks=0}" "$LOG_OUT" diff --git a/ocean/affine_lock/tests/test_8action_visible_targets_smoke.sh b/ocean/affine_lock/tests/test_8action_visible_targets_smoke.sh new file mode 100644 index 0000000000..eb5119a257 --- /dev/null +++ b/ocean/affine_lock/tests/test_8action_visible_targets_smoke.sh @@ -0,0 +1,243 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "$0")/../../.." 
&& pwd)" +SRC="$ROOT/ocean/affine_lock/tools/generate_8action_visible_targets.c" +OUT="${TMPDIR:-/tmp}/affine_lock_generate_8action_visible_targets" +LOADER_SRC="$ROOT/ocean/affine_lock/tests/test_visible_targets_loader.c" +LOADER_OUT="${TMPDIR:-/tmp}/affine_lock_test_visible_targets_loader" +BIN_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets.bin" +JSON_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets.json" +SEED_42_A_BIN_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets_seed42_a.bin" +SEED_42_A_JSON_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets_seed42_a.json" +SEED_42_B_BIN_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets_seed42_b.bin" +SEED_42_B_JSON_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets_seed42_b.json" +SEED_69_BIN_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets_seed69.bin" +SEED_69_JSON_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets_seed69.json" +FOUR_BIN_OUT="${TMPDIR:-/tmp}/affine_lock_4action_visible_targets.bin" +FOUR_JSON_OUT="${TMPDIR:-/tmp}/affine_lock_4action_visible_targets.json" +CC_BIN="${CC:-gcc}" + +if [ ! -f "$SRC" ]; then + echo "missing 8-action visible target generator: $SRC" >&2 + exit 1 +fi +if [ ! 
-f "$LOADER_SRC" ]; then + echo "missing visible target loader test: $LOADER_SRC" >&2 + exit 1 +fi + +"$CC_BIN" \ + -std=c11 -O3 -DNDEBUG -fopenmp \ + -Wall -Wextra -Werror \ + -I"$ROOT" -I"$ROOT/ocean/affine_lock" \ + "$SRC" -lm -o "$OUT" + +"$CC_BIN" \ + -std=c11 -O0 -g \ + -Wall -Wextra -Werror \ + -I"$ROOT" -I"$ROOT/ocean/affine_lock" \ + "$LOADER_SRC" -lm -o "$LOADER_OUT" + +"$OUT" \ + --sample-per-depth 128 \ + --output-bin "$BIN_OUT" \ + --output-json "$JSON_OUT" + +"$LOADER_OUT" "$BIN_OUT" 101188 128 100548 + +"$OUT" \ + --sample-per-depth 8 \ + --sample-seed 42 \ + --output-bin "$SEED_42_A_BIN_OUT" \ + --output-json "$SEED_42_A_JSON_OUT" + +"$OUT" \ + --sample-per-depth 8 \ + --sample-seed 42 \ + --output-bin "$SEED_42_B_BIN_OUT" \ + --output-json "$SEED_42_B_JSON_OUT" + +"$OUT" \ + --sample-per-depth 8 \ + --sample-seed 69 \ + --output-bin "$SEED_69_BIN_OUT" \ + --output-json "$SEED_69_JSON_OUT" + +cmp "$SEED_42_A_BIN_OUT" "$SEED_42_B_BIN_OUT" +if cmp -s "$SEED_42_A_BIN_OUT" "$SEED_69_BIN_OUT"; then + echo "different sample seeds unexpectedly produced identical tables" >&2 + exit 1 +fi + +"$OUT" \ + --action-set affine_lock_4action_v1 \ + --sample-per-depth 16 \ + --output-bin "$FOUR_BIN_OUT" \ + --output-json "$FOUR_JSON_OUT" + +python3 - \ + "$BIN_OUT" "$JSON_OUT" \ + "$SEED_42_A_BIN_OUT" "$SEED_42_A_JSON_OUT" \ + "$SEED_69_BIN_OUT" "$SEED_69_JSON_OUT" \ + "$FOUR_BIN_OUT" "$FOUR_JSON_OUT" <<'PY' +import json +import struct +import sys +from pathlib import Path + +bin_path = Path(sys.argv[1]) +json_path = Path(sys.argv[2]) +seed_42_bin_path = Path(sys.argv[3]) +seed_42_json_path = Path(sys.argv[4]) +seed_69_bin_path = Path(sys.argv[5]) +seed_69_json_path = Path(sys.argv[6]) +four_bin_path = Path(sys.argv[7]) +four_json_path = Path(sys.argv[8]) +manifest = json.loads(json_path.read_text()) + +assert manifest["action_set"] == "affine_lock_8action_v1" +assert manifest["action_id_to_name"] == [ + "shift_left", + "shift_right", + "invert_right_7", + 
"swap_adjacent_bits", + "swap_adjacent_pairs", + "swap_nibbles_each_byte", + "reverse_each_nibble", + "reverse_each_byte", +] +assert manifest["bits"] == 16 +assert manifest["num_actions"] == 8 +assert manifest["depths"] == [2, 4, 5, 6, 8, 16] +assert manifest["sample_per_depth"] == 128 +assert manifest["sample_seed"] == 0 +assert manifest["stored_all_depths"] == [16] +assert manifest["max_distance"] == 20 +assert manifest["disconnected_starts"] == 0 +assert manifest["visible_distance_histogram"]["16"] == 100548 +assert manifest["visible_distance_histogram"]["20"] == 4 + +depth_records = manifest["depth_records"] +assert [record["depth"] for record in depth_records] == [2, 4, 5, 6, 8, 16] +for record in depth_records[:5]: + assert record["stored_count"] == 128 + assert record["exact_pair_count"] >= record["stored_count"] +assert depth_records[5]["stored_count"] == 100548 +assert depth_records[5]["exact_pair_count"] == 100548 + +data = bin_path.read_bytes() +fixed_header = struct.Struct("<8sIIIIIIIQQ") +( + magic, + version, + header_size, + record_size, + bits, + num_actions, + depth_count, + record_count, + checksum, + action_set_hash, +) = fixed_header.unpack_from(data, 0) + +assert magic == b"AL7TGT1\0" +assert version == 1 +assert header_size == manifest["header_size"] +assert record_size == manifest["record_size"] == 16 +assert bits == 16 +assert num_actions == 8 +assert depth_count == 6 +assert record_count == sum(record["stored_count"] for record in depth_records) +assert checksum == int(manifest["checksum"], 16) +assert action_set_hash == int(manifest["action_set_hash"], 16) +assert len(data) == header_size + record_count * record_size + +depth_struct = struct.Struct("= 0 + +seed_42_manifest = json.loads(seed_42_json_path.read_text()) +seed_69_manifest = json.loads(seed_69_json_path.read_text()) +assert seed_42_manifest["sample_seed"] == 42 +assert seed_69_manifest["sample_seed"] == 69 +assert seed_42_manifest["sample_per_depth"] == 8 +assert 
seed_69_manifest["sample_per_depth"] == 8 +assert seed_42_manifest["depth_records"] == seed_69_manifest["depth_records"] + +seed_42_data = seed_42_bin_path.read_bytes() +seed_69_data = seed_69_bin_path.read_bytes() + +def record_span(table_manifest, depth): + record = next( + record for record in table_manifest["depth_records"] + if record["depth"] == depth + ) + start = ( + table_manifest["header_size"] + + record["first_record"] * table_manifest["record_size"] + ) + end = start + record["stored_count"] * table_manifest["record_size"] + return start, end + +sampled_depths_changed = False +for depth in (2, 4, 5, 6, 8): + start, end = record_span(seed_42_manifest, depth) + if seed_42_data[start:end] != seed_69_data[start:end]: + sampled_depths_changed = True +assert sampled_depths_changed + +start, end = record_span(seed_42_manifest, 16) +assert seed_42_data[start:end] == seed_69_data[start:end] + +four_manifest = json.loads(four_json_path.read_text()) +assert four_manifest["action_set"] == "affine_lock_4action_v1" +assert four_manifest["action_id_to_name"] == [ + "shift_right", + "mirror", + "invert_right_7", + "swap_adjacent_bits", +] +assert four_manifest["bits"] == 16 +assert four_manifest["num_actions"] == 4 +assert four_manifest["depths"] == [2, 4, 5, 6, 8, 16] +assert four_manifest["sample_per_depth"] == 16 +assert four_manifest["sample_seed"] == 0 +assert four_manifest["stored_all_depths"] == [] +assert four_manifest["max_distance"] == 19 +assert four_manifest["disconnected_starts"] == 0 +assert four_manifest["visible_distance_histogram"]["16"] == 2434606 +assert [record["stored_count"] for record in four_manifest["depth_records"]] == [ + 16, + 16, + 16, + 16, + 16, + 16, +] +assert four_bin_path.stat().st_size == ( + four_manifest["header_size"] + + four_manifest["record_count"] * four_manifest["record_size"] +) +PY diff --git a/ocean/affine_lock/tests/test_affine_lock.c b/ocean/affine_lock/tests/test_affine_lock.c new file mode 100644 index 
0000000000..ce57906efa --- /dev/null +++ b/ocean/affine_lock/tests/test_affine_lock.c @@ -0,0 +1,1262 @@ +#include +#include +#include +#include +#include + +#define AFFINE_LOCK_NO_RENDER +#include "../affine_lock.h" + +#define EXPECT_TRUE(cond) do { \ + if (!(cond)) { \ + fprintf(stderr, "%s:%d: expected true: %s\n", __FILE__, __LINE__, #cond); \ + exit(1); \ + } \ +} while (0) + +#define EXPECT_EQ_INT(actual, expected) do { \ + int _a = (int)(actual); \ + int _e = (int)(expected); \ + if (_a != _e) { \ + fprintf(stderr, "%s:%d: expected %s == %d, got %d\n", \ + __FILE__, __LINE__, #actual, _e, _a); \ + exit(1); \ + } \ +} while (0) + +#define EXPECT_EQ_U32(actual, expected) do { \ + uint32_t _a = (uint32_t)(actual); \ + uint32_t _e = (uint32_t)(expected); \ + if (_a != _e) { \ + fprintf(stderr, "%s:%d: expected %s == 0x%x, got 0x%x\n", \ + __FILE__, __LINE__, #actual, _e, _a); \ + exit(1); \ + } \ +} while (0) + +#define EXPECT_EQ_U64(actual, expected) do { \ + uint64_t _a = (uint64_t)(actual); \ + uint64_t _e = (uint64_t)(expected); \ + if (_a != _e) { \ + fprintf(stderr, "%s:%d: expected %s == 0x%llx, got 0x%llx\n", \ + __FILE__, __LINE__, #actual, \ + (unsigned long long)_e, (unsigned long long)_a); \ + exit(1); \ + } \ +} while (0) + +#define EXPECT_NE_U32(actual, expected) do { \ + uint32_t _a = (uint32_t)(actual); \ + uint32_t _e = (uint32_t)(expected); \ + if (_a == _e) { \ + fprintf(stderr, "%s:%d: expected %s != 0x%x\n", \ + __FILE__, __LINE__, #actual, _e); \ + exit(1); \ + } \ +} while (0) + +#define EXPECT_NEAR(actual, expected, eps) do { \ + float _a = (float)(actual); \ + float _e = (float)(expected); \ + if (fabsf(_a - _e) > (eps)) { \ + fprintf(stderr, "%s:%d: expected %s ~= %.6f, got %.6f\n", \ + __FILE__, __LINE__, #actual, _e, _a); \ + exit(1); \ + } \ +} while (0) + +static AffineLockShared make_shared( + int start_depth, int max_depth, + int step_grace) { + AffineLockShared shared; + memset(&shared, 0, sizeof(shared)); + int rc = 
affine_lock_init_shared( + &shared, start_depth, max_depth, step_grace); + EXPECT_EQ_INT(rc, 0); + EXPECT_EQ_INT(affine_lock_prepare_visible_targets(&shared), 0); + return shared; +} + +static void make_env( + AffineLock* env, + AffineLockShared* shared, + unsigned int seed, + float observations[AFFINE_LOCK_OBS_SIZE], + float actions[AFFINE_LOCK_NUM_ATNS], + float rewards[1], + float terminals[1]) { + memset(env, 0, sizeof(*env)); + memset(observations, 0, AFFINE_LOCK_OBS_SIZE * sizeof(float)); + actions[0] = 0.0f; + rewards[0] = 0.0f; + terminals[0] = 0.0f; + affine_lock_init_env(env, shared, seed); + env->observations = observations; + env->actions = actions; + env->rewards = rewards; + env->terminals = terminals; +} + +static uint32_t bits_from_text(const char* bits) { + EXPECT_EQ_INT(strlen(bits), AFFINE_LOCK_BITS); + uint32_t value = 0u; + for (int i = 0; i < AFFINE_LOCK_BITS; i++) { + EXPECT_TRUE(bits[i] == '0' || bits[i] == '1'); + if (bits[i] == '1') { + value |= 1u << i; + } + } + return value; +} + +static uint32_t test_apply_action(uint32_t state, int action) { + state &= 0xffffu; + switch (action) { + case 0: { + uint32_t first = state & 1u; + return ((state >> 1) | (first << 15)) & 0xffffu; + } + case 1: { + uint32_t last = (state >> 15) & 1u; + return ((state << 1) & 0xffffu) | last; + } + case 2: + return state ^ 0xfe00u; + case 3: + return ((state & 0x5555u) << 1) | ((state & 0xaaaau) >> 1); + case 4: + return ((state & 0x3333u) << 2) | ((state & 0xccccu) >> 2); + case 5: + return ((state & 0x0f0fu) << 4) | ((state & 0xf0f0u) >> 4); + case 6: + return test_apply_action(test_apply_action(state, 3), 4); + case 7: + return test_apply_action(test_apply_action(state, 6), 5); + default: + return state; + } +} + +typedef struct TestBfsStats { + int reachable_count; + int distance_histogram[128]; + int farthest_distance; + int shortest_distance; +} TestBfsStats; + +static void compute_test_bfs_stats( + const AffineLockShared* shared, + uint32_t start, + 
uint32_t target, + TestBfsStats* stats) { + memset(stats, 0, sizeof(*stats)); + stats->shortest_distance = -1; + + int* distances = (int*)malloc((size_t)shared->num_states * sizeof(int)); + uint32_t* queue = + (uint32_t*)malloc((size_t)shared->num_states * sizeof(uint32_t)); + EXPECT_TRUE(distances != NULL); + EXPECT_TRUE(queue != NULL); + + for (int i = 0; i < shared->num_states; i++) { + distances[i] = -1; + } + + int head = 0; + int tail = 0; + start &= shared->mask; + target &= shared->mask; + distances[start] = 0; + queue[tail++] = start; + + while (head < tail) { + uint32_t state = queue[head++]; + int distance = distances[state]; + stats->reachable_count += 1; + if (distance >= 0 && distance < (int)(sizeof(stats->distance_histogram) / + sizeof(stats->distance_histogram[0]))) { + stats->distance_histogram[distance] += 1; + } + if (distance > stats->farthest_distance) { + stats->farthest_distance = distance; + } + + for (int action = 0; action < AFFINE_LOCK_NUM_ACTIONS; action++) { + uint32_t next = test_apply_action(state, action) & shared->mask; + if (distances[next] >= 0) { + continue; + } + distances[next] = distance + 1; + queue[tail++] = next; + } + } + + stats->shortest_distance = distances[target]; + free(distances); + free(queue); +} + +static float expected_solve_credit(const AffineLockShared* shared, int depth); + +static void test_log_solve_credit_uses_known_target_distance(void) { + AffineLockShared shared = make_shared(2, 16, 0); + AffineLock env; + memset(&env, 0, sizeof(env)); + env.shared = &shared; + env.scramble_depth = 16; + env.target_distance = 8; + env.step_count = 8; + + affine_lock_add_log(&env, 1, 0); + + EXPECT_NEAR(env.log.perf, expected_solve_credit(&shared, 8), 0.0f); + EXPECT_NEAR(env.log.score, expected_solve_credit(&shared, 8), 0.0f); + EXPECT_NEAR(env.log.max_depth_solve, 0.0f, 0.0f); + EXPECT_NEAR(env.log.solve_efficiency, 1.0f, 0.0f); + EXPECT_NEAR(env.log.target_distance, 8.0f, 0.0f); + 
EXPECT_NEAR(env.log.solved_target_distance, 8.0f, 0.0f); + EXPECT_NEAR(env.log.depth_5_rate, 0.0f, 0.0f); + EXPECT_NEAR(env.log.depth_5_solve_rate, 0.0f, 0.0f); + EXPECT_NEAR(env.log.depth_6_rate, 0.0f, 0.0f); + EXPECT_NEAR(env.log.depth_6_solve_rate, 0.0f, 0.0f); + EXPECT_NEAR(env.log.depth_8_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.depth_8_solve_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.depth_16_rate, 0.0f, 0.0f); + EXPECT_NEAR(env.log.depth_16_solve_rate, 0.0f, 0.0f); + + affine_lock_free_shared(&shared); +} + +static void expect_observation_matches(const AffineLock* env) { + for (int bit = 0; bit < AFFINE_LOCK_BITS; bit++) { + uint32_t bit_mask = 1u << bit; + float expected_current = (env->state & bit_mask) ? 1.0f : -1.0f; + float expected_target = (env->target & bit_mask) ? 1.0f : -1.0f; + EXPECT_NEAR(env->observations[bit], expected_current, 0.0f); + EXPECT_NEAR(env->observations[AFFINE_LOCK_BITS + bit], expected_target, 0.0f); + } + + for (int i = 0; i < AFFINE_LOCK_TIMER_INDEX; i++) { + EXPECT_TRUE(env->observations[i] == -1.0f || env->observations[i] == 1.0f); + } + + float expected_timer = env->max_steps > 0 ? 
+ (float)env->step_count / (float)env->max_steps : 0.0f; + EXPECT_TRUE(env->observations[AFFINE_LOCK_TIMER_INDEX] >= 0.0f); + EXPECT_TRUE(env->observations[AFFINE_LOCK_TIMER_INDEX] <= 1.0f); + EXPECT_NEAR(env->observations[AFFINE_LOCK_TIMER_INDEX], expected_timer, 0.000001f); +} + +static int find_non_solving_action(AffineLock* env) { + for (int action = 0; action < AFFINE_LOCK_NUM_ACTIONS; action++) { + uint32_t next = affine_lock_apply_action(env->shared, env->state, action); + if (next != env->target) { + return action; + } + } + return -1; +} + +static float expected_solve_credit(const AffineLockShared* shared, int depth) { + return (float)depth / (float)shared->max_depth; +} + +static uint64_t mix_u64(uint64_t hash, uint64_t value) { + hash ^= value; + hash *= 1099511628211ull; + return hash; +} + +static uint64_t mix_float(uint64_t hash, float value) { + uint32_t bits; + memcpy(&bits, &value, sizeof(bits)); + return mix_u64(hash, bits); +} + +static uint64_t log_snapshot_checksum(uint64_t hash, const Log* log) { + hash = mix_float(hash, log->perf); + hash = mix_float(hash, log->score); + hash = mix_float(hash, log->solve_rate); + hash = mix_float(hash, log->max_depth_solve); + hash = mix_float(hash, log->episode_return); + hash = mix_float(hash, log->episode_length); + hash = mix_float(hash, log->solve_steps); + hash = mix_float(hash, log->timeout_rate); + hash = mix_float(hash, log->invalid_rate); + hash = mix_float(hash, log->solve_efficiency); + hash = mix_float(hash, log->target_distance); + hash = mix_float(hash, log->solved_target_distance); + hash = mix_float(hash, log->depth_2_rate); + hash = mix_float(hash, log->depth_2_solve_rate); + hash = mix_float(hash, log->depth_4_rate); + hash = mix_float(hash, log->depth_4_solve_rate); + hash = mix_float(hash, log->depth_5_rate); + hash = mix_float(hash, log->depth_5_solve_rate); + hash = mix_float(hash, log->depth_6_rate); + hash = mix_float(hash, log->depth_6_solve_rate); + hash = mix_float(hash, 
log->depth_8_rate); + hash = mix_float(hash, log->depth_8_solve_rate); + hash = mix_float(hash, log->depth_16_rate); + hash = mix_float(hash, log->depth_16_solve_rate); + hash = mix_float(hash, log->n); + return hash; +} + +static uint64_t reset_snapshot_checksum(const AffineLock* env) { + uint64_t hash = 1469598103934665603ull; + hash = mix_u64(hash, env->state); + hash = mix_u64(hash, env->target); + hash = mix_u64(hash, (uint64_t)env->step_count); + hash = mix_u64(hash, (uint64_t)env->max_steps); + hash = mix_u64(hash, (uint64_t)env->scramble_depth); + hash = mix_u64(hash, (uint64_t)env->curriculum_depth); + hash = mix_u64(hash, (uint64_t)env->solution_length); + hash = mix_u64(hash, (uint64_t)(env->target_distance + 1)); + hash = mix_float(hash, env->rewards[0]); + hash = mix_float(hash, env->terminals[0]); + for (int i = 0; i < AFFINE_LOCK_OBS_SIZE; i++) { + hash = mix_float(hash, env->observations[i]); + } + hash = log_snapshot_checksum(hash, &env->log); + for (int i = 0; i < AFFINE_LOCK_MAX_SOLUTION_DEPTH; i++) { + hash = mix_u64(hash, (uint64_t)(env->solution_actions[i] + 1)); + } + return hash; +} + +static void expect_env_snapshots_equal( + const AffineLock* a, + const AffineLock* b, + const float obs_a[AFFINE_LOCK_OBS_SIZE], + const float obs_b[AFFINE_LOCK_OBS_SIZE]) { + EXPECT_EQ_U64(reset_snapshot_checksum(a), reset_snapshot_checksum(b)); + EXPECT_TRUE(memcmp(obs_a, obs_b, AFFINE_LOCK_OBS_SIZE * sizeof(float)) == 0); + EXPECT_EQ_U32(a->state, b->state); + EXPECT_EQ_U32(a->target, b->target); + EXPECT_EQ_INT(a->scramble_depth, b->scramble_depth); + EXPECT_EQ_INT(a->max_steps, b->max_steps); + EXPECT_EQ_INT(a->solution_length, b->solution_length); + EXPECT_TRUE(memcmp(a->solution_actions, b->solution_actions, + sizeof(a->solution_actions)) == 0); +} + +static void expect_solution_reaches_target( + const AffineLockShared* shared, + const AffineLock* env) { + TestBfsStats stats; + compute_test_bfs_stats(shared, env->state, env->target, &stats); + 
EXPECT_EQ_INT(env->target_distance, stats.shortest_distance); + EXPECT_EQ_INT(env->solution_length, stats.shortest_distance); + EXPECT_EQ_INT(env->max_steps, stats.shortest_distance + shared->step_grace); + EXPECT_TRUE(stats.reachable_count > 0); + EXPECT_TRUE(stats.farthest_distance >= stats.shortest_distance); + EXPECT_TRUE(stats.distance_histogram[env->target_distance] > 0); + + uint32_t simulated = env->state; + for (int i = 0; i < env->solution_length; i++) { + int action = env->solution_actions[i]; + EXPECT_TRUE(action >= 0 && action < AFFINE_LOCK_NUM_ACTIONS); + simulated = test_apply_action(simulated, action) & shared->mask; + } + EXPECT_EQ_U32(simulated, env->target); +} + +static void solve_with_stored_solution(AffineLock* env) { + int length = env->solution_length; + for (int step = 0; step < length; step++) { + env->actions[0] = (float)env->solution_actions[step]; + c_step(env); + if (env->terminals[0] != 0.0f) { + return; + } + } +} + +static void expect_depth_log_delta( + const Log* before, + const Log* after, + int depth, + int solved) { + EXPECT_NEAR(after->depth_2_rate, + before->depth_2_rate + (depth == 2 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(after->depth_2_solve_rate, + before->depth_2_solve_rate + (solved && depth == 2 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(after->depth_4_rate, + before->depth_4_rate + (depth == 4 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(after->depth_4_solve_rate, + before->depth_4_solve_rate + (solved && depth == 4 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(after->depth_5_rate, + before->depth_5_rate + (depth == 5 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(after->depth_5_solve_rate, + before->depth_5_solve_rate + (solved && depth == 5 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(after->depth_6_rate, + before->depth_6_rate + (depth == 6 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(after->depth_6_solve_rate, + before->depth_6_solve_rate + (solved && depth == 6 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(after->depth_8_rate, + before->depth_8_rate + (depth == 8 ? 
1.0f : 0.0f), 0.0f); + EXPECT_NEAR(after->depth_8_solve_rate, + before->depth_8_solve_rate + (solved && depth == 8 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(after->depth_16_rate, + before->depth_16_rate + (depth == 16 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(after->depth_16_solve_rate, + before->depth_16_solve_rate + (solved && depth == 16 ? 1.0f : 0.0f), 0.0f); +} + +static void expect_oracle_episode_win(AffineLock* env, int depth) { + AffineLockShared* shared = env->shared; + EXPECT_EQ_INT(env->scramble_depth, depth); + EXPECT_TRUE(env->solution_length > 0); + expect_solution_reaches_target(shared, env); + + Log before = env->log; + int target_distance = env->target_distance; + int solution_length = env->solution_length; + EXPECT_TRUE(solution_length > 0); + EXPECT_EQ_INT(env->max_steps, target_distance + shared->step_grace); + + for (int step = 0; step < solution_length; step++) { + env->actions[0] = (float)env->solution_actions[step]; + c_step(env); + if (step + 1 < solution_length) { + EXPECT_NEAR(env->rewards[0], AFFINE_LOCK_STEP_REWARD, 0.0f); + EXPECT_NEAR(env->terminals[0], 0.0f, 0.0f); + EXPECT_EQ_INT(env->step_count, step + 1); + expect_observation_matches(env); + } + } + + EXPECT_NEAR(env->rewards[0], 1.0f, 0.0f); + EXPECT_NEAR(env->terminals[0], 1.0f, 0.0f); + EXPECT_EQ_INT(env->step_count, 0); + EXPECT_NEAR(env->log.n, before.n + 1.0f, 0.0f); + EXPECT_NEAR(env->log.perf, + before.perf + expected_solve_credit(shared, depth), 0.0f); + EXPECT_NEAR(env->log.score, + before.score + expected_solve_credit(shared, depth), 0.0f); + EXPECT_NEAR(env->log.solve_rate, before.solve_rate + 1.0f, 0.0f); + EXPECT_NEAR(env->log.timeout_rate, before.timeout_rate, 0.0f); + EXPECT_NEAR(env->log.invalid_rate, before.invalid_rate, 0.0f); + EXPECT_NEAR(env->log.episode_length, + before.episode_length + (float)solution_length, 0.0f); + EXPECT_NEAR(env->log.solve_steps, + before.solve_steps + (float)solution_length, 0.0f); + EXPECT_NEAR(env->log.target_distance, + 
before.target_distance + (float)target_distance, 0.0f); + EXPECT_NEAR(env->log.solved_target_distance, + before.solved_target_distance + (float)target_distance, 0.0f); + expect_depth_log_delta(&before, &env->log, depth, 1); + + int next_depth = affine_lock_next_curriculum_depth(shared, depth); + EXPECT_EQ_INT(env->scramble_depth, next_depth); + expect_observation_matches(env); +} + +static void expect_non_solving_episode_timeout(AffineLock* env, int depth) { + AffineLockShared* shared = env->shared; + EXPECT_EQ_INT(env->scramble_depth, depth); + EXPECT_TRUE(env->solution_length > 0); + expect_solution_reaches_target(shared, env); + + Log before = env->log; + int target_distance = env->target_distance; + int max_steps = env->max_steps; + EXPECT_TRUE(max_steps > 0); + + for (int step = 0; step < max_steps; step++) { + int action = find_non_solving_action(env); + EXPECT_TRUE(action >= 0); + env->actions[0] = (float)action; + c_step(env); + if (step + 1 < max_steps) { + EXPECT_NEAR(env->rewards[0], AFFINE_LOCK_STEP_REWARD, 0.0f); + EXPECT_NEAR(env->terminals[0], 0.0f, 0.0f); + EXPECT_EQ_INT(env->step_count, step + 1); + expect_observation_matches(env); + } + } + + EXPECT_NEAR(env->rewards[0], -1.0f, 0.0f); + EXPECT_NEAR(env->terminals[0], 1.0f, 0.0f); + EXPECT_EQ_INT(env->step_count, 0); + EXPECT_NEAR(env->log.n, before.n + 1.0f, 0.0f); + EXPECT_NEAR(env->log.perf, before.perf, 0.0f); + EXPECT_NEAR(env->log.score, before.score, 0.0f); + EXPECT_NEAR(env->log.solve_rate, before.solve_rate, 0.0f); + EXPECT_NEAR(env->log.timeout_rate, before.timeout_rate + 1.0f, 0.0f); + EXPECT_NEAR(env->log.invalid_rate, before.invalid_rate, 0.0f); + EXPECT_NEAR(env->log.episode_length, + before.episode_length + (float)max_steps, 0.0f); + EXPECT_NEAR(env->log.solve_steps, before.solve_steps, 0.0f); + EXPECT_NEAR(env->log.target_distance, + before.target_distance + (float)target_distance, 0.0f); + EXPECT_NEAR(env->log.solved_target_distance, + before.solved_target_distance, 0.0f); + 
expect_depth_log_delta(&before, &env->log, depth, 0); + EXPECT_EQ_INT(env->scramble_depth, shared->start_depth); + expect_observation_matches(env); +} + +static size_t read_text_file(const char* path, char* buffer, size_t capacity) { + FILE* file = fopen(path, "r"); + EXPECT_TRUE(file != NULL); + size_t nread = fread(buffer, 1, capacity - 1, file); + buffer[nread] = '\0'; + fclose(file); + return nread; +} + +static void test_metadata_contract(void) { + EXPECT_EQ_INT(AFFINE_LOCK_BITS, 16); + EXPECT_EQ_INT(AFFINE_LOCK_TIMER_INDEX, 32); + EXPECT_EQ_INT(AFFINE_LOCK_OBS_SIZE, 33); + EXPECT_EQ_INT(AFFINE_LOCK_NUM_ATNS, 1); + EXPECT_EQ_INT(AFFINE_LOCK_NUM_ACTIONS, 8); +} + +static void test_config_and_binding_metadata_contract(void) { + char config[16384]; + read_text_file("config/affine_lock.ini", config, sizeof(config)); + EXPECT_TRUE(strstr(config, "[base]") != NULL); + EXPECT_TRUE(strstr(config, "env_name = affine_lock") != NULL); + EXPECT_TRUE(strstr(config, "[env]") != NULL); + EXPECT_TRUE(strstr(config, "start_depth = 2") != NULL); + EXPECT_TRUE(strstr(config, "max_depth = 16") != NULL); + EXPECT_TRUE(strstr(config, "[sweep]") != NULL); + EXPECT_TRUE(strstr(config, "metric = perf") != NULL); + EXPECT_TRUE(strstr(config, "goal = maximize") != NULL); + EXPECT_TRUE(strstr(config, "min = 100_000_000") != NULL); + EXPECT_TRUE(strstr(config, "max = 200_000_000") != NULL); + EXPECT_TRUE(strstr(config, "[sweep.policy.num_layers]") != NULL); + EXPECT_TRUE(strstr(config, "max = 131072") != NULL); + EXPECT_TRUE(strstr(config, "max = 4.0") != NULL); + + char binding[8192]; + read_text_file("ocean/affine_lock/binding.c", binding, sizeof(binding)); + EXPECT_TRUE(strstr(binding, "#define OBS_SIZE AFFINE_LOCK_OBS_SIZE") != NULL); + EXPECT_TRUE(strstr(binding, "#define ACT_SIZES {AFFINE_LOCK_NUM_ACTIONS}") != NULL); + EXPECT_TRUE(strstr(binding, "#define OBS_TENSOR_T FloatTensor") != NULL); +} + +static void test_global_action_examples(void) { + AffineLockShared shared = 
make_shared(2, 16, 0); + uint32_t start = bits_from_text("0011011000010111"); + + const char* expected[AFFINE_LOCK_NUM_ACTIONS] = { + "0110110000101110", + "1001101100001011", + "0011011001101000", + "0011100100101011", + "1100100101001101", + "0110001101110001", + "1100011010001110", + "0110110011101000", + }; + + for (int action = 0; action < AFFINE_LOCK_NUM_ACTIONS; action++) { + uint32_t next = affine_lock_apply_action(&shared, start, action); + EXPECT_EQ_U32(next, bits_from_text(expected[action])); + } + + affine_lock_free_shared(&shared); +} + +static void test_actions_round_trip_for_all_states(void) { + AffineLockShared shared = make_shared(2, 16, 0); + const int inverse_actions[AFFINE_LOCK_NUM_ACTIONS] = { + AFFINE_LOCK_ACTION_SHIFT_RIGHT, + AFFINE_LOCK_ACTION_SHIFT_LEFT, + AFFINE_LOCK_ACTION_INVERT_RIGHT_7, + AFFINE_LOCK_ACTION_SWAP_ADJACENT_BITS, + AFFINE_LOCK_ACTION_SWAP_ADJACENT_PAIRS, + AFFINE_LOCK_ACTION_SWAP_NIBBLES_EACH_BYTE, + AFFINE_LOCK_ACTION_REVERSE_EACH_NIBBLE, + AFFINE_LOCK_ACTION_REVERSE_EACH_BYTE, + }; + EXPECT_EQ_INT(shared.num_states, 1 << 16); + EXPECT_EQ_U32(shared.mask, 0xffffu); + + for (int action = 0; action < AFFINE_LOCK_NUM_ACTIONS; action++) { + int inverse = inverse_actions[action]; + EXPECT_TRUE(inverse >= 0 && inverse < AFFINE_LOCK_NUM_ACTIONS); + EXPECT_EQ_INT(inverse_actions[inverse], action); + + for (uint32_t state = 0; state < (uint32_t)shared.num_states; state++) { + uint32_t next = affine_lock_apply_action(&shared, state, action); + EXPECT_EQ_U32(next & ~shared.mask, 0u); + uint32_t round_trip = affine_lock_apply_action(&shared, next, inverse); + EXPECT_EQ_U32(round_trip, state); + } + } + + affine_lock_free_shared(&shared); +} + +static void test_reset_randomizes_target_and_current(void) { + AffineLockShared shared = make_shared(2, 16, 0); + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, 123, 
observations, actions, rewards, terminals); + + uint32_t first_target = 0; + uint32_t first_state = 0; + int target_changed = 0; + int state_changed = 0; + + for (int i = 0; i < 16; i++) { + c_reset(&env); + EXPECT_EQ_INT(env.scramble_depth, shared.start_depth); + EXPECT_EQ_INT(env.max_steps, shared.start_depth); + EXPECT_EQ_U32(env.target & ~shared.mask, 0u); + EXPECT_EQ_U32(env.state & ~shared.mask, 0u); + EXPECT_NE_U32(env.state, env.target); + expect_observation_matches(&env); + + if (i == 0) { + first_target = env.target; + first_state = env.state; + } else { + if (env.target != first_target) { + target_changed = 1; + } + if (env.state != first_state) { + state_changed = 1; + } + } + } + + EXPECT_TRUE(target_changed); + EXPECT_TRUE(state_changed); + affine_lock_free_shared(&shared); +} + +static void test_visible_target_table_initialization_samples_reachable_target(void) { + AffineLockShared shared = make_shared(8, 16, 0); + + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, 777, observations, actions, rewards, terminals); + c_reset(&env); + + EXPECT_EQ_INT(env.scramble_depth, shared.start_depth); + EXPECT_EQ_INT(env.target_distance, shared.start_depth); + EXPECT_EQ_INT(env.max_steps, env.target_distance); + EXPECT_EQ_INT(env.solution_length, env.target_distance); + EXPECT_NE_U32(env.state, env.target); + expect_solution_reaches_target(&shared, &env); + expect_observation_matches(&env); + + affine_lock_free_shared(&shared); +} + +static void test_visible_target_table_depths_have_expected_distances(void) { + const int depths[] = {2, 4, 5, 6, 8, 16}; + for (int i = 0; i < 6; i++) { + int depth = depths[i]; + AffineLockShared shared = make_shared(depth, 16, 0); + + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, (unsigned 
int)(1900 + depth), observations, + actions, rewards, terminals); + c_reset(&env); + + TestBfsStats stats; + compute_test_bfs_stats(&shared, env.state, env.target, &stats); + int expected_distance = depth <= stats.farthest_distance ? + depth : stats.farthest_distance; + EXPECT_EQ_INT(env.target_distance, expected_distance); + EXPECT_EQ_INT(env.solution_length, expected_distance); + EXPECT_EQ_INT(env.max_steps, expected_distance); + expect_solution_reaches_target(&shared, &env); + + solve_with_stored_solution(&env); + EXPECT_NEAR(rewards[0], 1.0f, 0.0f); + EXPECT_NEAR(terminals[0], 1.0f, 0.0f); + + affine_lock_free_shared(&shared); + } +} + +static void test_visible_target_table_reset_uses_stored_records(void) { + const int requested_depths[] = {2, 4, 5, 6, 8, 16}; + const int expected_pool_sizes[] = {65536, 65536, 65536, 65536, 65536, 100548}; + + for (int depth_index = 0; depth_index < 6; depth_index++) { + int requested_depth = requested_depths[depth_index]; + AffineLockShared shared = make_shared(requested_depth, 16, 0); + const AffineLockVisibleTargetDepth* table_depth = + affine_lock_visible_target_depth(&shared, requested_depth); + EXPECT_TRUE(table_depth != NULL); + EXPECT_EQ_INT((int)table_depth->stored_count, + expected_pool_sizes[depth_index]); + + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, (unsigned int)(2500 + requested_depth), + observations, actions, rewards, terminals); + + for (int reset = 0; reset < 8; reset++) { + c_reset(&env); + + EXPECT_EQ_INT(env.target_distance, requested_depth); + EXPECT_EQ_INT(env.solution_length, requested_depth); + EXPECT_EQ_INT(env.max_steps, requested_depth); + TestBfsStats stats; + compute_test_bfs_stats(&shared, env.state, env.target, &stats); + EXPECT_EQ_INT(stats.shortest_distance, requested_depth); + expect_solution_reaches_target(&shared, &env); + } + + affine_lock_free_shared(&shared); + } 
+} + +static void test_visible_target_table_matches_independent_bfs_over_repeated_resets(void) { + const int depths[] = {2, 4, 5, 6, 8, 16}; + + for (int depth_index = 0; depth_index < 6; depth_index++) { + int depth = depths[depth_index]; + AffineLockShared shared = make_shared(depth, 16, 0); + + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, (unsigned int)(1000 + depth), + observations, actions, rewards, terminals); + + for (int reset = 0; reset < 12; reset++) { + c_reset(&env); + EXPECT_TRUE(env.target_distance > 0); + EXPECT_TRUE(env.solution_length > 0); + expect_solution_reaches_target(&shared, &env); + expect_observation_matches(&env); + } + + affine_lock_free_shared(&shared); + } +} + +static void test_observation_encoding_is_32_signed_bit_floats_plus_timer(void) { + AffineLockShared shared = make_shared(2, 16, 0); + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, 7, observations, actions, rewards, terminals); + + env.state = 0xa55au; + env.target = 0x0f0fu; + env.step_count = 3; + env.max_steps = 12; + affine_lock_compute_observations(&env); + + expect_observation_matches(&env); + affine_lock_free_shared(&shared); +} + +static void test_timer_observation_progresses_and_resets_after_timeout(void) { + AffineLockShared shared = make_shared(2, 16, 0); + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, 19, observations, actions, rewards, terminals); + c_reset(&env); + EXPECT_NEAR(observations[AFFINE_LOCK_TIMER_INDEX], 0.0f, 0.0f); + + env.target = 0u; + env.state = shared.mask; + env.step_count = 0; + env.max_steps = 4; + affine_lock_compute_observations(&env); + 
EXPECT_NEAR(observations[AFFINE_LOCK_TIMER_INDEX], 0.0f, 0.0f); + + actions[0] = 1.0f; + c_step(&env); + EXPECT_NEAR(terminals[0], 0.0f, 0.0f); + EXPECT_NEAR(observations[AFFINE_LOCK_TIMER_INDEX], 0.25f, 0.000001f); + + c_step(&env); + EXPECT_NEAR(terminals[0], 0.0f, 0.0f); + EXPECT_NEAR(observations[AFFINE_LOCK_TIMER_INDEX], 0.5f, 0.000001f); + + c_step(&env); + EXPECT_NEAR(terminals[0], 0.0f, 0.0f); + EXPECT_NEAR(observations[AFFINE_LOCK_TIMER_INDEX], 0.75f, 0.000001f); + + c_step(&env); + EXPECT_NEAR(rewards[0], -1.0f, 0.0f); + EXPECT_NEAR(terminals[0], 1.0f, 0.0f); + EXPECT_EQ_INT(env.step_count, 0); + EXPECT_NEAR(observations[AFFINE_LOCK_TIMER_INDEX], 0.0f, 0.0f); + + affine_lock_free_shared(&shared); +} + +static void test_actions_apply_to_current_state_directly(void) { + AffineLockShared shared = make_shared(2, 16, 0); + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, 55, observations, actions, rewards, terminals); + c_reset(&env); + + uint32_t target = bits_from_text("1111000011110000"); + uint32_t state = bits_from_text("0011011000010111"); + int action = 1; + uint32_t expected_state = affine_lock_apply_action(&shared, state, action); + EXPECT_NE_U32(expected_state, target); + + env.target = target; + env.state = state; + env.step_count = 0; + env.max_steps = 16; + env.actions[0] = (float)action; + c_step(&env); + + EXPECT_NEAR(rewards[0], -0.01f, 0.0f); + EXPECT_NEAR(terminals[0], 0.0f, 0.0f); + EXPECT_EQ_U32(env.target, target); + EXPECT_EQ_U32(env.state, expected_state); + + affine_lock_free_shared(&shared); +} + +static void test_action_float_validation_rejects_non_discrete_values(void) { + AffineLockShared shared = make_shared(2, 16, 0); + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, 57, observations, actions, 
rewards, terminals); + + const float invalid_actions[] = { + -1.0f, + 8.0f, + 1.5f, + NAN, + INFINITY, + -INFINITY, + }; + int count = (int)(sizeof(invalid_actions) / sizeof(invalid_actions[0])); + for (int i = 0; i < count; i++) { + c_reset(&env); + float prev_invalid = env.log.invalid_rate; + float prev_n = env.log.n; + + actions[0] = invalid_actions[i]; + c_step(&env); + + EXPECT_NEAR(rewards[0], -1.0f, 0.0f); + EXPECT_NEAR(terminals[0], 1.0f, 0.0f); + EXPECT_NEAR(env.log.invalid_rate, prev_invalid + 1.0f, 0.0f); + EXPECT_NEAR(env.log.n, prev_n + 1.0f, 0.0f); + EXPECT_EQ_INT(env.step_count, 0); + } + + affine_lock_free_shared(&shared); +} + +static void test_visible_target_table_curriculum_and_logging(void) { + AffineLockShared shared = make_shared(2, 16, 0); + + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, 42, observations, actions, rewards, terminals); + c_reset(&env); + + const int expected_depths[] = {2, 4, 5, 6, 8, 16}; + for (int episode = 0; episode < 6; episode++) { + int depth = expected_depths[episode]; + EXPECT_EQ_INT(env.scramble_depth, depth); + expect_solution_reaches_target(&shared, &env); + + float prev_n = env.log.n; + float prev_perf = env.log.perf; + float prev_max_depth_solve = env.log.max_depth_solve; + float prev_target_distance = env.log.target_distance; + float prev_solved_target_distance = env.log.solved_target_distance; + float prev_depth_2 = env.log.depth_2_rate; + float prev_depth_2_solve = env.log.depth_2_solve_rate; + float prev_depth_4 = env.log.depth_4_rate; + float prev_depth_4_solve = env.log.depth_4_solve_rate; + float prev_depth_5 = env.log.depth_5_rate; + float prev_depth_5_solve = env.log.depth_5_solve_rate; + float prev_depth_6 = env.log.depth_6_rate; + float prev_depth_6_solve = env.log.depth_6_solve_rate; + float prev_depth_8 = env.log.depth_8_rate; + float prev_depth_8_solve = 
env.log.depth_8_solve_rate; + float prev_depth_16 = env.log.depth_16_rate; + float prev_depth_16_solve = env.log.depth_16_solve_rate; + int target_distance = env.target_distance; + int metric_depth = target_distance > 0 ? target_distance : depth; + + solve_with_stored_solution(&env); + EXPECT_NEAR(rewards[0], 1.0f, 0.0f); + EXPECT_NEAR(terminals[0], 1.0f, 0.0f); + EXPECT_NEAR(env.log.n, prev_n + 1.0f, 0.0f); + EXPECT_NEAR(env.log.perf, + prev_perf + expected_solve_credit(&shared, metric_depth), 0.0f); + EXPECT_NEAR(env.log.max_depth_solve, + prev_max_depth_solve + (metric_depth == shared.max_depth ? 1.0f : 0.0f), + 0.0f); + EXPECT_NEAR(env.log.target_distance, + prev_target_distance + (float)target_distance, 0.0f); + EXPECT_NEAR(env.log.solved_target_distance, + prev_solved_target_distance + (float)target_distance, 0.0f); + EXPECT_NEAR(env.log.depth_2_rate, + prev_depth_2 + (metric_depth == 2 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(env.log.depth_2_solve_rate, + prev_depth_2_solve + (metric_depth == 2 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(env.log.depth_4_rate, + prev_depth_4 + (metric_depth == 4 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(env.log.depth_4_solve_rate, + prev_depth_4_solve + (metric_depth == 4 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(env.log.depth_5_rate, + prev_depth_5 + (metric_depth == 5 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(env.log.depth_5_solve_rate, + prev_depth_5_solve + (metric_depth == 5 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(env.log.depth_6_rate, + prev_depth_6 + (metric_depth == 6 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(env.log.depth_6_solve_rate, + prev_depth_6_solve + (metric_depth == 6 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(env.log.depth_8_rate, + prev_depth_8 + (metric_depth == 8 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(env.log.depth_8_solve_rate, + prev_depth_8_solve + (metric_depth == 8 ? 1.0f : 0.0f), 0.0f); + EXPECT_NEAR(env.log.depth_16_rate, + prev_depth_16 + (metric_depth == 16 ? 
1.0f : 0.0f), 0.0f); + EXPECT_NEAR(env.log.depth_16_solve_rate, + prev_depth_16_solve + (metric_depth == 16 ? 1.0f : 0.0f), 0.0f); + + int next_depth = episode < 5 ? expected_depths[episode + 1] : 16; + EXPECT_EQ_INT(env.scramble_depth, next_depth); + } + + float prev_n = env.log.n; + float prev_perf = env.log.perf; + float prev_max_depth_solve = env.log.max_depth_solve; + float prev_invalid = env.log.invalid_rate; + EXPECT_EQ_INT(env.scramble_depth, shared.max_depth); + actions[0] = 999.0f; + c_step(&env); + EXPECT_NEAR(rewards[0], -1.0f, 0.0f); + EXPECT_NEAR(terminals[0], 1.0f, 0.0f); + EXPECT_NEAR(env.log.n, prev_n + 1.0f, 0.0f); + EXPECT_NEAR(env.log.perf, prev_perf, 0.0f); + EXPECT_NEAR(env.log.max_depth_solve, prev_max_depth_solve, 0.0f); + EXPECT_NEAR(env.log.invalid_rate, prev_invalid + 1.0f, 0.0f); + EXPECT_EQ_INT(env.scramble_depth, shared.start_depth); + + affine_lock_free_shared(&shared); +} + +static void test_visible_target_table_oracle_wins_all_curriculum_depths_end_to_end(void) { + AffineLockShared shared = make_shared(2, 16, 0); + + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, 4242, observations, actions, rewards, terminals); + c_reset(&env); + + const int depths[] = {2, 4, 5, 6, 8, 16}; + for (int i = 0; i < 6; i++) { + expect_oracle_episode_win(&env, depths[i]); + } + + EXPECT_EQ_INT(env.scramble_depth, shared.max_depth); + EXPECT_NEAR(env.log.n, 6.0f, 0.0f); + EXPECT_NEAR(env.log.depth_2_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.depth_2_solve_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.depth_4_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.depth_4_solve_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.depth_5_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.depth_5_solve_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.depth_6_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.depth_6_solve_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.depth_8_rate, 1.0f, 0.0f); + 
EXPECT_NEAR(env.log.depth_8_solve_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.depth_16_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.depth_16_solve_rate, 1.0f, 0.0f); + EXPECT_NEAR(env.log.timeout_rate, 0.0f, 0.0f); + EXPECT_NEAR(env.log.invalid_rate, 0.0f, 0.0f); + + affine_lock_free_shared(&shared); +} + +static void test_visible_target_table_timeouts_at_all_curriculum_depths_end_to_end(void) { + const int loss_depths[] = {2, 4, 5, 6, 8, 16}; + + for (int i = 0; i < 6; i++) { + int loss_depth = loss_depths[i]; + AffineLockShared shared = make_shared(2, 16, 0); + + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, (unsigned int)(5200 + loss_depth), + observations, actions, rewards, terminals); + c_reset(&env); + + while (env.scramble_depth < loss_depth) { + expect_oracle_episode_win(&env, env.scramble_depth); + } + expect_non_solving_episode_timeout(&env, loss_depth); + + EXPECT_EQ_INT(env.scramble_depth, shared.start_depth); + EXPECT_TRUE(env.log.timeout_rate >= 1.0f); + EXPECT_TRUE(env.log.solve_rate >= 0.0f); + EXPECT_NEAR(env.log.invalid_rate, 0.0f, 0.0f); + + affine_lock_free_shared(&shared); + } +} + +static int deterministic_stream_action(int episode, int step) { + return (episode * 3 + step * 7) % AFFINE_LOCK_NUM_ACTIONS; +} + +static uint64_t run_seed_sequence_checksum(unsigned int seed) { + AffineLockShared shared = make_shared(2, 16, 0); + + AffineLock env; + float observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, seed, observations, actions, rewards, terminals); + + uint64_t checksum = 1469598103934665603ull; + for (int episode = 0; episode < 16; episode++) { + c_reset(&env); + checksum = mix_u64(checksum, reset_snapshot_checksum(&env)); + int max_steps = env.max_steps; + for (int step = 0; step < max_steps + 1; step++) { + int action = 
deterministic_stream_action(episode, step); + if (step < env.solution_length) { + action = env.solution_actions[step]; + } + actions[0] = (float)action; + c_step(&env); + checksum = mix_u64(checksum, reset_snapshot_checksum(&env)); + if (terminals[0] != 0.0f) { + break; + } + } + } + + affine_lock_free_shared(&shared); + return checksum; +} + +static void test_deterministic_seed_sequences(void) { + AffineLockShared shared = make_shared(2, 16, 0); + + AffineLock env_a; + AffineLock env_b; + float obs_a[AFFINE_LOCK_OBS_SIZE], obs_b[AFFINE_LOCK_OBS_SIZE]; + float atn_a[AFFINE_LOCK_NUM_ATNS], atn_b[AFFINE_LOCK_NUM_ATNS]; + float rew_a[1], rew_b[1]; + float term_a[1], term_b[1]; + make_env(&env_a, &shared, 12345, obs_a, atn_a, rew_a, term_a); + make_env(&env_b, &shared, 12345, obs_b, atn_b, rew_b, term_b); + + for (int episode = 0; episode < 16; episode++) { + c_reset(&env_a); + c_reset(&env_b); + expect_env_snapshots_equal(&env_a, &env_b, obs_a, obs_b); + int max_steps = env_a.max_steps; + for (int step = 0; step < max_steps + 1; step++) { + int action = deterministic_stream_action(episode, step); + if (step < env_a.solution_length) { + action = env_a.solution_actions[step]; + } + atn_a[0] = (float)action; + atn_b[0] = (float)action; + c_step(&env_a); + c_step(&env_b); + EXPECT_NEAR(rew_a[0], rew_b[0], 0.0f); + EXPECT_NEAR(term_a[0], term_b[0], 0.0f); + expect_env_snapshots_equal(&env_a, &env_b, obs_a, obs_b); + if (term_a[0] != 0.0f) { + break; + } + } + } + + affine_lock_free_shared(&shared); + + uint64_t seed_1 = run_seed_sequence_checksum(1); + uint64_t seed_1_repeat = run_seed_sequence_checksum(1); + uint64_t seed_2 = run_seed_sequence_checksum(2); + uint64_t seed_2_repeat = run_seed_sequence_checksum(2); + EXPECT_EQ_U64(seed_1, seed_1_repeat); + EXPECT_EQ_U64(seed_2, seed_2_repeat); + EXPECT_TRUE(seed_1 != seed_2); +} + +static uint64_t run_visible_table_seed_42_golden_sequence(void) { + AffineLockShared shared = make_shared(2, 16, 0); + + AffineLock env; + float 
observations[AFFINE_LOCK_OBS_SIZE]; + float actions[AFFINE_LOCK_NUM_ATNS]; + float rewards[1]; + float terminals[1]; + make_env(&env, &shared, 42, observations, actions, rewards, terminals); + + uint64_t checksum = 1469598103934665603ull; + c_reset(&env); + checksum = mix_u64(checksum, reset_snapshot_checksum(&env)); + for (int episode = 0; episode < 5; episode++) { + int length = env.solution_length; + for (int step = 0; step < length; step++) { + actions[0] = (float)env.solution_actions[step]; + c_step(&env); + checksum = mix_u64(checksum, reset_snapshot_checksum(&env)); + if (terminals[0] != 0.0f) { + break; + } + } + } + EXPECT_EQ_INT(env.scramble_depth, 16); + actions[0] = 999.0f; + c_step(&env); + checksum = mix_u64(checksum, reset_snapshot_checksum(&env)); + EXPECT_EQ_INT(env.scramble_depth, 2); + + affine_lock_free_shared(&shared); + return checksum; +} + +static void test_visible_table_seed_42_golden_checksum(void) { + uint64_t checksum = run_visible_table_seed_42_golden_sequence(); + EXPECT_EQ_U64(checksum, 0x1b6d67bf767fd010ull); +} + +static void test_deterministic_seed_sequences_and_distinct_env_ids(void) { + AffineLockShared shared = make_shared(2, 16, 0); + + AffineLock env_a; + AffineLock env_b; + float obs_a[AFFINE_LOCK_OBS_SIZE], obs_b[AFFINE_LOCK_OBS_SIZE]; + float atn_a[AFFINE_LOCK_NUM_ATNS], atn_b[AFFINE_LOCK_NUM_ATNS]; + float rew_a[1], rew_b[1]; + float term_a[1], term_b[1]; + make_env(&env_a, &shared, 12345, obs_a, atn_a, rew_a, term_a); + make_env(&env_b, &shared, 12345, obs_b, atn_b, rew_b, term_b); + + for (int episode = 0; episode < 8; episode++) { + c_reset(&env_a); + c_reset(&env_b); + EXPECT_EQ_U32(env_a.target, env_b.target); + EXPECT_EQ_U32(env_a.state, env_b.state); + EXPECT_EQ_INT(env_a.scramble_depth, env_b.scramble_depth); + EXPECT_EQ_INT(env_a.solution_length, env_b.solution_length); + EXPECT_TRUE(memcmp(env_a.solution_actions, env_b.solution_actions, + sizeof(env_a.solution_actions)) == 0); + EXPECT_TRUE(memcmp(obs_a, obs_b, 
sizeof(obs_a)) == 0); + + solve_with_stored_solution(&env_a); + solve_with_stored_solution(&env_b); + EXPECT_EQ_U32(env_a.target, env_b.target); + EXPECT_EQ_U32(env_a.state, env_b.state); + EXPECT_NEAR(rew_a[0], rew_b[0], 0.0f); + EXPECT_NEAR(term_a[0], term_b[0], 0.0f); + EXPECT_TRUE(memcmp(obs_a, obs_b, sizeof(obs_a)) == 0); + } + + AffineLock env_1; + AffineLock env_2; + AffineLock env_1_repeat; + AffineLock env_2_repeat; + float obs_1[AFFINE_LOCK_OBS_SIZE], obs_2[AFFINE_LOCK_OBS_SIZE]; + float obs_1r[AFFINE_LOCK_OBS_SIZE], obs_2r[AFFINE_LOCK_OBS_SIZE]; + float atn_1[AFFINE_LOCK_NUM_ATNS], atn_2[AFFINE_LOCK_NUM_ATNS]; + float atn_1r[AFFINE_LOCK_NUM_ATNS], atn_2r[AFFINE_LOCK_NUM_ATNS]; + float rew_1[1], rew_2[1], rew_1r[1], rew_2r[1]; + float term_1[1], term_2[1], term_1r[1], term_2r[1]; + make_env(&env_1, &shared, 1, obs_1, atn_1, rew_1, term_1); + make_env(&env_2, &shared, 2, obs_2, atn_2, rew_2, term_2); + make_env(&env_1_repeat, &shared, 1, obs_1r, atn_1r, rew_1r, term_1r); + make_env(&env_2_repeat, &shared, 2, obs_2r, atn_2r, rew_2r, term_2r); + + int differs = 0; + for (int i = 0; i < 8; i++) { + c_reset(&env_1); + c_reset(&env_2); + c_reset(&env_1_repeat); + c_reset(&env_2_repeat); + + EXPECT_EQ_U32(env_1.target, env_1_repeat.target); + EXPECT_EQ_U32(env_1.state, env_1_repeat.state); + EXPECT_EQ_U32(env_2.target, env_2_repeat.target); + EXPECT_EQ_U32(env_2.state, env_2_repeat.state); + EXPECT_TRUE(memcmp(env_1.solution_actions, env_1_repeat.solution_actions, + sizeof(env_1.solution_actions)) == 0); + EXPECT_TRUE(memcmp(env_2.solution_actions, env_2_repeat.solution_actions, + sizeof(env_2.solution_actions)) == 0); + + if (env_1.target != env_2.target || env_1.state != env_2.state || + memcmp(env_1.solution_actions, env_2.solution_actions, + sizeof(env_1.solution_actions)) != 0) { + differs = 1; + } + } + EXPECT_TRUE(differs); + + affine_lock_free_shared(&shared); +} + +int main(void) { + test_metadata_contract(); + 
test_config_and_binding_metadata_contract(); + test_global_action_examples(); + test_actions_round_trip_for_all_states(); + test_reset_randomizes_target_and_current(); + test_visible_target_table_initialization_samples_reachable_target(); + test_visible_target_table_depths_have_expected_distances(); + test_visible_target_table_reset_uses_stored_records(); + test_visible_target_table_matches_independent_bfs_over_repeated_resets(); + test_log_solve_credit_uses_known_target_distance(); + test_observation_encoding_is_32_signed_bit_floats_plus_timer(); + test_timer_observation_progresses_and_resets_after_timeout(); + test_actions_apply_to_current_state_directly(); + test_action_float_validation_rejects_non_discrete_values(); + test_visible_target_table_curriculum_and_logging(); + test_visible_target_table_oracle_wins_all_curriculum_depths_end_to_end(); + test_visible_target_table_timeouts_at_all_curriculum_depths_end_to_end(); + test_deterministic_seed_sequences(); + test_visible_table_seed_42_golden_checksum(); + test_deterministic_seed_sequences_and_distinct_env_ids(); + printf("affine_lock tests passed\n"); + return 0; +} diff --git a/ocean/affine_lock/tests/test_affine_lock_log_export.c b/ocean/affine_lock/tests/test_affine_lock_log_export.c new file mode 100644 index 0000000000..773d0279f4 --- /dev/null +++ b/ocean/affine_lock/tests/test_affine_lock_log_export.c @@ -0,0 +1,276 @@ +#include +#include +#include +#include + +#define AFFINE_LOCK_NO_RENDER +#include "../binding.c" + +#define EXPECT_NEAR(actual, expected, tolerance) do { \ + double _actual = (double)(actual); \ + double _expected = (double)(expected); \ + double _tolerance = (double)(tolerance); \ + if (fabs(_actual - _expected) > _tolerance) { \ + fprintf(stderr, "%s:%d: expected %.9f ~= %.9f\n", \ + __FILE__, __LINE__, _actual, _expected); \ + exit(1); \ + } \ +} while (0) + +#define EXPECT_EQ_INT(actual, expected) do { \ + int _actual = (int)(actual); \ + int _expected = (int)(expected); \ + if 
(_actual != _expected) { \ + fprintf(stderr, "%s:%d: expected %d == %d\n", \ + __FILE__, __LINE__, _actual, _expected); \ + exit(1); \ + } \ +} while (0) + +#define EXPECT_EQ_U32(actual, expected) do { \ + uint32_t _actual = (uint32_t)(actual); \ + uint32_t _expected = (uint32_t)(expected); \ + if (_actual != _expected) { \ + fprintf(stderr, "%s:%d: expected 0x%x == 0x%x\n", \ + __FILE__, __LINE__, _actual, _expected); \ + exit(1); \ + } \ +} while (0) + +#define EXPECT_NE_U32(actual, expected) do { \ + uint32_t _actual = (uint32_t)(actual); \ + uint32_t _expected = (uint32_t)(expected); \ + if (_actual == _expected) { \ + fprintf(stderr, "%s:%d: expected 0x%x != 0x%x\n", \ + __FILE__, __LINE__, _actual, _expected); \ + exit(1); \ + } \ +} while (0) + +#define EXPECT_TRUE(cond) do { \ + if (!(cond)) { \ + fprintf(stderr, "%s:%d: expected true: %s\n", \ + __FILE__, __LINE__, #cond); \ + exit(1); \ + } \ +} while (0) + +static double dict_value(Dict* dict, const char* key) { + return dict_get(dict, key)->value; +} + +static int dict_has_key(Dict* dict, const char* key) { + return dict_get_unsafe(dict, key) != NULL; +} + +static Dict* make_vec_kwargs_for_agents(int total_agents) { + Dict* vec_kwargs = create_dict(2); + dict_set(vec_kwargs, "total_agents", total_agents); + dict_set(vec_kwargs, "num_buffers", 1); + return vec_kwargs; +} + +static Dict* make_vec_kwargs(void) { + return make_vec_kwargs_for_agents(2); +} + +static Dict* make_env_kwargs(int seed) { + Dict* env_kwargs = create_dict(4); + dict_set(env_kwargs, "start_depth", 2); + dict_set(env_kwargs, "max_depth", 16); + dict_set(env_kwargs, "step_grace", 0); + dict_set(env_kwargs, "seed", seed); + return env_kwargs; +} + +static void free_dict(Dict* dict) { + free(dict->items); + free(dict); +} + +static Env* make_binding_envs(int seed) { + Dict* vec_kwargs = make_vec_kwargs(); + Dict* env_kwargs = make_env_kwargs(seed); + int starts[1] = {0}; + int counts[1] = {0}; + int num_envs = 0; + Env* envs = 
my_vec_init(&num_envs, starts, counts, vec_kwargs, env_kwargs); + EXPECT_EQ_INT(num_envs, 2); + EXPECT_EQ_INT(starts[0], 0); + EXPECT_EQ_INT(counts[0], 2); + free_dict(vec_kwargs); + free_dict(env_kwargs); + return envs; +} + +static Env* make_binding_env_batch(int seed, int total_agents) { + Dict* vec_kwargs = make_vec_kwargs_for_agents(total_agents); + Dict* env_kwargs = make_env_kwargs(seed); + int starts[1] = {0}; + int counts[1] = {0}; + int num_envs = 0; + Env* envs = my_vec_init(&num_envs, starts, counts, vec_kwargs, env_kwargs); + EXPECT_EQ_INT(num_envs, total_agents); + EXPECT_EQ_INT(starts[0], 0); + EXPECT_EQ_INT(counts[0], total_agents); + free_dict(vec_kwargs); + free_dict(env_kwargs); + return envs; +} + +static void free_binding_envs(Env* envs) { + my_vec_close(envs); + free(envs); +} + +static void test_vec_init_mixes_base_seed_and_env_id(void) { + Env* base = make_binding_envs(123); + Env* repeat = make_binding_envs(123); + Env* different_seed = make_binding_envs(124); + + EXPECT_EQ_U32(base[0].rng, repeat[0].rng); + EXPECT_EQ_U32(base[1].rng, repeat[1].rng); + EXPECT_NE_U32(base[0].rng, base[1].rng); + EXPECT_NE_U32(base[0].rng, different_seed[0].rng); + + free_binding_envs(base); + free_binding_envs(repeat); + free_binding_envs(different_seed); +} + +static uint64_t mix_u64_for_binding_test(uint64_t hash, uint64_t value) { + hash ^= value; + hash *= 1099511628211ull; + return hash; +} + +static uint64_t binding_reset_checksum(const Env* env) { + uint64_t hash = 1469598103934665603ull; + hash = mix_u64_for_binding_test(hash, env->state); + hash = mix_u64_for_binding_test(hash, env->target); + hash = mix_u64_for_binding_test(hash, (uint64_t)(env->target_distance + 1)); + hash = mix_u64_for_binding_test(hash, (uint64_t)env->solution_length); + for (int i = 0; i < AFFINE_LOCK_MAX_SOLUTION_DEPTH; i++) { + hash = mix_u64_for_binding_test( + hash, (uint64_t)(env->solution_actions[i] + 1)); + } + return hash; +} + +static void assign_binding_env_buffers( 
+ Env* envs, + int total_agents, + float observations[][AFFINE_LOCK_OBS_SIZE], + float actions[], + float rewards[], + float terminals[]) { + memset(observations, 0, + (size_t)total_agents * AFFINE_LOCK_OBS_SIZE * sizeof(float)); + memset(actions, 0, (size_t)total_agents * sizeof(float)); + memset(rewards, 0, (size_t)total_agents * sizeof(float)); + memset(terminals, 0, (size_t)total_agents * sizeof(float)); + for (int i = 0; i < total_agents; i++) { + envs[i].observations = observations[i]; + envs[i].actions = &actions[i]; + envs[i].rewards = &rewards[i]; + envs[i].terminals = &terminals[i]; + } +} + +static void test_vec_init_visible_targets_repeat_across_runs_and_vary_by_env_id(void) { + const int total_agents = 64; + Env* run_a = make_binding_env_batch(42, total_agents); + Env* run_b = make_binding_env_batch(42, total_agents); + + float obs_a[64][AFFINE_LOCK_OBS_SIZE]; + float obs_b[64][AFFINE_LOCK_OBS_SIZE]; + float actions_a[64], actions_b[64]; + float rewards_a[64], rewards_b[64]; + float terminals_a[64], terminals_b[64]; + assign_binding_env_buffers( + run_a, total_agents, obs_a, actions_a, rewards_a, terminals_a); + assign_binding_env_buffers( + run_b, total_agents, obs_b, actions_b, rewards_b, terminals_b); + + int saw_different_puzzle = 0; + uint64_t first_checksum = 0; + for (int i = 0; i < total_agents; i++) { + c_reset(&run_a[i]); + c_reset(&run_b[i]); + + uint64_t checksum_a = binding_reset_checksum(&run_a[i]); + uint64_t checksum_b = binding_reset_checksum(&run_b[i]); + EXPECT_EQ_U32(run_a[i].rng, run_b[i].rng); + EXPECT_EQ_U32(run_a[i].state, run_b[i].state); + EXPECT_EQ_U32(run_a[i].target, run_b[i].target); + EXPECT_EQ_INT(run_a[i].target_distance, run_b[i].target_distance); + EXPECT_EQ_INT(run_a[i].solution_length, run_b[i].solution_length); + EXPECT_EQ_INT(run_a[i].target_distance, 2); + EXPECT_EQ_INT(run_a[i].solution_length, 2); + EXPECT_TRUE(checksum_a == checksum_b); + EXPECT_TRUE(memcmp(obs_a[i], obs_b[i], sizeof(obs_a[i])) == 0); + + if 
(i == 0) { + first_checksum = checksum_a; + } else if (checksum_a != first_checksum) { + saw_different_puzzle = 1; + } + } + EXPECT_TRUE(saw_different_puzzle); + + free_binding_envs(run_a); + free_binding_envs(run_b); +} + +static void test_depth_solve_rates_are_conditional_on_depth_attempts(void) { + Log log = {0}; + log.depth_2_rate = 0.25f; + log.depth_2_solve_rate = 0.125f; + log.depth_4_rate = 0.5f; + log.depth_4_solve_rate = 0.375f; + log.depth_5_rate = 0.25f; + log.depth_5_solve_rate = 0.125f; + log.depth_6_rate = 0.25f; + log.depth_6_solve_rate = 0.125f; + log.depth_8_rate = 0.0f; + log.depth_8_solve_rate = 0.0f; + log.depth_16_rate = 0.125f; + log.depth_16_solve_rate = 0.0f; + log.score = 0.75f; + log.target_distance = 4.0f; + log.solved_target_distance = 2.0f; + log.solve_rate = 0.5f; + + Dict* out = create_dict(32); + my_log(&log, out); + + EXPECT_EQ_INT(out->size, 18); + EXPECT_NEAR(dict_value(out, "score"), 0.75, 0.0); + EXPECT_TRUE(!dict_has_key(out, "solve_steps")); + EXPECT_TRUE(!dict_has_key(out, "solve_efficiency")); + EXPECT_TRUE(!dict_has_key(out, "scramble_unique_states")); + EXPECT_NEAR(dict_value(out, "min_win_moves"), 4.0, 0.0); + EXPECT_NEAR(dict_value(out, "solved_min_win_moves"), 4.0, 0.0); + EXPECT_TRUE(!dict_has_key(out, "depth_2_rate")); + EXPECT_NEAR(dict_value(out, "depth_2_solve_rate"), 0.5, 0.0); + EXPECT_TRUE(!dict_has_key(out, "depth_4_rate")); + EXPECT_NEAR(dict_value(out, "depth_4_solve_rate"), 0.75, 0.0); + EXPECT_TRUE(!dict_has_key(out, "depth_5_rate")); + EXPECT_NEAR(dict_value(out, "depth_5_solve_rate"), 0.5, 0.0); + EXPECT_TRUE(!dict_has_key(out, "depth_6_rate")); + EXPECT_NEAR(dict_value(out, "depth_6_solve_rate"), 0.5, 0.0); + EXPECT_TRUE(!dict_has_key(out, "depth_8_rate")); + EXPECT_NEAR(dict_value(out, "depth_8_solve_rate"), 0.0, 0.0); + EXPECT_TRUE(!dict_has_key(out, "depth_16_rate")); + EXPECT_NEAR(dict_value(out, "depth_16_solve_rate"), 0.0, 0.0); + + free(out->items); + free(out); +} + +int main(void) { + 
test_vec_init_mixes_base_seed_and_env_id(); + test_vec_init_visible_targets_repeat_across_runs_and_vary_by_env_id(); + test_depth_solve_rates_are_conditional_on_depth_attempts(); + return 0; +} diff --git a/ocean/affine_lock/tests/test_metadata_smoke.py b/ocean/affine_lock/tests/test_metadata_smoke.py new file mode 100644 index 0000000000..df3c6513de --- /dev/null +++ b/ocean/affine_lock/tests/test_metadata_smoke.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 +import argparse +import configparser +import ctypes +import re +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[3] + +EXPECTED_MY_LOG_KEYS = [ + "perf", + "score", + "solve_rate", + "max_depth_solve", + "episode_return", + "episode_length", + "timeout_rate", + "invalid_rate", + "min_win_moves", + "solved_min_win_moves", + "conditional_solve_steps", + "conditional_solve_efficiency", + "depth_2_solve_rate", + "depth_4_solve_rate", + "depth_5_solve_rate", + "depth_6_solve_rate", + "depth_8_solve_rate", + "depth_16_solve_rate", +] + + +def parse_int(value): + return int(value.replace("_", "")) + + +def parse_float(value): + return float(value.replace("_", "")) + + +def assert_sweep_mean(config, section, expected): + assert parse_float(config[section]["mean"]) == expected + + +def check_config(): + config = configparser.ConfigParser() + config.read(ROOT / "config" / "default.ini") + config.read(ROOT / "config" / "affine_lock.ini") + + assert config["base"]["env_name"] == "affine_lock" + assert parse_int(config["vec"]["total_agents"]) == 4096 + assert parse_int(config["vec"]["num_buffers"]) == 2 + assert parse_int(config["vec"]["num_threads"]) == 16 + assert parse_int(config["policy"]["hidden_size"]) == 256 + assert parse_int(config["policy"]["num_layers"]) == 3 + assert parse_int(config["env"]["seed"]) == 42 + assert parse_int(config["env"]["start_depth"]) == 2 + assert parse_int(config["env"]["max_depth"]) == 16 + assert parse_int(config["train"]["total_timesteps"]) == 200_000_000 
+ assert parse_int(config["train"]["horizon"]) == 64 + assert parse_int(config["train"]["minibatch_size"]) == 8192 + assert parse_float(config["train"]["learning_rate"]) == 0.012 + assert parse_float(config["train"]["ent_coef"]) == 0.2 + assert parse_float(config["train"]["gamma"]) == 0.8 + assert parse_float(config["train"]["gae_lambda"]) == 0.995 + assert parse_float(config["train"]["replay_ratio"]) == 3.0 + assert parse_float(config["train"]["clip_coef"]) == 0.83 + assert parse_float(config["train"]["vf_coef"]) == 4.75 + assert parse_float(config["train"]["vf_clip_coef"]) == 0.8 + assert parse_float(config["train"]["max_grad_norm"]) == 3.0 + assert parse_float(config["train"]["beta1"]) == 0.5 + assert parse_float(config["train"]["beta2"]) == 0.9915 + assert parse_float(config["train"]["eps"]) == 0.0001 + assert parse_float(config["train"]["vtrace_rho_clip"]) == 1.4 + assert parse_float(config["train"]["vtrace_c_clip"]) == 3.75 + assert parse_float(config["train"]["prio_alpha"]) == 0.055 + assert parse_float(config["train"]["prio_beta0"]) == 0.161 + assert_sweep_mean(config, "sweep.train.total_timesteps", 200_000_000.0) + assert_sweep_mean(config, "sweep.vec.total_agents", 4096.0) + assert_sweep_mean(config, "sweep.vec.num_buffers", 2.0) + assert_sweep_mean(config, "sweep.policy.hidden_size", 256.0) + assert_sweep_mean(config, "sweep.policy.num_layers", 3.0) + assert_sweep_mean(config, "sweep.train.horizon", 64.0) + assert_sweep_mean(config, "sweep.train.minibatch_size", 8192.0) + assert_sweep_mean(config, "sweep.train.learning_rate", 0.012) + assert_sweep_mean(config, "sweep.train.ent_coef", 0.2) + assert_sweep_mean(config, "sweep.train.gamma", 0.8) + assert_sweep_mean(config, "sweep.train.gae_lambda", 0.995) + assert_sweep_mean(config, "sweep.train.replay_ratio", 3.0) + assert_sweep_mean(config, "sweep.train.clip_coef", 0.83) + assert_sweep_mean(config, "sweep.train.vf_coef", 4.75) + assert_sweep_mean(config, "sweep.train.vf_clip_coef", 0.8) + 
assert_sweep_mean(config, "sweep.train.max_grad_norm", 3.0) + assert_sweep_mean(config, "sweep.train.beta1", 0.5) + assert_sweep_mean(config, "sweep.train.beta2", 0.9915) + assert_sweep_mean(config, "sweep.train.eps", 0.0001) + assert_sweep_mean(config, "sweep.train.vtrace_rho_clip", 1.4) + assert_sweep_mean(config, "sweep.train.vtrace_c_clip", 3.75) + assert_sweep_mean(config, "sweep.train.prio_alpha", 0.055) + assert_sweep_mean(config, "sweep.train.prio_beta0", 0.161) + assert config["sweep"]["metric"] == "perf" + assert config["sweep"]["goal"] == "maximize" + + sweep_ts = config["sweep.train.total_timesteps"] + min_steps = parse_int(sweep_ts["min"]) + max_steps = parse_int(sweep_ts["max"]) + assert min_steps == 100_000_000 + assert max_steps == 200_000_000 + + assert parse_int(config["sweep.train.horizon"]["min"]) == 32 + assert parse_int(config["sweep.train.horizon"]["max"]) == 128 + assert parse_int(config["sweep.policy.hidden_size"]["min"]) == 64 + assert parse_int(config["sweep.policy.hidden_size"]["max"]) == 512 + assert float(config["sweep.policy.num_layers"]["min"]) == 1.0 + assert float(config["sweep.policy.num_layers"]["max"]) == 4.0 + assert parse_int(config["sweep.vec.total_agents"]["min"]) == 4096 + assert parse_int(config["sweep.vec.total_agents"]["max"]) == 16_384 + assert float(config["sweep.vec.num_buffers"]["min"]) == 1.0 + assert float(config["sweep.vec.num_buffers"]["max"]) == 4.0 + assert parse_int(config["sweep.train.minibatch_size"]["min"]) == 8192 + assert parse_int(config["sweep.train.minibatch_size"]["max"]) == 131_072 + assert float(config["sweep.train.replay_ratio"]["min"]) == 1.0 + assert float(config["sweep.train.replay_ratio"]["max"]) == 4.0 + assert float(config["sweep.train.vf_clip_coef"]["min"]) == 0.001 + assert float(config["sweep.train.vf_clip_coef"]["max"]) == 5.0 + assert float(config["sweep.train.vf_coef"]["min"]) == 0.1 + assert float(config["sweep.train.vf_coef"]["max"]) == 8.0 + + min_batch_size = ( + 
parse_int(config["sweep.vec.total_agents"]["min"]) + * parse_int(config["sweep.train.horizon"]["min"]) + ) + max_minibatch_size = parse_int(config["sweep.train.minibatch_size"]["max"]) + min_replay_ratio = float(config["sweep.train.replay_ratio"]["min"]) + assert min_replay_ratio * min_batch_size >= max_minibatch_size + + +def check_binding_text(): + binding = (ROOT / "ocean" / "affine_lock" / "binding.c").read_text() + assert "#define OBS_SIZE AFFINE_LOCK_OBS_SIZE" in binding + assert "#define ACT_SIZES {AFFINE_LOCK_NUM_ACTIONS}" in binding + assert "#define OBS_TENSOR_T FloatTensor" in binding + + log_keys = re.findall(r'dict_set\(out,\s*"([^"]+)"', binding) + assert log_keys == EXPECTED_MY_LOG_KEYS + assert len(log_keys) + 1 <= 32 # static_vec_log appends "n". + + +def float_buffer(ptr, count): + return (ctypes.c_float * count).from_address(ptr) + + +def check_backend_metadata(): + from pufferlib import _C + from pufferlib.pufferl import load_config + + assert _C.env_name == "affine_lock" + assert _C.gpu == 1 + + def load_affine_args(extra_argv): + old_argv = sys.argv + try: + sys.argv = [old_argv[0], *extra_argv] + return load_config("affine_lock") + finally: + sys.argv = old_argv + + base_args = load_affine_args([]) + assert base_args["env_name"] == "affine_lock" + assert base_args["vec"]["total_agents"] == 4096 + assert base_args["vec"]["num_buffers"] == 2 + assert base_args["policy"]["hidden_size"] == 256 + assert base_args["policy"]["num_layers"] == 3 + assert base_args["train"]["horizon"] == 64 + assert base_args["train"]["minibatch_size"] == 8192 + assert base_args["train"]["learning_rate"] == 0.012 + assert base_args["train"]["ent_coef"] == 0.2 + assert base_args["train"]["gamma"] == 0.8 + assert base_args["train"]["gae_lambda"] == 0.995 + assert base_args["train"]["replay_ratio"] == 3.0 + assert base_args["train"]["clip_coef"] == 0.83 + assert base_args["train"]["vf_coef"] == 4.75 + assert base_args["train"]["vf_clip_coef"] == 0.8 + assert 
base_args["train"]["max_grad_norm"] == 3.0 + assert base_args["train"]["beta1"] == 0.5 + assert base_args["train"]["beta2"] == 0.9915 + assert base_args["train"]["eps"] == 0.0001 + assert base_args["train"]["vtrace_rho_clip"] == 1.4 + assert base_args["train"]["vtrace_c_clip"] == 3.75 + assert base_args["train"]["prio_alpha"] == 0.055 + assert base_args["train"]["prio_beta0"] == 0.161 + + old_argv = sys.argv + try: + sys.argv = [old_argv[0]] + args = load_config("affine_lock") + finally: + sys.argv = old_argv + args["vec"]["total_agents"] = 2 + args["vec"]["num_buffers"] = 1 + vec = _C.create_vec(args, 0) + try: + assert vec.obs_size == 33 + assert vec.obs_dtype == "FloatTensor" + assert list(vec.act_sizes) == [8] + + obs = float_buffer(vec.obs_ptr, vec.total_agents * vec.obs_size) + rewards = float_buffer(vec.rewards_ptr, vec.total_agents) + terminals = float_buffer(vec.terminals_ptr, vec.total_agents) + + vec.reset() + assert list(rewards) == [0.0, 0.0] + assert list(terminals) == [0.0, 0.0] + for env_id in range(vec.total_agents): + timer = obs[env_id * vec.obs_size + 32] + assert timer == 0.0 + + actions = (ctypes.c_float * vec.total_agents)(8.0, 8.0) + vec.cpu_step(ctypes.addressof(actions)) + assert list(rewards) == [-1.0, -1.0] + assert list(terminals) == [1.0, 1.0] + + logs = vec.log() + assert logs["n"] == 2.0 + assert logs["invalid_rate"] == 1.0 + assert logs["timeout_rate"] == 0.0 + assert logs["solve_rate"] == 0.0 + assert logs["episode_length"] == 1.0 + assert logs["episode_return"] == -1.0 + assert "perf" in logs + assert "min_win_moves" in logs + finally: + vec.close() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--require-backend", action="store_true") + args = parser.parse_args() + + check_config() + check_binding_text() + if args.require_backend: + check_backend_metadata() + + +if __name__ == "__main__": + main() diff --git a/ocean/affine_lock/tests/test_visible_targets_loader.c 
b/ocean/affine_lock/tests/test_visible_targets_loader.c new file mode 100644 index 0000000000..9fed276f83 --- /dev/null +++ b/ocean/affine_lock/tests/test_visible_targets_loader.c @@ -0,0 +1,116 @@ +#include +#include +#include + +#include "affine_lock_visible_targets.h" + +#define EXPECT_TRUE(expr) do { \ + if (!(expr)) { \ + fprintf(stderr, "EXPECT_TRUE failed at %s:%d: %s\n", \ + __FILE__, __LINE__, #expr); \ + exit(1); \ + } \ +} while (0) + +#define EXPECT_EQ_U32(actual, expected) do { \ + uint32_t actual_value = (uint32_t)(actual); \ + uint32_t expected_value = (uint32_t)(expected); \ + if (actual_value != expected_value) { \ + fprintf(stderr, \ + "EXPECT_EQ_U32 failed at %s:%d: %s=%u expected %u\n", \ + __FILE__, __LINE__, #actual, actual_value, expected_value); \ + exit(1); \ + } \ +} while (0) + +#define EXPECT_EQ_U64(actual, expected) do { \ + uint64_t actual_value = (uint64_t)(actual); \ + uint64_t expected_value = (uint64_t)(expected); \ + if (actual_value != expected_value) { \ + fprintf(stderr, \ + "EXPECT_EQ_U64 failed at %s:%d: %s=%llu expected %llu\n", \ + __FILE__, __LINE__, #actual, \ + (unsigned long long)actual_value, \ + (unsigned long long)expected_value); \ + exit(1); \ + } \ +} while (0) + +int main(int argc, char** argv) { + if (argc != 5) { + fprintf(stderr, + "usage: %s TARGET_BIN EXPECTED_RECORD_COUNT " + "EXPECTED_SAMPLE_COUNT EXPECTED_D16_COUNT\n", + argv[0]); + return 1; + } + + char* end = NULL; + unsigned long expected_record_count = strtoul(argv[2], &end, 10); + EXPECT_TRUE(end != argv[2] && *end == '\0'); + unsigned long expected_sample_count = strtoul(argv[3], &end, 10); + EXPECT_TRUE(end != argv[3] && *end == '\0'); + unsigned long expected_d16_count = strtoul(argv[4], &end, 10); + EXPECT_TRUE(end != argv[4] && *end == '\0'); + + AffineLockVisibleTargetTable table; + char error[256]; + int rc = affine_lock_visible_targets_load( + argv[1], + AFFINE_LOCK_VISIBLE_TARGET_8ACTION_V1_HASH, + &table, + error, + sizeof(error)); + if 
(rc != 0) { + fprintf(stderr, "failed to load visible target table: %s\n", error); + return 1; + } + + EXPECT_EQ_U32(table.bits, 16); + EXPECT_EQ_U32(table.num_actions, 8); + EXPECT_EQ_U32(table.depth_count, 6); + EXPECT_EQ_U32(table.record_size, 16); + EXPECT_EQ_U32(table.record_count, expected_record_count); + EXPECT_EQ_U64( + table.action_set_hash, + AFFINE_LOCK_VISIBLE_TARGET_8ACTION_V1_HASH); + + const uint32_t expected_depths[6] = {2, 4, 5, 6, 8, 16}; + const uint64_t expected_exact_counts[6] = { + 2216496ull, + 34379722ull, + 115388932ull, + 331789220ull, + 1125374770ull, + 100548ull, + }; + uint32_t first_record = 0; + for (uint32_t i = 0; i < table.depth_count; i++) { + EXPECT_EQ_U32(table.depths[i].depth, expected_depths[i]); + EXPECT_EQ_U32(table.depths[i].first_record, first_record); + uint32_t expected_stored_count = i == 5 ? + (uint32_t)expected_d16_count : (uint32_t)expected_sample_count; + EXPECT_EQ_U32(table.depths[i].stored_count, expected_stored_count); + EXPECT_EQ_U64(table.depths[i].exact_pair_count, expected_exact_counts[i]); + first_record += table.depths[i].stored_count; + } + + for (uint32_t i = 0; i < table.record_count; i++) { + const AffineLockVisibleTargetRecord* record = &table.records[i]; + EXPECT_TRUE(record->solution_length == record->depth); + EXPECT_TRUE( + record->depth == 2 || + record->depth == 4 || + record->depth == 5 || + record->depth == 6 || + record->depth == 8 || + record->depth == 16); + for (uint8_t step = 0; step < record->solution_length; step++) { + uint8_t action = (record->packed_actions >> (3u * step)) & 7u; + EXPECT_TRUE(action < table.num_actions); + } + } + + affine_lock_visible_targets_free(&table); + return 0; +} diff --git a/ocean/affine_lock/tools/generate_8action_visible_targets.c b/ocean/affine_lock/tools/generate_8action_visible_targets.c new file mode 100644 index 0000000000..74ac1d17b8 --- /dev/null +++ b/ocean/affine_lock/tools/generate_8action_visible_targets.c @@ -0,0 +1,954 @@ +#define 
_POSIX_C_SOURCE 200809L + +#include +#include +#include +#include +#include + +#ifdef _OPENMP +#include +#endif + +#define BITS 16 +#define STATE_COUNT (1u << BITS) +#define MAX_ACTIONS 8 +#define TARGET_DEPTH_COUNT 6 +#define MAX_DISTANCE 64 +#define RECORD_SIZE 16 +#define FORMAT_VERSION 1 + +static const int TARGET_DEPTHS[TARGET_DEPTH_COUNT] = {2, 4, 5, 6, 8, 16}; +typedef enum ActionOp { + ACTION_OP_SHIFT_LEFT = 0, + ACTION_OP_SHIFT_RIGHT = 1, + ACTION_OP_MIRROR = 2, + ACTION_OP_INVERT_RIGHT_7 = 3, + ACTION_OP_SWAP_ADJACENT_BITS = 4, + ACTION_OP_SWAP_ADJACENT_PAIRS = 5, + ACTION_OP_SWAP_NIBBLES_EACH_BYTE = 6, + ACTION_OP_REVERSE_EACH_NIBBLE = 7, + ACTION_OP_REVERSE_EACH_BYTE = 8, +} ActionOp; + +typedef struct ActionSet { + const char* name; + int num_actions; + int store_all_d16_by_default; + // Stable salt for deterministic sampled-record selection. + uint64_t candidate_score_seed; + const char* default_bin; + const char* default_json; + ActionOp ops[MAX_ACTIONS]; + const char* names[MAX_ACTIONS]; +} ActionSet; + +static const ActionSet ACTION_SETS[] = { + { + "affine_lock_8action_v1", + 8, + 1, + 0x7b7ba09982ec5a9dull, + "ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin", + "ocean/affine_lock/generated/affine_lock_8action_visible_targets.json", + { + ACTION_OP_SHIFT_LEFT, + ACTION_OP_SHIFT_RIGHT, + ACTION_OP_INVERT_RIGHT_7, + ACTION_OP_SWAP_ADJACENT_BITS, + ACTION_OP_SWAP_ADJACENT_PAIRS, + ACTION_OP_SWAP_NIBBLES_EACH_BYTE, + ACTION_OP_REVERSE_EACH_NIBBLE, + ACTION_OP_REVERSE_EACH_BYTE, + }, + { + "shift_left", + "shift_right", + "invert_right_7", + "swap_adjacent_bits", + "swap_adjacent_pairs", + "swap_nibbles_each_byte", + "reverse_each_nibble", + "reverse_each_byte", + }, + }, + { + // Generator-only alternate for future runtime experiments. Fewer + // actions can make policy search easier while producing many more + // exact depth-16 pairs than the committed 8-action training set. 
+ "affine_lock_4action_v1", + 4, + 0, + 0x8c4d9362024c02b8ull, + "ocean/affine_lock/generated/affine_lock_4action_visible_targets.bin", + "ocean/affine_lock/generated/affine_lock_4action_visible_targets.json", + { + ACTION_OP_SHIFT_RIGHT, + ACTION_OP_MIRROR, + ACTION_OP_INVERT_RIGHT_7, + ACTION_OP_SWAP_ADJACENT_BITS, + }, + { + "shift_right", + "mirror", + "invert_right_7", + "swap_adjacent_bits", + }, + }, +}; + +static const int ACTION_SET_COUNT = + (int)(sizeof(ACTION_SETS) / sizeof(ACTION_SETS[0])); +static const ActionSet* ACTIVE_ACTION_SET = &ACTION_SETS[0]; + +typedef struct TargetRecord { + uint16_t start; + uint16_t target; + uint64_t packed_actions; + uint8_t solution_length; + uint8_t depth; + uint64_t score; +} TargetRecord; + +typedef struct DepthSample { + int depth; + int store_all; + uint64_t exact_count; + uint32_t capacity; + uint32_t count; + TargetRecord* records; +} DepthSample; + +typedef struct WorkerResult { + DepthSample depths[TARGET_DEPTH_COUNT]; + uint64_t histogram[MAX_DISTANCE + 1]; + uint64_t disconnected_starts; + int max_distance; +} WorkerResult; + +typedef struct Options { + const char* output_bin; + const char* output_json; + const ActionSet* action_set; + uint32_t sample_per_depth; + uint64_t sample_seed; + int store_all_depths[TARGET_DEPTH_COUNT]; + int output_bin_explicit; + int output_json_explicit; +} Options; + +static uint16_t NEXT_STATE[STATE_COUNT][MAX_ACTIONS]; +static uint64_t ACTIVE_SAMPLE_SEED = 0u; + +static uint64_t mix_u64(uint64_t hash, uint64_t value) { + hash ^= value; + hash *= 1099511628211ull; + return hash; +} + +static uint64_t mix_bytes(uint64_t hash, const char* text) { + const unsigned char* ptr = (const unsigned char*)text; + while (*ptr != '\0') { + hash = mix_u64(hash, (uint64_t)*ptr); + ptr++; + } + return hash; +} + +static uint16_t shift_left(uint16_t state) { + uint16_t first = state & 1u; + return (uint16_t)((state >> 1) | (first << (BITS - 1))); +} + +static uint16_t shift_right(uint16_t state) 
{ + uint16_t last = (uint16_t)((state >> (BITS - 1)) & 1u); + return (uint16_t)(((state << 1) & 0xffffu) | last); +} + +static uint16_t mirror_bits(uint16_t state) { + uint16_t out = 0u; + for (int bit = 0; bit < BITS; bit++) { + if ((state & (1u << bit)) != 0u) { + out |= (uint16_t)(1u << (BITS - 1 - bit)); + } + } + return out; +} + +static uint16_t swap_adjacent_bits(uint16_t state) { + return (uint16_t)(((state & 0x5555u) << 1) | + ((state & 0xaaaau) >> 1)); +} + +static uint16_t swap_adjacent_pairs(uint16_t state) { + return (uint16_t)(((state & 0x3333u) << 2) | + ((state & 0xccccu) >> 2)); +} + +static uint16_t swap_nibbles_each_byte(uint16_t state) { + return (uint16_t)(((state & 0x0f0fu) << 4) | + ((state & 0xf0f0u) >> 4)); +} + +static uint16_t reverse_each_nibble(uint16_t state) { + return swap_adjacent_pairs(swap_adjacent_bits(state)); +} + +static uint16_t reverse_each_byte(uint16_t state) { + return swap_nibbles_each_byte(reverse_each_nibble(state)); +} + +static uint16_t apply_action_op(uint16_t state, ActionOp op) { + switch (op) { + case ACTION_OP_SHIFT_LEFT: + return shift_left(state); + case ACTION_OP_SHIFT_RIGHT: + return shift_right(state); + case ACTION_OP_MIRROR: + return mirror_bits(state); + case ACTION_OP_INVERT_RIGHT_7: + return (uint16_t)(state ^ 0xfe00u); + case ACTION_OP_SWAP_ADJACENT_BITS: + return swap_adjacent_bits(state); + case ACTION_OP_SWAP_ADJACENT_PAIRS: + return swap_adjacent_pairs(state); + case ACTION_OP_SWAP_NIBBLES_EACH_BYTE: + return swap_nibbles_each_byte(state); + case ACTION_OP_REVERSE_EACH_NIBBLE: + return reverse_each_nibble(state); + case ACTION_OP_REVERSE_EACH_BYTE: + return reverse_each_byte(state); + default: + return state; + } +} + +static void build_next_state(void) { + for (uint32_t state = 0; state < STATE_COUNT; state++) { + for (int action = 0; action < ACTIVE_ACTION_SET->num_actions; action++) { + NEXT_STATE[state][action] = apply_action_op( + (uint16_t)state, ACTIVE_ACTION_SET->ops[action]); + } + } +} + 
+static const ActionSet* action_set_by_name(const char* name) { + for (int i = 0; i < ACTION_SET_COUNT; i++) { + if (strcmp(ACTION_SETS[i].name, name) == 0) { + return &ACTION_SETS[i]; + } + } + return NULL; +} + +static int target_depth_index(int depth) { + for (int i = 0; i < TARGET_DEPTH_COUNT; i++) { + if (TARGET_DEPTHS[i] == depth) { + return i; + } + } + return -1; +} + +static int record_worse(const TargetRecord* a, const TargetRecord* b) { + if (a->score != b->score) { + return a->score > b->score; + } + if (a->start != b->start) { + return a->start > b->start; + } + if (a->target != b->target) { + return a->target > b->target; + } + if (a->packed_actions != b->packed_actions) { + return a->packed_actions > b->packed_actions; + } + return a->depth > b->depth; +} + +static int record_better(const TargetRecord* a, const TargetRecord* b) { + return record_worse(b, a); +} + +static void heap_swap(TargetRecord* a, TargetRecord* b) { + TargetRecord tmp = *a; + *a = *b; + *b = tmp; +} + +static void heap_sift_up(TargetRecord* records, uint32_t index) { + while (index > 0) { + uint32_t parent = (index - 1u) / 2u; + if (!record_worse(&records[index], &records[parent])) { + break; + } + heap_swap(&records[index], &records[parent]); + index = parent; + } +} + +static void heap_sift_down(TargetRecord* records, uint32_t count, uint32_t index) { + while (1) { + uint32_t left = 2u * index + 1u; + uint32_t right = left + 1u; + uint32_t worst = index; + if (left < count && record_worse(&records[left], &records[worst])) { + worst = left; + } + if (right < count && record_worse(&records[right], &records[worst])) { + worst = right; + } + if (worst == index) { + break; + } + heap_swap(&records[index], &records[worst]); + index = worst; + } +} + +static int ensure_capacity(DepthSample* sample, uint32_t required) { + if (required <= sample->capacity) { + return 0; + } + uint32_t next_capacity = sample->capacity == 0 ? 
1024u : sample->capacity; + while (next_capacity < required) { + if (next_capacity > UINT32_MAX / 2u) { + return -1; + } + next_capacity *= 2u; + } + TargetRecord* next = (TargetRecord*)realloc( + sample->records, (size_t)next_capacity * sizeof(TargetRecord)); + if (next == NULL) { + return -1; + } + sample->records = next; + sample->capacity = next_capacity; + return 0; +} + +static int add_record(DepthSample* sample, const TargetRecord* record) { + if (sample->store_all) { + if (ensure_capacity(sample, sample->count + 1u) != 0) { + return -1; + } + sample->records[sample->count++] = *record; + return 0; + } + + if (sample->capacity == 0) { + return 0; + } + if (sample->count < sample->capacity) { + sample->records[sample->count] = *record; + heap_sift_up(sample->records, sample->count); + sample->count += 1u; + return 0; + } + if (record_better(record, &sample->records[0])) { + sample->records[0] = *record; + heap_sift_down(sample->records, sample->count, 0); + } + return 0; +} + +static uint64_t candidate_score( + uint16_t start, + uint16_t target, + int depth, + uint64_t packed_actions, + int store_all) { + uint64_t hash = ACTIVE_ACTION_SET->candidate_score_seed; + // Store-all depths are complete sets, so keep their ordering stable across + // sample seeds and only reseed the sampled pools. 
+ if (!store_all && ACTIVE_SAMPLE_SEED != 0u) { + hash = mix_u64(hash, ACTIVE_SAMPLE_SEED); + } + hash = mix_u64(hash, start); + hash = mix_u64(hash, target); + hash = mix_u64(hash, (uint64_t)depth); + hash = mix_u64(hash, packed_actions); + return hash; +} + +static uint64_t pack_solution( + uint16_t start, + uint16_t target, + uint8_t solution_length, + const uint16_t* parent, + const uint8_t* parent_action) { + uint8_t actions[MAX_DISTANCE]; + uint16_t state = target; + for (int i = (int)solution_length - 1; i >= 0; i--) { + actions[i] = parent_action[state]; + state = parent[state]; + } + if (state != start) { + fprintf(stderr, "failed to reconstruct path from %u to %u\n", + (unsigned int)start, (unsigned int)target); + exit(2); + } + + uint64_t packed = 0u; + for (uint8_t i = 0; i < solution_length; i++) { + packed |= (uint64_t)(actions[i] & 7u) << (3u * i); + } + return packed; +} + +static void init_worker_result( + WorkerResult* result, + const Options* options) { + memset(result, 0, sizeof(*result)); + for (int i = 0; i < TARGET_DEPTH_COUNT; i++) { + result->depths[i].depth = TARGET_DEPTHS[i]; + result->depths[i].store_all = options->store_all_depths[i]; + if (!result->depths[i].store_all && options->sample_per_depth > 0) { + result->depths[i].capacity = options->sample_per_depth; + result->depths[i].records = (TargetRecord*)calloc( + options->sample_per_depth, sizeof(TargetRecord)); + if (result->depths[i].records == NULL) { + fprintf(stderr, "failed to allocate target sampler\n"); + exit(2); + } + } + } +} + +static void free_worker_result(WorkerResult* result) { + for (int i = 0; i < TARGET_DEPTH_COUNT; i++) { + free(result->depths[i].records); + result->depths[i].records = NULL; + result->depths[i].capacity = 0; + result->depths[i].count = 0; + } +} + +static void compute_worker_records(WorkerResult* result) { + uint32_t* seen = (uint32_t*)calloc(STATE_COUNT, sizeof(uint32_t)); + uint16_t* queue = (uint16_t*)malloc(STATE_COUNT * sizeof(uint16_t)); + 
uint16_t* parent = (uint16_t*)malloc(STATE_COUNT * sizeof(uint16_t)); + uint8_t* parent_action = (uint8_t*)malloc(STATE_COUNT * sizeof(uint8_t)); + uint8_t* depth = (uint8_t*)malloc(STATE_COUNT * sizeof(uint8_t)); + if (seen == NULL || queue == NULL || parent == NULL || + parent_action == NULL || depth == NULL) { + fprintf(stderr, "failed to allocate BFS buffers\n"); + exit(2); + } + +#ifdef _OPENMP + #pragma omp for schedule(dynamic, 64) +#endif + for (uint32_t start = 0; start < STATE_COUNT; start++) { + uint32_t stamp = start + 1u; + uint32_t head = 0; + uint32_t tail = 0; + seen[start] = stamp; + parent[start] = (uint16_t)start; + parent_action[start] = 0; + depth[start] = 0; + queue[tail++] = (uint16_t)start; + result->histogram[0] += 1u; + + while (head < tail) { + uint16_t state = queue[head++]; + uint8_t state_depth = depth[state]; + const uint16_t* row = NEXT_STATE[state]; + for (int action = 0; action < ACTIVE_ACTION_SET->num_actions; action++) { + uint16_t next = row[action]; + if (seen[next] == stamp) { + continue; + } + uint8_t next_depth = (uint8_t)(state_depth + 1u); + seen[next] = stamp; + parent[next] = state; + parent_action[next] = (uint8_t)action; + depth[next] = next_depth; + queue[tail++] = next; + if (next_depth > MAX_DISTANCE) { + fprintf(stderr, "distance exceeded internal limit\n"); + exit(2); + } + result->histogram[next_depth] += 1u; + if ((int)next_depth > result->max_distance) { + result->max_distance = (int)next_depth; + } + + int depth_index = target_depth_index((int)next_depth); + if (depth_index < 0) { + continue; + } + DepthSample* sample = &result->depths[depth_index]; + sample->exact_count += 1u; + uint64_t packed_actions = pack_solution( + (uint16_t)start, next, next_depth, parent, parent_action); + TargetRecord record; + memset(&record, 0, sizeof(record)); + record.start = (uint16_t)start; + record.target = next; + record.packed_actions = packed_actions; + record.solution_length = next_depth; + record.depth = next_depth; + 
record.score = candidate_score( + (uint16_t)start, next, (int)next_depth, packed_actions, + sample->store_all); + if (add_record(sample, &record) != 0) { + fprintf(stderr, "failed to store sampled target record\n"); + exit(2); + } + } + } + + if (tail != STATE_COUNT) { + result->disconnected_starts += 1u; + } + } + + free(seen); + free(queue); + free(parent); + free(parent_action); + free(depth); +} + +static int compare_records(const void* lhs, const void* rhs) { + const TargetRecord* a = (const TargetRecord*)lhs; + const TargetRecord* b = (const TargetRecord*)rhs; + if (a->depth != b->depth) { + return (int)a->depth - (int)b->depth; + } + if (a->score < b->score) { + return -1; + } + if (a->score > b->score) { + return 1; + } + if (a->start != b->start) { + return (int)a->start - (int)b->start; + } + if (a->target != b->target) { + return (int)a->target - (int)b->target; + } + if (a->packed_actions < b->packed_actions) { + return -1; + } + if (a->packed_actions > b->packed_actions) { + return 1; + } + return 0; +} + +static void merge_results( + WorkerResult* merged, + WorkerResult* workers, + int worker_count, + const Options* options) { + init_worker_result(merged, options); + for (int worker_index = 0; worker_index < worker_count; worker_index++) { + WorkerResult* worker = &workers[worker_index]; + merged->disconnected_starts += worker->disconnected_starts; + if (worker->max_distance > merged->max_distance) { + merged->max_distance = worker->max_distance; + } + for (int distance = 0; distance <= MAX_DISTANCE; distance++) { + merged->histogram[distance] += worker->histogram[distance]; + } + for (int depth_index = 0; depth_index < TARGET_DEPTH_COUNT; depth_index++) { + DepthSample* dst = &merged->depths[depth_index]; + DepthSample* src = &worker->depths[depth_index]; + dst->exact_count += src->exact_count; + for (uint32_t i = 0; i < src->count; i++) { + if (add_record(dst, &src->records[i]) != 0) { + fprintf(stderr, "failed to merge sampled target records\n"); + 
exit(2); + } + } + } + } + + for (int depth_index = 0; depth_index < TARGET_DEPTH_COUNT; depth_index++) { + DepthSample* sample = &merged->depths[depth_index]; + qsort(sample->records, sample->count, sizeof(TargetRecord), + compare_records); + } +} + +static uint64_t action_set_hash(void) { + uint64_t hash = 1469598103934665603ull; + hash = mix_bytes(hash, ACTIVE_ACTION_SET->name); + hash = mix_u64(hash, BITS); + hash = mix_u64(hash, ACTIVE_ACTION_SET->num_actions); + hash = mix_u64(hash, 0xfe00u); + for (int i = 0; i < ACTIVE_ACTION_SET->num_actions; i++) { + hash = mix_u64(hash, (uint64_t)i); + hash = mix_bytes(hash, ACTIVE_ACTION_SET->names[i]); + } + return hash; +} + +static uint64_t checksum_records(const WorkerResult* result) { + uint64_t hash = 1469598103934665603ull; + hash = mix_u64(hash, action_set_hash()); + for (int depth_index = 0; depth_index < TARGET_DEPTH_COUNT; depth_index++) { + const DepthSample* sample = &result->depths[depth_index]; + hash = mix_u64(hash, (uint64_t)sample->depth); + hash = mix_u64(hash, sample->exact_count); + hash = mix_u64(hash, sample->count); + for (uint32_t i = 0; i < sample->count; i++) { + const TargetRecord* record = &sample->records[i]; + hash = mix_u64(hash, record->start); + hash = mix_u64(hash, record->target); + hash = mix_u64(hash, record->packed_actions); + hash = mix_u64(hash, record->solution_length); + hash = mix_u64(hash, record->depth); + } + } + return hash; +} + +static int write_bytes(FILE* file, const void* data, size_t size) { + return fwrite(data, 1, size, file) == size ? 
0 : -1; +} + +static int write_u16(FILE* file, uint16_t value) { + unsigned char bytes[2] = { + (unsigned char)(value & 0xffu), + (unsigned char)((value >> 8) & 0xffu), + }; + return write_bytes(file, bytes, sizeof(bytes)); +} + +static int write_u32(FILE* file, uint32_t value) { + unsigned char bytes[4] = { + (unsigned char)(value & 0xffu), + (unsigned char)((value >> 8) & 0xffu), + (unsigned char)((value >> 16) & 0xffu), + (unsigned char)((value >> 24) & 0xffu), + }; + return write_bytes(file, bytes, sizeof(bytes)); +} + +static int write_u64(FILE* file, uint64_t value) { + unsigned char bytes[8]; + for (int i = 0; i < 8; i++) { + bytes[i] = (unsigned char)((value >> (8 * i)) & 0xffu); + } + return write_bytes(file, bytes, sizeof(bytes)); +} + +static uint32_t total_record_count(const WorkerResult* result) { + uint64_t count = 0; + for (int depth_index = 0; depth_index < TARGET_DEPTH_COUNT; depth_index++) { + count += result->depths[depth_index].count; + } + if (count > UINT32_MAX) { + fprintf(stderr, "too many target records for binary format\n"); + exit(2); + } + return (uint32_t)count; +} + +static uint32_t header_size(void) { + return 52u + (uint32_t)TARGET_DEPTH_COUNT * 24u; +} + +static int write_binary(const char* path, const WorkerResult* result) { + FILE* file = fopen(path, "wb"); + if (file == NULL) { + fprintf(stderr, "failed to open %s: %s\n", path, strerror(errno)); + return -1; + } + + const unsigned char magic[8] = {'A', 'L', '7', 'T', 'G', 'T', '1', '\0'}; + uint32_t record_count = total_record_count(result); + uint64_t checksum = checksum_records(result); + uint64_t set_hash = action_set_hash(); + int rc = 0; + rc |= write_bytes(file, magic, sizeof(magic)); + rc |= write_u32(file, FORMAT_VERSION); + rc |= write_u32(file, header_size()); + rc |= write_u32(file, RECORD_SIZE); + rc |= write_u32(file, BITS); + rc |= write_u32(file, (uint32_t)ACTIVE_ACTION_SET->num_actions); + rc |= write_u32(file, TARGET_DEPTH_COUNT); + rc |= write_u32(file, 
record_count); + rc |= write_u64(file, checksum); + rc |= write_u64(file, set_hash); + + uint32_t first_record = 0; + for (int depth_index = 0; depth_index < TARGET_DEPTH_COUNT; depth_index++) { + const DepthSample* sample = &result->depths[depth_index]; + rc |= write_u32(file, (uint32_t)sample->depth); + rc |= write_u32(file, first_record); + rc |= write_u32(file, sample->count); + rc |= write_u32(file, 0u); + rc |= write_u64(file, sample->exact_count); + first_record += sample->count; + } + + for (int depth_index = 0; depth_index < TARGET_DEPTH_COUNT; depth_index++) { + const DepthSample* sample = &result->depths[depth_index]; + for (uint32_t i = 0; i < sample->count; i++) { + const TargetRecord* record = &sample->records[i]; + rc |= write_u16(file, record->start); + rc |= write_u16(file, record->target); + rc |= write_u64(file, record->packed_actions); + rc |= fputc(record->solution_length, file) == EOF ? -1 : 0; + rc |= fputc(record->depth, file) == EOF ? -1 : 0; + rc |= write_u16(file, 0u); + } + } + + if (fclose(file) != 0) { + fprintf(stderr, "failed to close %s: %s\n", path, strerror(errno)); + return -1; + } + if (rc != 0) { + fprintf(stderr, "failed to write %s\n", path); + return -1; + } + return 0; +} + +static int write_json(const char* path, const WorkerResult* result, + const Options* options) { + FILE* file = fopen(path, "w"); + if (file == NULL) { + fprintf(stderr, "failed to open %s: %s\n", path, strerror(errno)); + return -1; + } + + uint32_t record_count = total_record_count(result); + uint64_t checksum = checksum_records(result); + uint64_t set_hash = action_set_hash(); + + fprintf(file, "{\n"); + fprintf(file, " \"action_id_to_name\": [\n"); + for (int i = 0; i < ACTIVE_ACTION_SET->num_actions; i++) { + fprintf(file, " \"%s\"%s\n", ACTIVE_ACTION_SET->names[i], + i == ACTIVE_ACTION_SET->num_actions - 1 ? 
"" : ","); + } + fprintf(file, " ],\n"); + fprintf(file, " \"action_set\": \"%s\",\n", ACTIVE_ACTION_SET->name); + fprintf(file, " \"action_set_hash\": \"0x%016llx\",\n", + (unsigned long long)set_hash); + fprintf(file, " \"binary_path\": \"%s\",\n", options->output_bin); + fprintf(file, " \"bits\": %d,\n", BITS); + fprintf(file, " \"checksum\": \"0x%016llx\",\n", + (unsigned long long)checksum); + fprintf(file, " \"depth_records\": [\n"); + uint32_t first_record = 0; + for (int depth_index = 0; depth_index < TARGET_DEPTH_COUNT; depth_index++) { + const DepthSample* sample = &result->depths[depth_index]; + fprintf(file, + " {\"depth\": %d, \"exact_pair_count\": %llu, " + "\"first_record\": %u, \"stored_count\": %u}%s\n", + sample->depth, + (unsigned long long)sample->exact_count, + first_record, + sample->count, + depth_index == TARGET_DEPTH_COUNT - 1 ? "" : ","); + first_record += sample->count; + } + fprintf(file, " ],\n"); + fprintf(file, " \"depths\": ["); + for (int i = 0; i < TARGET_DEPTH_COUNT; i++) { + fprintf(file, "%s%d", i == 0 ? "" : ", ", TARGET_DEPTHS[i]); + } + fprintf(file, "],\n"); + fprintf(file, " \"disconnected_starts\": %llu,\n", + (unsigned long long)result->disconnected_starts); + fprintf(file, " \"format\": \"affine_lock_visible_targets_bin\",\n"); + fprintf(file, " \"header_size\": %u,\n", header_size()); + fprintf(file, " \"max_distance\": %d,\n", result->max_distance); + fprintf(file, " \"num_actions\": %d,\n", ACTIVE_ACTION_SET->num_actions); + fprintf(file, " \"record_count\": %u,\n", record_count); + fprintf(file, " \"record_size\": %d,\n", RECORD_SIZE); + fprintf(file, " \"sample_per_depth\": %u,\n", + options->sample_per_depth); + fprintf(file, " \"sample_seed\": %llu,\n", + (unsigned long long)options->sample_seed); + fprintf(file, " \"stored_all_depths\": ["); + int wrote_depth = 0; + for (int i = 0; i < TARGET_DEPTH_COUNT; i++) { + if (!options->store_all_depths[i]) { + continue; + } + fprintf(file, "%s%d", wrote_depth ? 
", " : "", TARGET_DEPTHS[i]); + wrote_depth = 1; + } + fprintf(file, "],\n"); + fprintf(file, " \"version\": %d,\n", FORMAT_VERSION); + fprintf(file, " \"visible_distance_histogram\": {\n"); + int first = 1; + for (int distance = 0; distance <= result->max_distance; distance++) { + if (!first) { + fprintf(file, ",\n"); + } + fprintf(file, " \"%d\": %llu", distance, + (unsigned long long)result->histogram[distance]); + first = 0; + } + fprintf(file, "\n }\n"); + fprintf(file, "}\n"); + + if (fclose(file) != 0) { + fprintf(stderr, "failed to close %s: %s\n", path, strerror(errno)); + return -1; + } + return 0; +} + +static int parse_uint32(const char* text, uint32_t* out) { + char* end = NULL; + errno = 0; + unsigned long value = strtoul(text, &end, 10); + if (errno != 0 || end == text || *end != '\0' || value > UINT32_MAX) { + return -1; + } + *out = (uint32_t)value; + return 0; +} + +static int parse_uint64(const char* text, uint64_t* out) { + char* end = NULL; + errno = 0; + if (text[0] == '-') { + return -1; + } + unsigned long long value = strtoull(text, &end, 0); + if (errno != 0 || end == text || *end != '\0') { + return -1; + } + *out = (uint64_t)value; + return 0; +} + +static void print_usage(const char* program) { + fprintf(stderr, + "usage: %s [--action-set NAME] [--sample-per-depth N] " + "[--sample-seed N] [--store-all-depth D] " + "[--output-bin PATH] [--output-json PATH]\n", + program); + fprintf(stderr, "available action sets:"); + for (int i = 0; i < ACTION_SET_COUNT; i++) { + fprintf(stderr, " %s", ACTION_SETS[i].name); + } + fprintf(stderr, "\n"); +} + +static int parse_args(int argc, char** argv, Options* options) { + options->action_set = &ACTION_SETS[0]; + options->output_bin = NULL; + options->output_json = NULL; + options->sample_per_depth = 65536u; + options->sample_seed = 0u; + memset(options->store_all_depths, 0, sizeof(options->store_all_depths)); + options->output_bin_explicit = 0; + options->output_json_explicit = 0; + + for (int i = 1; 
i < argc; i++) { + if (strcmp(argv[i], "--action-set") == 0 && i + 1 < argc) { + const ActionSet* action_set = action_set_by_name(argv[++i]); + if (action_set == NULL) { + fprintf(stderr, "unknown --action-set %s\n", argv[i]); + return -1; + } + options->action_set = action_set; + } else if (strcmp(argv[i], "--sample-per-depth") == 0 && i + 1 < argc) { + if (parse_uint32(argv[++i], &options->sample_per_depth) != 0) { + fprintf(stderr, "invalid --sample-per-depth value\n"); + return -1; + } + } else if (strcmp(argv[i], "--sample-seed") == 0 && i + 1 < argc) { + if (parse_uint64(argv[++i], &options->sample_seed) != 0) { + fprintf(stderr, "invalid --sample-seed value\n"); + return -1; + } + } else if (strcmp(argv[i], "--store-all-depth") == 0 && i + 1 < argc) { + uint32_t depth = 0; + if (parse_uint32(argv[++i], &depth) != 0) { + fprintf(stderr, "invalid --store-all-depth value\n"); + return -1; + } + int depth_index = target_depth_index((int)depth); + if (depth_index < 0) { + fprintf(stderr, "unsupported --store-all-depth %u\n", depth); + return -1; + } + options->store_all_depths[depth_index] = 1; + } else if (strcmp(argv[i], "--output-bin") == 0 && i + 1 < argc) { + options->output_bin = argv[++i]; + options->output_bin_explicit = 1; + } else if (strcmp(argv[i], "--output-json") == 0 && i + 1 < argc) { + options->output_json = argv[++i]; + options->output_json_explicit = 1; + } else if (strcmp(argv[i], "--help") == 0) { + print_usage(argv[0]); + exit(0); + } else { + print_usage(argv[0]); + return -1; + } + } + + if (options->output_bin == NULL || !options->output_bin_explicit) { + options->output_bin = options->action_set->default_bin; + } + if (options->output_json == NULL || !options->output_json_explicit) { + options->output_json = options->action_set->default_json; + } + if (options->action_set->store_all_d16_by_default) { + options->store_all_depths[target_depth_index(16)] = 1; + } + return 0; +} + +int main(int argc, char** argv) { + Options options; + if 
(parse_args(argc, argv, &options) != 0) { + return 1; + } + + ACTIVE_ACTION_SET = options.action_set; + ACTIVE_SAMPLE_SEED = options.sample_seed; + build_next_state(); + int worker_count = 1; +#ifdef _OPENMP + worker_count = omp_get_max_threads(); +#endif + WorkerResult* workers = + (WorkerResult*)calloc((size_t)worker_count, sizeof(WorkerResult)); + if (workers == NULL) { + fprintf(stderr, "failed to allocate worker results\n"); + return 1; + } + +#ifdef _OPENMP +#pragma omp parallel +#endif + { + int worker_index = 0; +#ifdef _OPENMP + worker_index = omp_get_thread_num(); +#endif + init_worker_result(&workers[worker_index], &options); + compute_worker_records(&workers[worker_index]); + } + + WorkerResult merged; + merge_results(&merged, workers, worker_count, &options); + int rc = 0; + if (write_binary(options.output_bin, &merged) != 0) { + rc = 1; + } + if (write_json(options.output_json, &merged, &options) != 0) { + rc = 1; + } + for (int i = 0; i < worker_count; i++) { + free_worker_result(&workers[i]); + } + free(workers); + free_worker_result(&merged); + return rc == 0 ? 0 : 1; +} diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py index 36e27bf42a..fc753e5395 100644 --- a/pufferlib/sweep.py +++ b/pufferlib/sweep.py @@ -145,8 +145,11 @@ def _params_from_puffer_sweep(sweep_config, only_include=None): only_include = [p.strip() for p in sweep_config['sweep_only'].split(',')] for name, param in sweep_config.items(): - if name in ('method', 'metric', 'metric_distribution', 'goal', 'downsample', 'use_gpu', 'prune_pareto', - 'sweep_only', 'max_suggestion_cost', 'early_stop_quantile', 'gpus', 'max_runs'): + if name in ('method', 'metric', 'metric_distribution', 'goal', + 'downsample', 'use_gpu', 'prune_pareto', 'sweep_only', + 'max_suggestion_cost', 'early_stop_quantile', 'gpus', + 'max_runs', 'match_enemy_model_path', 'match_num_games', + 'match_enemy_hidden_size', 'match_enemy_num_layers'): continue assert isinstance(param, dict), f'Param {name} is not a dict'