diff --git a/config/affine_lock.ini b/config/affine_lock.ini
new file mode 100644
index 0000000000..2352e98a8a
--- /dev/null
+++ b/config/affine_lock.ini
@@ -0,0 +1,153 @@
+[base]
+env_name = affine_lock
+
+[vec]
+total_agents = 4096
+num_buffers = 2
+num_threads = 16
+
+[policy]
+hidden_size = 256
+num_layers = 3
+
+[env]
+seed = 42
+start_depth = 2
+max_depth = 16
+step_grace = 0
+
+[train]
+total_timesteps = 200_000_000
+horizon = 64
+minibatch_size = 8192
+learning_rate = 0.012
+ent_coef = 0.2
+gamma = 0.8
+gae_lambda = 0.995
+replay_ratio = 3.0
+clip_coef = 0.83
+vf_coef = 4.75
+vf_clip_coef = 0.8
+max_grad_norm = 3.0
+beta1 = 0.5
+beta2 = 0.9915
+eps = 0.0001
+vtrace_rho_clip = 1.4
+vtrace_c_clip = 3.75
+prio_alpha = 0.055
+prio_beta0 = 0.161
+
+[sweep]
+metric = perf
+goal = maximize
+max_runs = 50
+use_gpu = False
+
+[sweep.train.total_timesteps]
+distribution = log_normal
+min = 100_000_000
+max = 200_000_000
+mean = 200_000_000
+scale = time
+
+[sweep.vec.total_agents]
+distribution = uniform_pow2
+min = 4096
+max = 16384
+mean = 4096
+scale = auto
+
+[sweep.policy.hidden_size]
+distribution = uniform_pow2
+min = 64
+max = 512
+mean = 256
+scale = auto
+
+[sweep.policy.num_layers]
+distribution = uniform
+min = 1
+max = 4
+mean = 3
+scale = auto
+
+[sweep.vec.num_buffers]
+distribution = uniform
+min = 1
+max = 4
+mean = 2
+scale = auto
+
+[sweep.train.horizon]
+distribution = uniform_pow2
+min = 32
+max = 128
+mean = 64
+scale = auto
+
+[sweep.train.minibatch_size]
+distribution = uniform_pow2
+min = 8192
+max = 131072
+mean = 8192
+scale = auto
+
+[sweep.train.replay_ratio]
+distribution = uniform
+min = 1.0
+max = 4.0
+mean = 3.0
+scale = auto
+
+[sweep.train.learning_rate]
+mean = 0.012
+
+[sweep.train.ent_coef]
+mean = 0.2
+
+[sweep.train.gamma]
+mean = 0.8
+
+[sweep.train.gae_lambda]
+mean = 0.995
+
+[sweep.train.vtrace_rho_clip]
+mean = 1.4
+
+[sweep.train.vtrace_c_clip]
+mean = 3.75
+
+[sweep.train.clip_coef]
+mean = 0.83
+
+[sweep.train.vf_clip_coef]
+distribution = uniform
+min = 0.001
+max = 5.0
+mean = 0.8
+scale = auto
+
+[sweep.train.vf_coef]
+distribution = uniform
+min = 0.1
+max = 8.0
+mean = 4.75
+scale = auto
+
+[sweep.train.max_grad_norm]
+mean = 3.0
+
+[sweep.train.beta1]
+mean = 0.5
+
+[sweep.train.beta2]
+mean = 0.9915
+
+[sweep.train.eps]
+mean = 0.0001
+
+[sweep.train.prio_alpha]
+mean = 0.055
+
+[sweep.train.prio_beta0]
+mean = 0.161
diff --git a/ocean/affine_lock/README.md b/ocean/affine_lock/README.md
new file mode 100644
index 0000000000..5fcb2b530f
--- /dev/null
+++ b/ocean/affine_lock/README.md
@@ -0,0 +1,237 @@
+# affine_lock
+
+`affine_lock` is a single-agent 16-bit state-matching environment. Each episode
+starts from a current bit state and a target bit state. The agent applies one of
+eight reversible bit transforms until the current state equals the target.
+
+The committed training path uses the generated visible-target table:
+
+```text
+ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin
+```
+
+That table is loaded at reset time and provides exact start/target pairs for the
+curriculum depths configured in `config/affine_lock.ini`.
+
+## Runtime Action Set
+
+The runtime environment uses the committed 8-action set. The generator and
+manifest identify this exact transform set as `affine_lock_8action_v1`:
+
+| Id | Name | Effect |
+| ---: | --- | --- |
+| `0` | `shift_left` | rotate bit positions left |
+| `1` | `shift_right` | rotate bit positions right |
+| `2` | `invert_right_7` | flip bits `9..15` |
+| `3` | `swap_adjacent_bits` | swap each adjacent bit pair |
+| `4` | `swap_adjacent_pairs` | swap each adjacent two-bit pair |
+| `5` | `swap_nibbles_each_byte` | swap low/high nibbles within each byte |
+| `6` | `reverse_each_nibble` | reverse bit order within each nibble |
+| `7` | `reverse_each_byte` | reverse bit order within each byte |
+
+The Puffer binding exposes one discrete action slot with
+`AFFINE_LOCK_NUM_ACTIONS = 8`.
+
+## Resets
+
+Resets always sample from the visible-target table chosen at build time (the
+committed table by default). To train or test on a different target distribution,
+generate a new table with the tool below and point `AFFINE_LOCK_VISIBLE_TARGET_TABLE_PATH` at it when building.
+
+## Committed Target Table
+
+The committed table stores sampled visible start/target pairs at depths `2`,
+`4`, `5`, `6`, and `8`, plus every known true depth-16 pair for this action
+set.
+
+| Depth | True visible pairs | Stored records |
+| ---: | ---: | ---: |
+| `2` | `2,216,496` | `65,536` |
+| `4` | `34,379,722` | `65,536` |
+| `5` | `115,388,932` | `65,536` |
+| `6` | `331,789,220` | `65,536` |
+| `8` | `1,125,374,770` | `65,536` |
+| `16` | `100,548` | `100,548` |
+
+The table format can store sections for any depth, but the generator currently
+targets the fixed depth list `{2, 4, 5, 6, 8, 16}`. The runtime `seed` controls
+the episode sequence sampled from a loaded table. The generator's
+`--sample-seed` controls which sampled depth-2/4/5/6/8 records are written into
+a custom table. Depth 16 is stored in full for the committed 8-action set, so
+changing `--sample-seed` does not change the depth-16 records.
+
+## Regenerating the Target Table
+
+If the generated binary artifact is omitted from a checkout, regenerate the
+default table from the repo root:
+
+```bash
+gcc -std=c11 -O3 -DNDEBUG -fopenmp \
+  -I. -Iocean/affine_lock \
+  ocean/affine_lock/tools/generate_8action_visible_targets.c \
+  -lm -o /tmp/affine_lock_generate_visible_targets
+
+/tmp/affine_lock_generate_visible_targets
+```
+
+The no-argument generator run writes the default `.bin` and `.json` files under
+`ocean/affine_lock/generated/`. The default sample seed is `0`, which preserves
+the committed benchmark table. Changing the committed `.bin` changes the
+training data and can change full-run `perf`, so regenerate and benchmark before
+committing a replacement table.
+
+### Using a Custom 8-Action Table
+
+The same generator can create larger or seed-varied tables for the committed
+8-action environment without changing the runtime action set:
+
+```bash
+/tmp/affine_lock_generate_visible_targets \
+  --sample-seed 42 \
+  --sample-per-depth 131072 \
+  --store-all-depth 16 \
+  --output-bin /tmp/affine_lock_8action_visible_targets_seed42.bin \
+  --output-json /tmp/affine_lock_8action_visible_targets_seed42.json
+```
+
+Increasing `--sample-per-depth` raises the number of stored records for sampled
+depths. `--store-all-depth D` stores every exact pair for a supported target
+depth. For the committed 8-action set, depth 16 is stored in full by default.
+Using the same `--sample-seed` and options produces the same table; using a
+different seed produces a different sampled d2/d4/d5/d6/d8 table while leaving
+stored-all depths unchanged.
+
+To train against a custom 8-action table, either write it to the default path or
+build with an explicit table path:
+
+```bash
+EXTRA_CFLAGS='-DAFFINE_LOCK_VISIBLE_TARGET_TABLE_PATH="/tmp/affine_lock_8action_visible_targets_seed42.bin"' \
+  ./build.sh affine_lock
+```
+
+The loader checks that the table action-set hash matches the runtime action
+set. For seed-varied or larger 8-action tables, no runtime code changes are
+needed as long as the table contains the curriculum depths requested by the
+runtime.
+
+The generator currently uses one `--sample-per-depth` value for all sampled
+depths. If a future benchmark wants asymmetric budgets such as fewer d2/d4
+records and more d6/d8 records, update the generator sampling options and
+manifest/tests together, then regenerate and benchmark the replacement table.
+
+To generate train/test table variants, keep the same depth/count settings and
+change only `--sample-seed` and the output paths:
+
+```bash
+/tmp/affine_lock_generate_visible_targets \
+  --sample-seed 42 \
+  --sample-per-depth 65536 \
+  --store-all-depth 16 \
+  --output-bin /tmp/affine_lock_train_seed42.bin \
+  --output-json /tmp/affine_lock_train_seed42.json
+
+/tmp/affine_lock_generate_visible_targets \
+  --sample-seed 69 \
+  --sample-per-depth 65536 \
+  --store-all-depth 16 \
+  --output-bin /tmp/affine_lock_test_seed69.bin \
+  --output-json /tmp/affine_lock_test_seed69.json
+```
+
+### Dropping the Committed Binary
+
+The `.bin` is committed so the env works immediately and benchmark runs are
+byte-for-byte reproducible. If the binary is removed from a branch, users must
+run the no-argument generator before building/training:
+
+```bash
+/tmp/affine_lock_generate_visible_targets
+./build.sh affine_lock
+python -m pufferlib.pufferl train affine_lock
+```
+
+This recreates the default table at the path expected by the runtime. The
+matching `.json` manifest records the depth counts, checksum, action-set hash,
+and generator options.
+
+## Experimental 4-Action Generator Set
+
+The generator also includes an experimental `affine_lock_4action_v1` action set:
+
+```text
+shift_right
+mirror
+invert_right_7
+swap_adjacent_bits
+```
+
+This is generator-only. The committed runtime environment does not train on this
+action set. It is kept as a small, explicit alternate because a four-action
+policy can be easier to learn, and this graph has far more unique depth-16
+pairs than the committed 8-action table. To make it a runtime environment,
+update the env action table, `AFFINE_LOCK_NUM_ACTIONS`, the visible-table
+action-set hash/path, generated table artifact, and any policy/config
+expectations that assume eight actions.
+
+The current true visible-pair counts for this generator action set are:
+
+| Depth | True visible pairs |
+| ---: | ---: |
+| `2` | `772,080` |
+| `4` | `6,055,652` |
+| `5` | `16,234,512` |
+| `6` | `42,176,998` |
+| `8` | `234,409,780` |
+| `16` | `2,434,606` |
+
+Example generation command:
+
+```bash
+/tmp/affine_lock_generate_visible_targets \
+  --action-set affine_lock_4action_v1 \
+  --sample-per-depth 65536 \
+  --store-all-depth 16 \
+  --output-bin /tmp/affine_lock_4action_visible_targets.bin \
+  --output-json /tmp/affine_lock_4action_visible_targets.json
+```
+
+### Making 4-Action a Runtime Env
+
+The 4-action table is not plug-compatible with the committed 8-action runtime.
+To make a real 4-action runtime variant:
+
+1. Change `AFFINE_LOCK_NUM_ACTIONS` to `4`.
+2. Change the runtime action enum/table in `affine_lock.h` to match the
+   generator's `affine_lock_4action_v1` order.
+3. Point `AFFINE_LOCK_VISIBLE_TARGET_TABLE_PATH` at a 4-action table.
+4. Update the expected action-set hash in `affine_lock_visible_targets.h` to
+   the 4-action manifest's `action_set_hash`.
+5. Remove runtime helpers and render labels that only exist for the old
+   8-action table.
+6. Update policy/config/test assumptions that expect eight actions. In
+   particular, the all-actions-have-one-step-inverses test only holds for the
+   8-action set: the 4-action set drops `shift_left`, so `shift_right` has no
+   one-step inverse. Replace it with checks that match the new action cycles
+   and refresh the deterministic golden checksum.
+7. Rebuild, run `ocean/affine_lock/tests/run_all.sh`, and rerun a full
+   benchmark train.
+
+## Adding New Depths Later
+
+Adding another depth such as `7`, `10`, or `12` is intentionally not part of the
+committed runtime path, but the file format can represent it. A future change
+would need to:
+
+1. Add the depth to `TARGET_DEPTHS` in
+   `tools/generate_8action_visible_targets.c`.
+2. Regenerate the `.bin` and `.json`.
+3. Add the depth to `AFFINE_LOCK_CURRICULUM_DEPTHS` and update
+   `AFFINE_LOCK_CURRICULUM_DEPTH_COUNT`.
+4. Add matching `Log.depth_D_rate` and `Log.depth_D_solve_rate` fields plus
+   `my_log` exports if the depth should appear in training logs.
+5. Update config/docs/tests to expect the new depth and record count.
+6. Rerun the affine tests and a full training benchmark.
+
+The loader itself does not require a format change for additional depth
+sections. If a new table omits a runtime-requested curriculum depth, reset will
+abort because there is no valid record pool for that depth.
diff --git a/ocean/affine_lock/affine_lock.c b/ocean/affine_lock/affine_lock.c
new file mode 100644
index 0000000000..13b999db94
--- /dev/null
+++ b/ocean/affine_lock/affine_lock.c
@@ -0,0 +1,90 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "affine_lock.h"
+
+static AffineLock* g_env = NULL;
+static AffineLockShared* g_shared = NULL;
+
+// Idempotent demo teardown; main calls it directly and registers it with atexit.
+static void demo_cleanup(void) {
+    AffineLock* env = g_env;
+    g_env = NULL;
+    if (env != NULL) {
+        float* owned[] = {
+            env->observations, env->actions, env->rewards, env->terminals};
+        for (int i = 0; i < 4; i++) free(owned[i]);
+        c_close(env);
+    }
+    if (g_shared == NULL) return;
+    affine_lock_free_shared(g_shared);
+    free(g_shared);
+    g_shared = NULL;
+}
+
+// Map number keys 1-8 to action ids 0-7; returns -1 when none of them is pressed.
+static int key_to_action(void) {
+    static const int keys[AFFINE_LOCK_NUM_ACTIONS] = {
+        KEY_ONE, KEY_TWO, KEY_THREE, KEY_FOUR,
+        KEY_FIVE, KEY_SIX, KEY_SEVEN, KEY_EIGHT,
+    };
+    int pressed = -1;
+    // The lowest-numbered pressed key wins; polling stops at the first hit.
+    for (int i = 0; i < AFFINE_LOCK_NUM_ACTIONS && pressed < 0; i++) {
+        if (IsKeyPressed(keys[i])) pressed = i;
+    }
+    return pressed;
+}
+
+// Interactive demo: keys 1-8 apply an action, R resets; returns 1 on setup failure.
+int main(void) {
+    g_shared = (AffineLockShared*)calloc(1, sizeof(AffineLockShared));
+    if (g_shared == NULL ||
+            affine_lock_init_shared(g_shared, 2, 16, 2) != 0) {
+        fprintf(stderr, "failed to initialize affine_lock demo\n");
+        demo_cleanup();
+        return 1;
+    }
+    if (affine_lock_prepare_visible_targets(g_shared) != 0) {
+        fprintf(stderr, "failed to configure affine_lock demo\n");
+        demo_cleanup();
+        return 1;
+    }
+
+    // Static (zero-initialized): the atexit hook may run after main has returned.
+    static AffineLock env;
+    g_env = &env;
+    atexit(demo_cleanup);
+
+    // Standalone demo buffers match the FloatTensor/float vecenv contract.
+    env.observations = (float*)calloc(AFFINE_LOCK_OBS_SIZE, sizeof(float));
+    env.actions = (float*)calloc(AFFINE_LOCK_NUM_ATNS, sizeof(float));
+    env.rewards = (float*)calloc(1, sizeof(float));
+    env.terminals = (float*)calloc(1, sizeof(float));
+    if (env.observations == NULL || env.actions == NULL ||
+            env.rewards == NULL || env.terminals == NULL) {
+        fprintf(stderr, "failed to allocate affine_lock demo buffers\n");
+        demo_cleanup();
+        return 1;
+    }
+
+    affine_lock_init_env(&env, g_shared, (unsigned int)time(NULL));
+    c_reset(&env);
+    c_render(&env);
+
+    while (!WindowShouldClose()) {
+        if (IsWindowReady() && IsKeyPressed(KEY_R)) {
+            c_reset(&env);
+        }
+        int action = key_to_action();
+        if (action >= 0) {
+            env.actions[0] = (float)action;
+            c_step(&env);
+        }
+        c_render(&env);
+    }
+
+    demo_cleanup();
+    return 0;
+}
diff --git a/ocean/affine_lock/affine_lock.h b/ocean/affine_lock/affine_lock.h
new file mode 100644
index 0000000000..82be93890f
--- /dev/null
+++ b/ocean/affine_lock/affine_lock.h
@@ -0,0 +1,626 @@
+#pragma once
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#ifndef AFFINE_LOCK_NO_RENDER
+#include "raylib.h"
+#endif
+
+#include "affine_lock_visible_targets.h"
+
+#define AFFINE_LOCK_BITS 16
+#define AFFINE_LOCK_TIMER_INDEX (2 * AFFINE_LOCK_BITS)
+#define AFFINE_LOCK_OBS_SIZE (AFFINE_LOCK_TIMER_INDEX + 1)
+// PufferLib uses one action slot for this single-discrete-action env.
+#define AFFINE_LOCK_NUM_ATNS 1
+#define AFFINE_LOCK_NUM_ACTIONS 8
+#define AFFINE_LOCK_MAX_SOLUTION_DEPTH 16
+#define AFFINE_LOCK_CURRICULUM_DEPTH_COUNT 6
+#define AFFINE_LOCK_STEP_REWARD (-0.01f)
+#ifndef AFFINE_LOCK_VISIBLE_TARGET_TABLE_PATH
+#define AFFINE_LOCK_VISIBLE_TARGET_TABLE_PATH \
+    "ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin"
+#endif
+
+static const int AFFINE_LOCK_CURRICULUM_DEPTHS[
+    AFFINE_LOCK_CURRICULUM_DEPTH_COUNT] = {2, 4, 5, 6, 8, 16};
+
+typedef enum AffineLockAction {  // ids index the action axis of AffineLockShared.next
+    AFFINE_LOCK_ACTION_SHIFT_LEFT = 0,  // rotate; bit 0 wraps to bit 15
+    AFFINE_LOCK_ACTION_SHIFT_RIGHT = 1,  // rotate; bit 15 wraps to bit 0
+    AFFINE_LOCK_ACTION_INVERT_RIGHT_7 = 2,  // XOR with 0xfe00 (bits 9..15)
+    AFFINE_LOCK_ACTION_SWAP_ADJACENT_BITS = 3,  // swap bits 2k and 2k+1
+    AFFINE_LOCK_ACTION_SWAP_ADJACENT_PAIRS = 4,  // swap neighbouring 2-bit groups
+    AFFINE_LOCK_ACTION_SWAP_NIBBLES_EACH_BYTE = 5,  // swap the two nibbles of each byte
+    AFFINE_LOCK_ACTION_REVERSE_EACH_NIBBLE = 6,  // reverse bit order inside each nibble
+    AFFINE_LOCK_ACTION_REVERSE_EACH_BYTE = 7,  // reverse bit order inside each byte
+} AffineLockAction;
+
+typedef struct Log {  // running sums; affine_lock_add_log adds to each once per finished episode
+    float perf;  // solve credit: depth / max_depth when solved, else 0
+    float score;  // receives the same credit as perf
+    float solve_rate;  // +1 per solved episode
+    float max_depth_solve;  // +1 per solve whose logged depth equals max_depth
+    float episode_return;  // sum of per-episode reward totals
+    float episode_length;  // sum of steps taken
+    float solve_steps;  // steps taken, solved episodes only
+    float timeout_rate;  // +1 per episode that ended neither solved nor invalid
+    float invalid_rate;  // +1 per episode ended by an invalid action
+    float solve_efficiency;  // steps / depth, solved episodes only
+    float target_distance;  // sum of the episodes' target_distance
+    float solved_target_distance;  // target_distance, solved episodes only
+    float depth_2_rate;  // +1 per episode logged at depth 2
+    float depth_2_solve_rate;  // +1 per solved episode logged at depth 2
+    float depth_4_rate;  // the remaining depth_D pairs follow the depth_2 pattern
+    float depth_4_solve_rate;
+    float depth_5_rate;
+    float depth_5_solve_rate;
+    float depth_6_rate;
+    float depth_6_solve_rate;
+    float depth_8_rate;
+    float depth_8_solve_rate;
+    float depth_16_rate;
+    float depth_16_solve_rate;
+    float n;  // number of episodes accumulated
+} Log;
+
+typedef struct AffineLockShared {  // settings and tables reached through AffineLock.shared
+    int start_depth;  // curriculum depth for a new env and after any failed episode
+    int max_depth;  // cap on the curriculum depth; denominator of the solve credit
+    int step_grace;  // extra steps allowed on top of the target distance
+    int num_states;  // 1 << AFFINE_LOCK_BITS
+    uint32_t mask;  // (1 << AFFINE_LOCK_BITS) - 1
+    uint32_t* next;  // next[state * AFFINE_LOCK_NUM_ACTIONS + action] = successor state
+    int visible_target_table_loaded;  // set to 1 after a successful table load
+    AffineLockVisibleTargetTable visible_target_table;  // start/target records by depth
+    float observation_bit_patterns[256][8];  // [byte][bit] -> +1.0f if set, else -1.0f
+} AffineLockShared;
+
+typedef struct Client {  // render-window settings
+    int screen_width;  // passed to InitWindow as the window width
+    int screen_height;  // passed to InitWindow as the window height
+} Client;
+
+typedef struct AffineLock {  // one environment instance
+    Log log;  // accumulators updated when an episode ends
+    float* observations;  // 16 state bits, 16 target bits, then the timer slot
+    float* actions;  // actions[0] carries the action id as a float
+    float* rewards;  // rewards[0] is written on every step
+    float* terminals;  // terminals[0] is 1.0f on the step that ends an episode
+    uint32_t state;  // current bit state
+    uint32_t target;  // bit state that solves the episode
+    int step_count;  // steps taken in the current episode
+    int max_steps;  // step budget: target_distance + step_grace after a reset
+    int scramble_depth;  // depth requested for the current episode
+    int curriculum_depth;  // depth the next reset will request
+    int solution_length;  // number of valid entries in solution_actions
+    int solution_actions[AFFINE_LOCK_MAX_SOLUTION_DEPTH];  // stored solution; unused slots are -1
+    int target_distance;  // depth of the sampled record; -1 until one is drawn
+    float episode_return;  // reward accumulated in the current episode
+    unsigned int rng;  // state of this env's private LCG
+    int num_agents;  // set to 1 by affine_lock_init_env
+    AffineLockShared* shared;  // tables and settings; not freed by c_close
+    Client* client;  // render client; c_close is a no-op while this is NULL
+} AffineLock;
+
+static float affine_lock_solve_credit(const AffineLockShared* shared, int depth) {
+    return shared->max_depth <= 0 ? 0.0f : (float)depth / (float)shared->max_depth;
+}  // credit = depth / max_depth; zero when max_depth is not positive
+
+static int affine_lock_log_depth(const AffineLock* env) {  // depth bucket used by the logs
+    return env->target_distance <= 0 ? env->scramble_depth : env->target_distance;
+}
+
+// Precompute the +1/-1 float encoding of every byte value, bit 0 first.
+static void affine_lock_init_observation_bit_patterns(AffineLockShared* shared) {
+    for (uint32_t byte = 0; byte < 256u; byte++) {
+        for (int i = 0; i < 8; i++) {
+            shared->observation_bit_patterns[byte][i] = ((byte >> i) & 1u) ? 1.0f : -1.0f;
+        }
+    }
+}
+
+// Rotate by one position: each bit moves down one index and bit 0 wraps to bit 15.
+static uint32_t affine_lock_shift_left(uint32_t state) {
+    return (state >> 1) | ((state & 1u) << (AFFINE_LOCK_BITS - 1));
+}
+
+static uint32_t affine_lock_shift_right(uint32_t state) {  // bit 15 wraps to bit 0
+    return ((state << 1) & ((1u << AFFINE_LOCK_BITS) - 1u)) |
+        ((state >> (AFFINE_LOCK_BITS - 1)) & 1u);
+}
+
+static uint32_t affine_lock_swap_adjacent_bits(uint32_t state) {  // bits 2k <-> 2k+1
+    return ((state >> 1) & 0x5555u) | ((state << 1) & 0xaaaau);
+}
+
+static uint32_t affine_lock_swap_adjacent_pairs(uint32_t state) {  // swap 2-bit neighbours
+    return ((state >> 2) & 0x3333u) | ((state << 2) & 0xccccu);
+}
+
+static uint32_t affine_lock_swap_nibbles_each_byte(uint32_t state) {  // per-byte nibble swap
+    return ((state >> 4) & 0x0f0fu) | ((state << 4) & 0xf0f0u);
+}
+
+// Reverse each 4-bit group: swap the bits inside every pair, then swap the pairs.
+static uint32_t affine_lock_reverse_each_nibble(uint32_t state) {
+    return affine_lock_swap_adjacent_pairs(affine_lock_swap_adjacent_bits(state));
+}
+
+// Reverse each byte: reverse both of its nibbles, then swap the two nibbles.
+static uint32_t affine_lock_reverse_each_byte(uint32_t state) {
+    return affine_lock_swap_nibbles_each_byte(affine_lock_reverse_each_nibble(state));
+}
+
+// Populate `shared`: depth settings, the byte->bits observation cache and the
+// state x action transition table. Returns 0, or -1 if the table cannot be allocated.
+static int affine_lock_init_shared(
+        AffineLockShared* shared,
+        int start_depth,
+        int max_depth,
+        int step_grace) {
+    const uint32_t state_count = 1u << AFFINE_LOCK_BITS;
+    memset(shared, 0, sizeof(*shared));
+    shared->start_depth = start_depth;
+    shared->max_depth = max_depth;
+    shared->step_grace = step_grace;
+    shared->num_states = (int)state_count;
+    shared->mask = state_count - 1u;
+    affine_lock_init_observation_bit_patterns(shared);
+
+    shared->next = (uint32_t*)calloc(
+        (size_t)state_count * AFFINE_LOCK_NUM_ACTIONS, sizeof(uint32_t));
+    if (shared->next == NULL) {
+        fprintf(stderr, "affine_lock: failed to allocate action table\n");
+        return -1;
+    }
+
+    uint32_t* cell = shared->next;  // one row per state, one column per action
+    for (uint32_t state = 0; state < state_count; state++) {
+        for (int action = 0; action < AFFINE_LOCK_NUM_ACTIONS; action++) {
+            uint32_t result = state;
+            switch (action) {
+                case AFFINE_LOCK_ACTION_SHIFT_LEFT:
+                    result = affine_lock_shift_left(state);
+                    break;
+                case AFFINE_LOCK_ACTION_SHIFT_RIGHT:
+                    result = affine_lock_shift_right(state);
+                    break;
+                case AFFINE_LOCK_ACTION_INVERT_RIGHT_7:
+                    result = state ^ 0xfe00u;
+                    break;
+                case AFFINE_LOCK_ACTION_SWAP_ADJACENT_BITS:
+                    result = affine_lock_swap_adjacent_bits(state);
+                    break;
+                case AFFINE_LOCK_ACTION_SWAP_ADJACENT_PAIRS:
+                    result = affine_lock_swap_adjacent_pairs(state);
+                    break;
+                case AFFINE_LOCK_ACTION_SWAP_NIBBLES_EACH_BYTE:
+                    result = affine_lock_swap_nibbles_each_byte(state);
+                    break;
+                case AFFINE_LOCK_ACTION_REVERSE_EACH_NIBBLE:
+                    result = affine_lock_reverse_each_nibble(state);
+                    break;
+                case AFFINE_LOCK_ACTION_REVERSE_EACH_BYTE:
+                    result = affine_lock_reverse_each_byte(state);
+                    break;
+            }
+            *cell++ = result & shared->mask;
+        }
+    }
+    return 0;
+}
+
+// Load the visible-target table into `shared` on first use. Returns 0 when the
+// table is (already) loaded, or -1 after printing the loader's error message.
+static int affine_lock_prepare_visible_targets(AffineLockShared* shared) {
+    if (!shared->visible_target_table_loaded) {
+        char error[256];
+        if (affine_lock_visible_targets_load(
+                AFFINE_LOCK_VISIBLE_TARGET_TABLE_PATH,
+                AFFINE_LOCK_VISIBLE_TARGET_8ACTION_V1_HASH,
+                &shared->visible_target_table,
+                error,
+                sizeof(error)) != 0) {
+            fprintf(stderr, "affine_lock: %s\n", error);
+            return -1;
+        }
+        // Remember the success so later calls skip the load entirely.
+        shared->visible_target_table_loaded = 1;
+    }
+    return 0;
+}
+
+// Release what `shared` owns and zero it; the struct itself is not freed. NULL is a no-op.
+static void affine_lock_free_shared(AffineLockShared* shared) {
+    if (shared != NULL) {
+        free(shared->next);
+        affine_lock_visible_targets_free(&shared->visible_target_table);
+        memset(shared, 0, sizeof(*shared));  // leaves no stale pointers behind
+    }
+}
+
+static uint32_t affine_lock_apply_action(  // transition-table lookup; `rel` is masked first
+        const AffineLockShared* shared, uint32_t rel, int action) {  // `action` is not range-checked
+    return shared->next[(rel & shared->mask) * AFFINE_LOCK_NUM_ACTIONS + action];
+}
+
+// Advance the env's private LCG (multiplier 1664525, increment 1013904223).
+static uint32_t affine_lock_random_u32(AffineLock* env) {
+    return (env->rng = env->rng * 1664525u + 1013904223u);
+}
+
+// Randomness stays local to each env so that sweep runs differ only by their
+// hyperparameters; never swap this for the global rand()/srand().
+// The raw LCG output has weak low bits, so it is passed through an
+// xorshift-multiply mixer before being used to sample bounded actions or
+// bit states.
+// Each call advances env->rng exactly once.
+static uint32_t affine_lock_random_mixed_u32(AffineLock* env) {
+    uint32_t h = affine_lock_random_u32(env);
+    h = (h ^ (h >> 16)) * 0x7feb352du;
+    h = (h ^ (h >> 15)) * 0x846ca68bu;
+    return h ^ (h >> 16);
+}
+
+static int affine_lock_random_bounded(AffineLock* env, int bound) {  // bound must not be 0
+    const uint32_t limit = UINT32_MAX - (UINT32_MAX % (uint32_t)bound);
+    uint32_t draw;
+    do {
+        draw = affine_lock_random_mixed_u32(env);
+    } while (draw >= limit);  // redraw values at or above the cut-off to avoid modulo bias
+    return (int)(draw % (uint32_t)bound);
+}
+
+// Accept only finite, integral action values in [0, AFFINE_LOCK_NUM_ACTIONS - 1].
+// Returns 1 and stores the id in *action_out, or 0 leaving *action_out untouched.
+static int affine_lock_parse_action(float raw_action, int* action_out) {
+    const float highest = (float)(AFFINE_LOCK_NUM_ACTIONS - 1);
+    int in_range = isfinite(raw_action) && raw_action >= 0.0f && raw_action <= highest;
+    if (!in_range) {
+        return 0;
+    }
+    int candidate = (int)raw_action;  // in range, so the conversion cannot overflow
+    if ((float)candidate != raw_action) {
+        return 0;  // fractional value such as 2.5
+    }
+    *action_out = candidate;
+    return 1;
+}
+
+// Forget the stored solution: length 0 and every action slot set to -1.
+static void affine_lock_clear_generated_path(AffineLock* env) {
+    // All-ones bytes read back as -1 in a two's-complement int.
+    memset(env->solution_actions, 0xff, sizeof(env->solution_actions));
+    env->solution_length = 0;
+}
+
+// Find the table section for `requested_depth`; NULL if the table has none.
+static const AffineLockVisibleTargetDepth* affine_lock_visible_target_depth(
+        const AffineLockShared* shared,
+        int requested_depth) {
+    const AffineLockVisibleTargetTable* table = &shared->visible_target_table;
+    uint32_t i = 0;
+    while (i < table->depth_count && table->depths[i].depth != (uint32_t)requested_depth) {
+        i++;
+    }
+    return i < table->depth_count ? &table->depths[i] : NULL;
+}
+
+// Unpack the record's solution (3 bits per action, first action lowest); aborts if invalid.
+static void affine_lock_store_visible_solution_path(
+        AffineLock* env,
+        const AffineLockVisibleTargetRecord* record) {
+    const int length = (int)record->solution_length;
+    if (length < 1 || AFFINE_LOCK_MAX_SOLUTION_DEPTH < length) {
+        fprintf(stderr, "affine_lock: invalid visible target solution length\n");
+        abort();
+    }
+    env->solution_length = length;
+    for (int step = 0; step < length; step++) {
+        int action = (int)((record->packed_actions >> (3u * step)) & 7ull);
+        if (action < 0 || action >= AFFINE_LOCK_NUM_ACTIONS) {
+            fprintf(stderr, "affine_lock: invalid visible target solution action\n");
+            abort();
+        }
+        env->solution_actions[step] = action;
+    }
+}
+
+static void affine_lock_generate_visible_target_table_target(AffineLock* env) {
+    AffineLockShared* shared = env->shared;
+    int requested_depth = env->scramble_depth;
+    // Draw one stored record at the requested depth; any inconsistency aborts.
+    if (affine_lock_prepare_visible_targets(shared) != 0) {
+        fprintf(stderr, "affine_lock: failed to load visible target table\n");
+        abort();
+    }
+    // Locate the table section that holds records of this depth.
+    const AffineLockVisibleTargetDepth* depth =
+        affine_lock_visible_target_depth(shared, requested_depth);
+    if (depth == NULL || depth->stored_count == 0) {
+        fprintf(stderr,
+            "affine_lock: visible target table has no records for depth %d\n",
+            requested_depth);
+        abort();
+    }
+    // Pick a record from that section with the env's own RNG.
+    int choice = affine_lock_random_bounded(env, (int)depth->stored_count);
+    uint32_t record_index = depth->first_record + (uint32_t)choice;
+    if (record_index >= shared->visible_target_table.record_count) {
+        fprintf(stderr, "affine_lock: invalid visible target record index\n");
+        abort();
+    }
+    const AffineLockVisibleTargetRecord* record =
+        &shared->visible_target_table.records[record_index];
+    if ((int)record->depth != requested_depth ||  // record must belong to this section
+            record->solution_length != record->depth) {  // and carry a full-length solution
+        fprintf(stderr, "affine_lock: invalid visible target record\n");
+        abort();
+    }
+    // Publish the start/target pair, its distance and the stored solution path.
+    env->state = (uint32_t)record->start & shared->mask;
+    affine_lock_clear_generated_path(env);
+    env->target = record->target & shared->mask;
+    env->target_distance = (int)record->depth;
+    affine_lock_store_visible_solution_path(env, record);
+}
+
+// Start a new episode at the current curriculum depth and size its step budget.
+static void affine_lock_reset_state(AffineLock* env) {
+    const int grace = env->shared->step_grace;
+    env->step_count = 0;
+    env->episode_return = 0.0f;
+    env->target_distance = -1;
+    env->scramble_depth = env->curriculum_depth;
+    env->max_steps = env->scramble_depth + grace;  // provisional until a record is drawn
+    affine_lock_generate_visible_target_table_target(env);
+    env->max_steps = env->target_distance + grace;
+}
+
+// Bind `env` to `shared`, seed its private RNG and start the curriculum at start_depth.
+static void affine_lock_init_env(
+        AffineLock* env, AffineLockShared* shared, unsigned int seed) {
+    env->shared = shared;
+    env->rng = seed;
+    env->num_agents = 1;
+    env->curriculum_depth = env->scramble_depth = shared->start_depth;
+    env->max_steps = shared->start_depth + shared->step_grace;
+    env->target_distance = -1;  // no record drawn yet
+    env->step_count = 0;
+    env->episode_return = 0.0f;
+}
+
+// Fold the episode that just ended into env->log. Every field is a running
+// sum and `n` counts the episodes folded in. Must run before the env is reset,
+// because it reads step_count, episode_return and target_distance.
+static void affine_lock_add_log(
+        AffineLock* env,
+        int solved,
+        int invalid) {
+    Log* log = &env->log;
+    const AffineLockShared* shared = env->shared;
+    const int depth = affine_lock_log_depth(env);
+    const float won = solved ? 1.0f : 0.0f;
+    const float steps = (float)env->step_count;
+    const float credit = solved ? affine_lock_solve_credit(shared, depth) : 0.0f;
+
+    // Episode-level totals.
+    log->perf += credit;
+    log->score += credit;
+    log->solve_rate += won;
+    log->max_depth_solve += (solved && depth == shared->max_depth) ? 1.0f : 0.0f;
+    log->episode_return += env->episode_return;
+    log->episode_length += steps;
+    log->solve_steps += solved ? steps : 0.0f;
+    log->timeout_rate += (solved || invalid) ? 0.0f : 1.0f;
+    log->invalid_rate += invalid ? 1.0f : 0.0f;
+    log->solve_efficiency += (solved && depth > 0) ? steps / (float)depth : 0.0f;
+    log->target_distance += (float)env->target_distance;
+    log->solved_target_distance +=
+        (solved && env->target_distance >= 0) ? (float)env->target_distance : 0.0f;
+
+    // Per-depth episode counts and solve counts.
+    log->depth_2_rate += depth == 2 ? 1.0f : 0.0f;
+    log->depth_2_solve_rate += depth == 2 ? won : 0.0f;
+    log->depth_4_rate += depth == 4 ? 1.0f : 0.0f;
+    log->depth_4_solve_rate += depth == 4 ? won : 0.0f;
+    log->depth_5_rate += depth == 5 ? 1.0f : 0.0f;
+    log->depth_5_solve_rate += depth == 5 ? won : 0.0f;
+    log->depth_6_rate += depth == 6 ? 1.0f : 0.0f;
+    log->depth_6_solve_rate += depth == 6 ? won : 0.0f;
+    log->depth_8_rate += depth == 8 ? 1.0f : 0.0f;
+    log->depth_8_solve_rate += depth == 8 ? won : 0.0f;
+    log->depth_16_rate += depth == 16 ? 1.0f : 0.0f;
+    log->depth_16_solve_rate += depth == 16 ? won : 0.0f;
+    log->n += 1.0f;
+}
+
+// Layout: 16 state bits, 16 target bits (+1/-1, low bit first), then step_count / max_steps.
+static void affine_lock_compute_observations(AffineLock* env) {
+    float (*patterns)[8] = env->shared->observation_bit_patterns;
+    const uint32_t words[2] = {env->state, env->target};
+    for (int chunk = 0; chunk < 4; chunk++) {
+        uint32_t byte = (words[chunk / 2] >> (8 * (chunk % 2))) & 0xffu;
+        memcpy(&env->observations[8 * chunk], patterns[byte], 8 * sizeof(float));
+    }
+    env->observations[AFFINE_LOCK_TIMER_INDEX] = env->max_steps > 0 ?
+        (float)env->step_count / (float)env->max_steps : 0.0f;
+}
+
+static void compute_observations(AffineLock* env) {  // unprefixed forwarding wrapper
+    affine_lock_compute_observations(env);
+}
+
+// Start a fresh episode: clear the reward/terminal slots and publish observations.
+static void c_reset(AffineLock* env) {
+    env->rewards[0] = env->terminals[0] = 0.0f;
+    affine_lock_reset_state(env);
+    compute_observations(env);
+}
+
+// First listed curriculum depth above `current_depth`, capped at shared->max_depth.
+static int affine_lock_next_curriculum_depth(
+        const AffineLockShared* shared,
+        int current_depth) {
+    const int cap = shared->max_depth;  // also the answer when no listed depth is deeper
+    for (int i = 0; i < AFFINE_LOCK_CURRICULUM_DEPTH_COUNT; i++) {
+        const int candidate = AFFINE_LOCK_CURRICULUM_DEPTHS[i];
+        if (candidate > current_depth) return candidate < cap ? candidate : cap;
+    }
+    return cap;
+}
+
+// A solve promotes the env to the next depth; any failure returns it to start_depth.
+static void affine_lock_advance_curriculum(AffineLock* env, int solved) {
+    AffineLockShared* shared = env->shared;
+    if (solved) {
+        env->curriculum_depth = affine_lock_next_curriculum_depth(
+            shared, env->scramble_depth);
+    } else {
+        env->curriculum_depth = shared->start_depth;
+    }
+}
+
+static void affine_lock_finish_episode(
+        AffineLock* env,
+        int solved,
+        int invalid) {
+    affine_lock_add_log(env, solved, invalid);    /* 1. record the finished episode */
+    affine_lock_advance_curriculum(env, solved);  /* 2. set the next curriculum depth */
+    affine_lock_reset_state(env);                 /* 3. reset the env state last */
+}
+
+/* Applies actions[0] and writes rewards[0] / terminals[0] for this step:
+ *   action rejected by the parser -> reward -1, terminal (logged invalid)
+ *   state equals target           -> reward +1, terminal (logged solved)
+ *   step_count reached max_steps  -> reward -1, terminal
+ *   anything else                 -> AFFINE_LOCK_STEP_REWARD, not terminal
+ * A terminal step finishes the episode (log, curriculum, reset) before the
+ * observations are recomputed. */
+static void c_step(AffineLock* env) {
+    int action = -1;
+    const int parsed = affine_lock_parse_action(env->actions[0], &action);
+    float reward = AFFINE_LOCK_STEP_REWARD;
+    int solved = 0;
+    int timed_out = 0;
+
+    env->step_count += 1;
+    if (parsed) {
+        env->state = affine_lock_apply_action(env->shared, env->state, action);
+        solved = env->state == env->target;
+        timed_out = !solved && env->step_count >= env->max_steps;
+    }
+    if (solved) {
+        reward = 1.0f;
+    } else if (!parsed || timed_out) {
+        reward = -1.0f;
+    }
+
+    const int done = solved || timed_out || !parsed;
+    env->rewards[0] = reward;
+    env->episode_return += reward;
+    env->terminals[0] = done ? 1.0f : 0.0f;
+    if (done) {
+        /* Last argument is the invalid-action flag. */
+        affine_lock_finish_episode(env, solved, !parsed);
+    }
+
+    compute_observations(env);
+}
+
+/* Tears down the render client if one exists; a repeated call does nothing. */
+static void c_close(AffineLock* env) {
+    if (env->client != NULL) {
+#ifndef AFFINE_LOCK_NO_RENDER
+        if (IsWindowReady()) {
+            CloseWindow();
+        }
+#endif
+        free(env->client);
+        env->client = NULL;
+    }
+}
+
+#ifndef AFFINE_LOCK_NO_RENDER
+static Client* affine_lock_make_client(void) {
+    Client* client = (Client*)calloc(1, sizeof(Client));  /* NOTE(review): used below without a NULL check */
+    client->screen_width = 780;
+    client->screen_height = 360;
+    InitWindow(client->screen_width, client->screen_height, "PufferLib AffineLock");
+    SetTargetFPS(30);
+    return client;  /* c_render stores this in env->client; c_close frees it */
+}
+
+static Color affine_lock_bit_fill(int on) {  /* cell fill colour: set bit vs. clear bit */
+    return on ? (Color){80, 210, 140, 255} : (Color){38, 48, 58, 255};
+}
+
+/* Draws one labelled row of bit cells for `value`. Cells whose bit differs
+ * between env->state and env->target get the thick (3px) outline colour. */
+static void affine_lock_draw_bit_row(
+        AffineLock* env, const char* label, uint32_t value, int y) {
+    const uint32_t diff = env->state ^ env->target;
+    DrawText(label, 30, y + 9, 20, RAYWHITE);
+    for (int bit = 0; bit < AFFINE_LOCK_BITS; bit++) {
+        const int x = 145 + bit * 34;
+        const int differs = (diff >> bit) & 1u;
+        const Rectangle cell = {(float)x, (float)y, 24.0f, 34.0f};
+        DrawRectangle(x, y, 24, 34, affine_lock_bit_fill((value >> bit) & 1u));
+        DrawRectangleLinesEx(cell, differs ? 3.0f : 1.0f, differs ?
+            (Color){238, 88, 88, 255} : (Color){182, 196, 205, 255});
+        DrawText(TextFormat("%d", bit), x + 5, y + 40, 10,
+            (Color){128, 140, 150, 255});
+    }
+}
+
+/* Draws one frame. Closing the window or pressing ESC tears down the client
+ * and exits the process; the window is created lazily on first use. Status
+ * text comes from the last step: terminal with reward > 0 reads "solved". */
+static void c_render(AffineLock* env) {
+    if (IsWindowReady() && (WindowShouldClose() || IsKeyPressed(KEY_ESCAPE))) {
+        c_close(env);
+        exit(0);
+    }
+    if (env->client == NULL) {
+        env->client = affine_lock_make_client();
+    }
+
+    const Color hint_color = (Color){160, 170, 178, 255};
+    const uint32_t rel = (env->state ^ env->target) & env->shared->mask;
+    const float last_reward = env->rewards[0];
+    const char* status = "running";
+    Color status_color = (Color){190, 198, 206, 255};
+    if (env->terminals[0] != 0.0f) {
+        const int won = last_reward > 0.0f;
+        status = won ? "solved" : "failed";
+        status_color = won ?
+            (Color){80, 210, 140, 255} : (Color){238, 88, 88, 255};
+    }
+
+    BeginDrawing();
+    ClearBackground((Color){12, 15, 18, 255});
+    DrawText("Affine Lock", 30, 24, 28, RAYWHITE);
+    DrawText(TextFormat("depth %d/%d  step %d/%d  last reward %.2f",
+        env->scramble_depth, env->shared->max_depth,
+        env->step_count, env->max_steps, last_reward),
+        30, 62, 20, (Color){180, 190, 200, 255});
+    DrawText(TextFormat("status %s  mismatches 0x%04x", status, rel),
+        30, 90, 20, status_color);
+    affine_lock_draw_bit_row(env, "current", env->state, 138);
+    affine_lock_draw_bit_row(env, "target", env->target, 220);
+    DrawText("1 shiftL  2 shiftR  3 inv7  4 bit-swap  5 pair-swap",
+        30, 300, 16, hint_color);
+    DrawText("6 nib-swap  7 rev-nib  8 rev-byte  R reset",
+        30, 322, 16, hint_color);
+    EndDrawing();
+}
+#else
+static void c_render(AffineLock* env) {
+    (void)env;  /* no-op variant used when AFFINE_LOCK_NO_RENDER is defined */
+}
+#endif
diff --git a/ocean/affine_lock/affine_lock_visible_targets.h b/ocean/affine_lock/affine_lock_visible_targets.h
new file mode 100644
index 0000000000..6388f7e3bc
--- /dev/null
+++ b/ocean/affine_lock/affine_lock_visible_targets.h
@@ -0,0 +1,323 @@
+#pragma once
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define AFFINE_LOCK_VISIBLE_TARGET_FORMAT_VERSION 1u  /* only header version the loader accepts */
+#define AFFINE_LOCK_VISIBLE_TARGET_RECORD_SIZE 16u  /* on-disk bytes per record */
+#define AFFINE_LOCK_VISIBLE_TARGET_8ACTION_V1_HASH 0x6e11e18fdafc0baaull  /* action_set_hash of affine_lock_8action_v1 */
+
+typedef struct AffineLockVisibleTargetDepth {  /* one entry of the file's depth table */
+    uint32_t depth;             /* depth value this entry describes */
+    uint32_t first_record;      /* index of its first record in records[] */
+    uint32_t stored_count;      /* number of consecutive records it covers */
+    uint64_t exact_pair_count;  /* carried from the file; the loader only hashes it */
+} AffineLockVisibleTargetDepth;
+
+typedef struct AffineLockVisibleTargetRecord {  /* in-memory form of one 16-byte file record */
+    uint16_t start;           /* u16 read first from the record */
+    uint16_t target;          /* u16 read second from the record */
+    uint64_t packed_actions;  /* opaque to the loader; encoding is defined by the generator */
+    uint8_t solution_length;  /* loader rejects records where this differs from depth */
+    uint8_t depth;            /* per-record depth byte */
+} AffineLockVisibleTargetRecord;
+
+typedef struct AffineLockVisibleTargetTable {  /* parsed file header plus the loaded arrays */
+    uint32_t version;          /* must equal AFFINE_LOCK_VISIBLE_TARGET_FORMAT_VERSION */
+    uint32_t header_size;      /* must equal 52 + 24 * depth_count */
+    uint32_t record_size;      /* must equal AFFINE_LOCK_VISIBLE_TARGET_RECORD_SIZE */
+    uint32_t bits;             /* loader accepts only 16 */
+    uint32_t num_actions;      /* loader accepts 1..8 */
+    uint32_t depth_count;      /* entries in depths[]; loader accepts 1..16 */
+    uint32_t record_count;     /* entries in records[] */
+    uint64_t checksum;         /* value stored in the file; verified after loading */
+    uint64_t action_set_hash;  /* compared with the caller's expected hash when non-zero */
+    AffineLockVisibleTargetDepth* depths;    /* heap array, released by affine_lock_visible_targets_free */
+    AffineLockVisibleTargetRecord* records;  /* heap array, released by affine_lock_visible_targets_free */
+} AffineLockVisibleTargetTable;
+
+/* One hash round over a whole 64-bit word: XOR the value in, then multiply
+ * by the 64-bit FNV prime (unsigned arithmetic wraps modulo 2^64). */
+static uint64_t affine_lock_visible_targets_mix_u64(
+        uint64_t hash, uint64_t value) {
+    const uint64_t fnv_prime = 1099511628211ull;
+    return (hash ^ value) * fnv_prime;
+}
+
+/* printf-style writer for the caller's optional error buffer. With a NULL
+ * buffer or zero size nothing is written; otherwise vsnprintf bounds the
+ * output to error_size bytes including the terminating NUL. */
+static void affine_lock_visible_targets_set_error(
+        char* error, size_t error_size,
+        const char* format, ...) {
+    if (error != NULL && error_size != 0) {
+        va_list ap;
+        va_start(ap, format);
+        vsnprintf(error, error_size, format, ap);
+        va_end(ap);
+    }
+}
+
+/* Reads exactly `size` bytes into `out`; 0 on success, -1 on a short read. */
+static int affine_lock_visible_targets_read_exact(
+        FILE* file, void* out, size_t size) {
+    const size_t got = fread(out, 1, size, file);
+    return got == size ? 0 : -1;
+}
+
+/* Little-endian u16 read: returns 0 and stores the value on success; returns
+ * -1 and leaves *out untouched on a short read. */
+static int affine_lock_visible_targets_read_u16(FILE* file, uint16_t* out) {
+    unsigned char raw[2];
+    if (affine_lock_visible_targets_read_exact(file, raw, sizeof(raw)) != 0) {
+        return -1;
+    }
+    *out = (uint16_t)(raw[0] | (raw[1] << 8));
+    return 0;
+}
+
+/* Little-endian u32 read: 0 and *out set on success, -1 on a short read. */
+static int affine_lock_visible_targets_read_u32(FILE* file, uint32_t* out) {
+    unsigned char raw[4];
+    if (affine_lock_visible_targets_read_exact(file, raw, sizeof(raw)) != 0) {
+        return -1;
+    }
+    uint32_t value = 0;
+    for (int i = 3; i >= 0; i--) {
+        value = (value << 8) | raw[i];  /* fold from the most significant byte */
+    }
+    *out = value;
+    return 0;
+}
+
+/* Little-endian u64 read: returns 0 and stores the value on success; returns
+ * -1 and leaves *out untouched on a short read. */
+static int affine_lock_visible_targets_read_u64(FILE* file, uint64_t* out) {
+    unsigned char raw[8];
+    if (affine_lock_visible_targets_read_exact(file, raw, sizeof(raw)) != 0) {
+        return -1;
+    }
+    uint64_t value = 0;
+    for (int i = 7; i >= 0; i--) {
+        value = (value << 8) | raw[i];  /* fold from the most significant byte */
+    }
+    *out = value;
+    return 0;
+}
+
+/* NULL-safe; frees both arrays and zeroes *table so repeat calls are harmless. */
+static void affine_lock_visible_targets_free(
+        AffineLockVisibleTargetTable* table) {
+    if (table != NULL) {
+        free(table->depths);
+        free(table->records);
+        memset(table, 0, sizeof(*table));
+    }
+}
+
+/* Order-sensitive hash of action_set_hash, then per depth entry its depth,
+ * exact_pair_count, stored_count and every field of each of its records. */
+static uint64_t affine_lock_visible_targets_checksum(
+        const AffineLockVisibleTargetTable* table) {
+    uint64_t h = affine_lock_visible_targets_mix_u64(
+        1469598103934665603ull, table->action_set_hash);
+    for (uint32_t d = 0; d < table->depth_count; d++) {
+        const AffineLockVisibleTargetDepth* bucket = &table->depths[d];
+        const uint64_t summary[3] = {
+            bucket->depth, bucket->exact_pair_count, bucket->stored_count};
+        for (int k = 0; k < 3; k++) {
+            h = affine_lock_visible_targets_mix_u64(h, summary[k]);
+        }
+        for (uint32_t i = 0; i < bucket->stored_count; i++) {
+            const AffineLockVisibleTargetRecord* rec =
+                &table->records[(uint32_t)(bucket->first_record + i)];
+            const uint64_t words[5] = {rec->start, rec->target,
+                rec->packed_actions, rec->solution_length, rec->depth};
+            for (int k = 0; k < 5; k++) {
+                h = affine_lock_visible_targets_mix_u64(h, words[k]);
+            }
+        }
+    }
+    return h;
+}
+
+/* Loads and validates a visible-target table file into *table.
+ * File layout, every integer little-endian:
+ *   fixed header (52 bytes): 8-byte magic "AL7TGT1\0"; u32 version,
+ *     header_size, record_size, bits, num_actions, depth_count,
+ *     record_count; u64 checksum, action_set_hash
+ *   depth table (24 bytes per entry): u32 depth, first_record, stored_count,
+ *     reserved (must be 0); u64 exact_pair_count
+ *   records (16 bytes each): u16 start, target; u64 packed_actions;
+ *     u8 solution_length, depth; u16 reserved (must be 0)
+ * expected_action_set_hash: value the header must carry, or 0 to skip the check.
+ * error, error_size: optional buffer for a failure message (may be NULL / 0).
+ * Returns 0 on success; the caller then releases the table with
+ * affine_lock_visible_targets_free(). Returns -1 on failure: once the arrays
+ * have been allocated the table is freed and zeroed, while earlier failures
+ * leave the parsed header fields in *table with both array pointers NULL. */
+static int affine_lock_visible_targets_load(
+        const char* path,
+        uint64_t expected_action_set_hash,
+        AffineLockVisibleTargetTable* table,
+        char* error,
+        size_t error_size) {
+    static const unsigned char expected_magic[8] = {
+        'A', 'L', '7', 'T', 'G', 'T', '1', '\0'
+    };
+    const char* problem = NULL;  /* fixed message reported at `fail` */
+    int arrays_allocated = 0;
+    uint64_t depth_record_total = 0;
+    uint64_t computed_checksum = 0;
+    memset(table, 0, sizeof(*table));
+
+    FILE* file = fopen(path, "rb");
+    if (file == NULL) {
+        affine_lock_visible_targets_set_error(
+            error, error_size, "failed to open %s: %s", path, strerror(errno));
+        return -1;
+    }
+
+    unsigned char magic[8];
+    if (affine_lock_visible_targets_read_exact(file, magic, sizeof(magic)) != 0 ||
+            affine_lock_visible_targets_read_u32(file, &table->version) != 0 ||
+            affine_lock_visible_targets_read_u32(file, &table->header_size) != 0 ||
+            affine_lock_visible_targets_read_u32(file, &table->record_size) != 0 ||
+            affine_lock_visible_targets_read_u32(file, &table->bits) != 0 ||
+            affine_lock_visible_targets_read_u32(file, &table->num_actions) != 0 ||
+            affine_lock_visible_targets_read_u32(file, &table->depth_count) != 0 ||
+            affine_lock_visible_targets_read_u32(file, &table->record_count) != 0 ||
+            affine_lock_visible_targets_read_u64(file, &table->checksum) != 0 ||
+            affine_lock_visible_targets_read_u64(file, &table->action_set_hash) != 0) {
+        problem = "truncated visible target header";
+        goto fail;
+    }
+    if (memcmp(magic, expected_magic, sizeof(magic)) != 0) {
+        problem = "invalid visible target magic";
+        goto fail;
+    }
+    if (table->version != AFFINE_LOCK_VISIBLE_TARGET_FORMAT_VERSION ||
+            table->record_size != AFFINE_LOCK_VISIBLE_TARGET_RECORD_SIZE ||
+            table->bits != 16 ||
+            table->num_actions == 0 ||
+            table->num_actions > 8 ||
+            table->depth_count == 0 ||
+            table->depth_count > 16) {
+        problem = "unsupported visible target table header";
+        goto fail;
+    }
+    /* 52 fixed header bytes plus 24 per depth entry; depth_count <= 16 here,
+     * so the product cannot overflow. */
+    if (table->header_size != 52u + table->depth_count * 24u) {
+        problem = "unexpected visible target header size";
+        goto fail;
+    }
+    if (expected_action_set_hash != 0 &&
+            table->action_set_hash != expected_action_set_hash) {
+        problem = "visible target action set hash mismatch";
+        goto fail;
+    }
+
+    table->depths = (AffineLockVisibleTargetDepth*)calloc(
+        table->depth_count, sizeof(AffineLockVisibleTargetDepth));
+    table->records = (AffineLockVisibleTargetRecord*)calloc(
+        table->record_count, sizeof(AffineLockVisibleTargetRecord));
+    arrays_allocated = 1;
+    /* calloc(0, ...) is allowed to return NULL, so a NULL records array only
+     * counts as an allocation failure when records were actually requested. */
+    if (table->depths == NULL ||
+            (table->records == NULL && table->record_count != 0)) {
+        problem = "failed to allocate visible target table";
+        goto fail;
+    }
+
+    for (uint32_t i = 0; i < table->depth_count; i++) {
+        AffineLockVisibleTargetDepth* depth = &table->depths[i];
+        uint32_t reserved = 0;
+        if (affine_lock_visible_targets_read_u32(file, &depth->depth) != 0 ||
+                affine_lock_visible_targets_read_u32(
+                    file, &depth->first_record) != 0 ||
+                affine_lock_visible_targets_read_u32(
+                    file, &depth->stored_count) != 0 ||
+                affine_lock_visible_targets_read_u32(file, &reserved) != 0 ||
+                affine_lock_visible_targets_read_u64(
+                    file, &depth->exact_pair_count) != 0) {
+            problem = "truncated visible target depth table";
+            goto fail;
+        }
+        /* Compare against the remaining space instead of summing:
+         * first_record + stored_count can wrap uint32_t when record_count
+         * exceeds 2^31, which would let an out-of-range entry through. */
+        if (reserved != 0 ||
+                depth->first_record > table->record_count ||
+                depth->stored_count >
+                    table->record_count - depth->first_record) {
+            problem = "invalid visible target depth table";
+            goto fail;
+        }
+        depth_record_total += depth->stored_count;
+    }
+    if (depth_record_total != table->record_count) {
+        problem = "visible target depth counts do not sum";
+        goto fail;
+    }
+
+    for (uint32_t i = 0; i < table->record_count; i++) {
+        AffineLockVisibleTargetRecord* record = &table->records[i];
+        uint16_t reserved = 0;
+        if (affine_lock_visible_targets_read_u16(file, &record->start) != 0 ||
+                affine_lock_visible_targets_read_u16(file, &record->target) != 0 ||
+                affine_lock_visible_targets_read_u64(
+                    file, &record->packed_actions) != 0) {
+            problem = "truncated visible target record";
+            goto fail;
+        }
+        int solution_length = fgetc(file);
+        int depth = fgetc(file);
+        if (solution_length == EOF || depth == EOF ||
+                affine_lock_visible_targets_read_u16(file, &reserved) != 0) {
+            problem = "truncated visible target record";
+            goto fail;
+        }
+        record->solution_length = (uint8_t)solution_length;
+        record->depth = (uint8_t)depth;
+        if (reserved != 0 || record->solution_length != record->depth) {
+            problem = "invalid visible target record";
+            goto fail;
+        }
+    }
+
+    if (fgetc(file) != EOF) {
+        problem = "visible target file has trailing bytes";
+        goto fail;
+    }
+    fclose(file);
+    file = NULL;  /* already closed; `fail` must not close it again */
+
+    computed_checksum = affine_lock_visible_targets_checksum(table);
+    if (computed_checksum != table->checksum) {
+        affine_lock_visible_targets_set_error(
+            error, error_size,
+            "visible target checksum mismatch: got 0x%016llx expected 0x%016llx",
+            (unsigned long long)computed_checksum,
+            (unsigned long long)table->checksum);
+        goto fail;
+    }
+    return 0;
+
+fail:
+    if (problem != NULL) {
+        affine_lock_visible_targets_set_error(error, error_size, "%s", problem);
+    }
+    if (file != NULL) {
+        fclose(file);
+    }
+    if (arrays_allocated) {
+        affine_lock_visible_targets_free(table);
+    }
+    return -1;
+}
diff --git a/ocean/affine_lock/binding.c b/ocean/affine_lock/binding.c
new file mode 100644
index 0000000000..e51be3d83e
--- /dev/null
+++ b/ocean/affine_lock/binding.c
@@ -0,0 +1,143 @@
+#include "affine_lock.h"
+
+#define OBS_SIZE AFFINE_LOCK_OBS_SIZE
+#define NUM_ATNS AFFINE_LOCK_NUM_ATNS
+#define ACT_SIZES {AFFINE_LOCK_NUM_ACTIONS}
+#define OBS_TENSOR_T FloatTensor
+
+#define MY_VEC_INIT
+#define MY_VEC_CLOSE
+#define Env AffineLock
+#include "vecenv.h"
+
+/* 32-bit integer finalizer: xorshift by 16, 15, 16 with two odd multipliers
+ * in between. Maps 0 to 0. */
+static uint32_t affine_lock_mix_seed(uint32_t value) {
+    uint32_t x = value;
+    x = (x ^ (x >> 16)) * 0x7feb352du;
+    x = (x ^ (x >> 15)) * 0x846ca68bu;
+    return x ^ (x >> 16);
+}
+
+/* Per-env seed: folds base_seed, then env_id, into the 32-bit FNV offset basis
+ * (XOR, multiply by the FNV prime) and finishes with affine_lock_mix_seed. */
+static unsigned int affine_lock_env_seed(int base_seed, int env_id) {
+    const uint32_t folded = (0x811c9dc5u ^ (uint32_t)base_seed) * 0x01000193u;
+    return affine_lock_mix_seed((folded ^ (uint32_t)env_id) * 0x01000193u);
+}
+
+/* Creates total_agents envs sharing one AffineLockShared and splits them into
+ * num_buffers ranges; num_buffers < 1 or any setup failure aborts. */
+Env* my_vec_init(int* num_envs_out, int* buffer_env_starts, int* buffer_env_counts,
+                 Dict* vec_kwargs, Dict* env_kwargs) {
+    int total_agents = (int)dict_get(vec_kwargs, "total_agents")->value;
+    int num_buffers = (int)dict_get(vec_kwargs, "num_buffers")->value;
+    int base_seed = (int)dict_get(env_kwargs, "seed")->value;
+    int start_depth = (int)dict_get(env_kwargs, "start_depth")->value;
+    int max_depth = (int)dict_get(env_kwargs, "max_depth")->value;
+    int step_grace = (int)dict_get(env_kwargs, "step_grace")->value;
+    if (num_buffers <= 0) {  /* 0 would divide by zero on the next statement */
+        fprintf(stderr, "affine_lock: num_buffers must be >= 1\n");
+        abort();
+    }
+    int agents_per_buffer = total_agents / num_buffers;
+
+    AffineLockShared* shared =
+        (AffineLockShared*)calloc(1, sizeof(AffineLockShared));
+    if (shared == NULL || affine_lock_init_shared(
+            shared, start_depth, max_depth, step_grace) != 0) {
+        fprintf(stderr, "affine_lock: failed to initialize shared state\n");
+        free(shared);
+        abort();
+    }
+    if (affine_lock_prepare_visible_targets(shared) != 0) {
+        affine_lock_free_shared(shared);
+        free(shared);
+        abort();
+    }
+    Env* envs = (Env*)calloc((size_t)total_agents, sizeof(Env));
+    if (envs == NULL) {
+        fprintf(stderr, "affine_lock: failed to allocate envs\n");
+        affine_lock_free_shared(shared);
+        free(shared);
+        abort();
+    }
+
+    /* Walk the envs once, opening a new buffer whenever the current one has
+     * collected agents_per_buffer agents; the last buffer takes the rest. */
+    int buf = 0, buf_agents = 0;
+    buffer_env_starts[0] = 0;
+    buffer_env_counts[0] = 0;
+    for (int i = 0; i < total_agents; i++) {
+        Env* env = &envs[i];
+        affine_lock_init_env(env, shared, affine_lock_env_seed(base_seed, i));
+        buf_agents += env->num_agents;
+        buffer_env_counts[buf]++;
+        if (buf_agents >= agents_per_buffer && buf < num_buffers - 1) {
+            buf++;
+            buffer_env_starts[buf] = i + 1;
+            buffer_env_counts[buf] = 0;
+            buf_agents = 0;
+        }
+    }
+    *num_envs_out = total_agents;
+    return envs;
+}
+
+/* Frees the AffineLockShared reached through envs[0]; envs itself is not freed here. */
+void my_vec_close(Env* envs) {
+    AffineLockShared* shared = envs != NULL ? envs[0].shared : NULL;
+    if (shared != NULL) {
+        affine_lock_free_shared(shared);
+        free(shared);
+    }
+}
+
+void my_init(Env* env, Dict* kwargs) {  /* intentionally empty */
+    (void)env;     /* envs are initialised in my_vec_init via affine_lock_init_env */
+    (void)kwargs;  /* unused */
+}
+
+static float conditional_rate(float numerator, float denominator) {
+    return !(denominator > 0.0f) ? 0.0f : numerator / denominator;  /* 0, negative, NaN -> 0 */
+}
+
+/* Exports the aggregated Log into `out`. Per-solve averages and per-depth solve
+ * rates are divided by their matching rate and reported as 0 when it is not > 0. */
+void my_log(Log* log, Dict* out) {
+    float conditional_solve_steps =
+        conditional_rate(log->solve_steps, log->solve_rate);
+    float conditional_solve_efficiency =
+        conditional_rate(log->solve_efficiency, log->solve_rate);
+    float solved_min_win_moves =
+        conditional_rate(log->solved_target_distance, log->solve_rate);
+    float depth_2_solve_rate =
+        conditional_rate(log->depth_2_solve_rate, log->depth_2_rate);
+    float depth_4_solve_rate =
+        conditional_rate(log->depth_4_solve_rate, log->depth_4_rate);
+    float depth_5_solve_rate =
+        conditional_rate(log->depth_5_solve_rate, log->depth_5_rate);
+    float depth_6_solve_rate =
+        conditional_rate(log->depth_6_solve_rate, log->depth_6_rate);
+    float depth_8_solve_rate =
+        conditional_rate(log->depth_8_solve_rate, log->depth_8_rate);
+    float depth_16_solve_rate =
+        conditional_rate(log->depth_16_solve_rate, log->depth_16_rate);
+
+    dict_set(out, "perf", log->perf);
+    dict_set(out, "score", log->score);
+    dict_set(out, "solve_rate", log->solve_rate);
+    dict_set(out, "max_depth_solve", log->max_depth_solve);
+    dict_set(out, "episode_return", log->episode_return);
+    dict_set(out, "episode_length", log->episode_length);
+    dict_set(out, "timeout_rate", log->timeout_rate);
+    dict_set(out, "invalid_rate", log->invalid_rate);
+    dict_set(out, "min_win_moves", log->target_distance);
+    dict_set(out, "solved_min_win_moves", solved_min_win_moves);
+    dict_set(out, "conditional_solve_steps", conditional_solve_steps);
+    dict_set(out, "conditional_solve_efficiency", conditional_solve_efficiency);
+    dict_set(out, "depth_2_solve_rate", depth_2_solve_rate);
+    dict_set(out, "depth_4_solve_rate", depth_4_solve_rate);
+    dict_set(out, "depth_5_solve_rate", depth_5_solve_rate);
+    dict_set(out, "depth_6_solve_rate", depth_6_solve_rate);
+    dict_set(out, "depth_8_solve_rate", depth_8_solve_rate);
+    dict_set(out, "depth_16_solve_rate", depth_16_solve_rate);
+}
diff --git a/ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin b/ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin
new file mode 100644
index 0000000000..092eb20932
Binary files /dev/null and b/ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin differ
diff --git a/ocean/affine_lock/generated/affine_lock_8action_visible_targets.json b/ocean/affine_lock/generated/affine_lock_8action_visible_targets.json
new file mode 100644
index 0000000000..1cd2840cbf
--- /dev/null
+++ b/ocean/affine_lock/generated/affine_lock_8action_visible_targets.json
@@ -0,0 +1,60 @@
+{
+  "action_id_to_name": [
+    "shift_left",
+    "shift_right",
+    "invert_right_7",
+    "swap_adjacent_bits",
+    "swap_adjacent_pairs",
+    "swap_nibbles_each_byte",
+    "reverse_each_nibble",
+    "reverse_each_byte"
+  ],
+  "action_set": "affine_lock_8action_v1",
+  "action_set_hash": "0x6e11e18fdafc0baa",
+  "binary_path": "ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin",
+  "bits": 16,
+  "checksum": "0xce70379011f6386d",
+  "depth_records": [
+    {"depth": 2, "exact_pair_count": 2216496, "first_record": 0, "stored_count": 65536},
+    {"depth": 4, "exact_pair_count": 34379722, "first_record": 65536, "stored_count": 65536},
+    {"depth": 5, "exact_pair_count": 115388932, "first_record": 131072, "stored_count": 65536},
+    {"depth": 6, "exact_pair_count": 331789220, "first_record": 196608, "stored_count": 65536},
+    {"depth": 8, "exact_pair_count": 1125374770, "first_record": 262144, "stored_count": 65536},
+    {"depth": 16, "exact_pair_count": 100548, "first_record": 327680, "stored_count": 100548}
+  ],
+  "depths": [2, 4, 5, 6, 8, 16],
+  "disconnected_starts": 0,
+  "format": "affine_lock_visible_targets_bin",
+  "header_size": 196,
+  "max_distance": 20,
+  "num_actions": 8,
+  "record_count": 428228,
+  "record_size": 16,
+  "sample_per_depth": 65536,
+  "sample_seed": 0,
+  "stored_all_depths": [16],
+  "version": 1,
+  "visible_distance_histogram": {
+    "0": 65536,
+    "1": 517796,
+    "2": 2216496,
+    "3": 9482736,
+    "4": 34379722,
+    "5": 115388932,
+    "6": 331789220,
+    "7": 736430258,
+    "8": 1125374770,
+    "9": 1053662288,
+    "10": 582197010,
+    "11": 216704714,
+    "12": 65093582,
+    "13": 17020854,
+    "14": 3830040,
+    "15": 701474,
+    "16": 100548,
+    "17": 10352,
+    "18": 924,
+    "19": 40,
+    "20": 4
+  }
+}
diff --git a/ocean/affine_lock/tests/run_all.sh b/ocean/affine_lock/tests/run_all.sh
new file mode 100755
index 0000000000..814afa4a4e
--- /dev/null
+++ b/ocean/affine_lock/tests/run_all.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Runs the AffineLock checks: two smoke tests, then three C test builds.
+ROOT="$(cd "$(dirname "$0")/../../.." && pwd)"
+OUT="${TMPDIR:-/tmp}/affine_lock_tests"
+LOG_OUT="${TMPDIR:-/tmp}/affine_lock_log_export_tests"
+C99_OUT="${TMPDIR:-/tmp}/affine_lock_c99_compile"
+CC_BIN="${CC:-clang}"
+# Smoke tests first: manifest metadata (Python), then the visible-target tables.
+python3 "$ROOT/ocean/affine_lock/tests/test_metadata_smoke.py"
+bash "$ROOT/ocean/affine_lock/tests/test_8action_visible_targets_smoke.sh"
+# Strict C99 build of the unit tests; compile check only, never executed below.
+"$CC_BIN" \
+  -std=c99 -pedantic -Wall -Wextra -Werror -Wno-unused-function \
+  -O0 -I"$ROOT" -I"$ROOT/src" -I"$ROOT/ocean/affine_lock" -I"$ROOT/vendor" \
+  "$ROOT/ocean/affine_lock/tests/test_affine_lock.c" \
+  -lm -o "$C99_OUT"
+# Same unit tests as C11 with AddressSanitizer and UBSan; this binary is run.
+"$CC_BIN" \
+  -std=c11 -Wall -Wextra -Werror -Wno-unused-function \
+  -O0 -g -fsanitize=address,undefined \
+  -I"$ROOT" -I"$ROOT/src" -I"$ROOT/ocean/affine_lock" -I"$ROOT/vendor" \
+  "$ROOT/ocean/affine_lock/tests/test_affine_lock.c" \
+  -lm -o "$OUT"
+# Log-export test; section flags plus --gc-sections drop unreferenced code at link.
+"$CC_BIN" \
+  -std=c11 -Wall -Wextra -Werror -Wno-unused-function -Wno-unused-parameter \
+  -D_POSIX_C_SOURCE=200809L \
+  -O0 -g -ffunction-sections -fdata-sections -fsanitize=address,undefined \
+  -I"$ROOT" -I"$ROOT/src" -I"$ROOT/ocean/affine_lock" -I"$ROOT/vendor" \
+  "$ROOT/ocean/affine_lock/tests/test_affine_lock_log_export.c" \
+  -Wl,--gc-sections -lm -o "$LOG_OUT"
+# Run both sanitizer builds; leak detection is off unless ASAN_OPTIONS is preset.
+ASAN_OPTIONS="${ASAN_OPTIONS:-detect_leaks=0}" "$OUT"
+ASAN_OPTIONS="${ASAN_OPTIONS:-detect_leaks=0}" "$LOG_OUT"
diff --git a/ocean/affine_lock/tests/test_8action_visible_targets_smoke.sh b/ocean/affine_lock/tests/test_8action_visible_targets_smoke.sh
new file mode 100644
index 0000000000..eb5119a257
--- /dev/null
+++ b/ocean/affine_lock/tests/test_8action_visible_targets_smoke.sh
@@ -0,0 +1,243 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Builds the table generator and the loader test, then checks generated tables.
+ROOT="$(cd "$(dirname "$0")/../../.." && pwd)"
+SRC="$ROOT/ocean/affine_lock/tools/generate_8action_visible_targets.c"
+OUT="${TMPDIR:-/tmp}/affine_lock_generate_8action_visible_targets"
+LOADER_SRC="$ROOT/ocean/affine_lock/tests/test_visible_targets_loader.c"
+LOADER_OUT="${TMPDIR:-/tmp}/affine_lock_test_visible_targets_loader"
+BIN_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets.bin"
+JSON_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets.json"
+SEED_42_A_BIN_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets_seed42_a.bin"
+SEED_42_A_JSON_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets_seed42_a.json"
+SEED_42_B_BIN_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets_seed42_b.bin"
+SEED_42_B_JSON_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets_seed42_b.json"
+SEED_69_BIN_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets_seed69.bin"
+SEED_69_JSON_OUT="${TMPDIR:-/tmp}/affine_lock_8action_visible_targets_seed69.json"
+FOUR_BIN_OUT="${TMPDIR:-/tmp}/affine_lock_4action_visible_targets.bin"
+FOUR_JSON_OUT="${TMPDIR:-/tmp}/affine_lock_4action_visible_targets.json"
+CC_BIN="${CC:-gcc}"
+# Fail early with a clear message if either source file is missing.
+if [ ! -f "$SRC" ]; then
+    echo "missing 8-action visible target generator: $SRC" >&2
+    exit 1
+fi
+if [ ! -f "$LOADER_SRC" ]; then
+    echo "missing visible target loader test: $LOADER_SRC" >&2
+    exit 1
+fi
+# Generator: optimized build with OpenMP enabled.
+"$CC_BIN" \
+    -std=c11 -O3 -DNDEBUG -fopenmp \
+    -Wall -Wextra -Werror \
+    -I"$ROOT" -I"$ROOT/ocean/affine_lock" \
+    "$SRC" -lm -o "$OUT"
+# Loader test: unoptimized debug build.
+"$CC_BIN" \
+    -std=c11 -O0 -g \
+    -Wall -Wextra -Werror \
+    -I"$ROOT" -I"$ROOT/ocean/affine_lock" \
+    "$LOADER_SRC" -lm -o "$LOADER_OUT"
+# Default sample seed, 128 samples per sampled depth.
+"$OUT" \
+    --sample-per-depth 128 \
+    --output-bin "$BIN_OUT" \
+    --output-json "$JSON_OUT"
+# 101188 = 5 * 128 + 100548; presumably total / per-depth / depth-16 record counts.
+"$LOADER_OUT" "$BIN_OUT" 101188 128 100548
+# Seed 42, first run.
+"$OUT" \
+    --sample-per-depth 8 \
+    --sample-seed 42 \
+    --output-bin "$SEED_42_A_BIN_OUT" \
+    --output-json "$SEED_42_A_JSON_OUT"
+# Seed 42 again, to check that generation is reproducible.
+"$OUT" \
+    --sample-per-depth 8 \
+    --sample-seed 42 \
+    --output-bin "$SEED_42_B_BIN_OUT" \
+    --output-json "$SEED_42_B_JSON_OUT"
+# A different seed (69), which must change the sampled records.
+"$OUT" \
+    --sample-per-depth 8 \
+    --sample-seed 69 \
+    --output-bin "$SEED_69_BIN_OUT" \
+    --output-json "$SEED_69_JSON_OUT"
+# Same seed must give byte-identical files; different seeds must not.
+cmp "$SEED_42_A_BIN_OUT" "$SEED_42_B_BIN_OUT"
+if cmp -s "$SEED_42_A_BIN_OUT" "$SEED_69_BIN_OUT"; then
+    echo "different sample seeds unexpectedly produced identical tables" >&2
+    exit 1
+fi
+# 4-action variant; only its manifest and file size are checked further down.
+"$OUT" \
+    --action-set affine_lock_4action_v1 \
+    --sample-per-depth 16 \
+    --output-bin "$FOUR_BIN_OUT" \
+    --output-json "$FOUR_JSON_OUT"
+# Cross-check the manifests against the binary layout with an inline Python script.
+python3 - \
+    "$BIN_OUT" "$JSON_OUT" \
+    "$SEED_42_A_BIN_OUT" "$SEED_42_A_JSON_OUT" \
+    "$SEED_69_BIN_OUT" "$SEED_69_JSON_OUT" \
+    "$FOUR_BIN_OUT" "$FOUR_JSON_OUT" <<'PY'
+import json
+import struct
+import sys
+from pathlib import Path
+
+bin_path = Path(sys.argv[1])
+json_path = Path(sys.argv[2])
+seed_42_bin_path = Path(sys.argv[3])
+seed_42_json_path = Path(sys.argv[4])
+seed_69_bin_path = Path(sys.argv[5])
+seed_69_json_path = Path(sys.argv[6])
+four_bin_path = Path(sys.argv[7])
+four_json_path = Path(sys.argv[8])
+manifest = json.loads(json_path.read_text())
+
+assert manifest["action_set"] == "affine_lock_8action_v1"
+assert manifest["action_id_to_name"] == [
+    "shift_left",
+    "shift_right",
+    "invert_right_7",
+    "swap_adjacent_bits",
+    "swap_adjacent_pairs",
+    "swap_nibbles_each_byte",
+    "reverse_each_nibble",
+    "reverse_each_byte",
+]
+assert manifest["bits"] == 16
+assert manifest["num_actions"] == 8
+assert manifest["depths"] == [2, 4, 5, 6, 8, 16]
+assert manifest["sample_per_depth"] == 128
+assert manifest["sample_seed"] == 0
+assert manifest["stored_all_depths"] == [16]
+assert manifest["max_distance"] == 20
+assert manifest["disconnected_starts"] == 0
+assert manifest["visible_distance_histogram"]["16"] == 100548
+assert manifest["visible_distance_histogram"]["20"] == 4
+
+depth_records = manifest["depth_records"]
+assert [record["depth"] for record in depth_records] == [2, 4, 5, 6, 8, 16]
+for record in depth_records[:5]:
+    assert record["stored_count"] == 128
+    assert record["exact_pair_count"] >= record["stored_count"]
+assert depth_records[5]["stored_count"] == 100548
+assert depth_records[5]["exact_pair_count"] == 100548
+
+data = bin_path.read_bytes()
+fixed_header = struct.Struct("<8sIIIIIIIQQ")
+(
+    magic,
+    version,
+    header_size,
+    record_size,
+    bits,
+    num_actions,
+    depth_count,
+    record_count,
+    checksum,
+    action_set_hash,
+) = fixed_header.unpack_from(data, 0)
+
+assert magic == b"AL7TGT1\0"
+assert version == 1
+assert header_size == manifest["header_size"]
+assert record_size == manifest["record_size"] == 16
+assert bits == 16
+assert num_actions == 8
+assert depth_count == 6
+assert record_count == sum(record["stored_count"] for record in depth_records)
+assert checksum == int(manifest["checksum"], 16)
+assert action_set_hash == int(manifest["action_set_hash"], 16)
+assert len(data) == header_size + record_count * record_size
+
+depth_struct = struct.Struct("<IIIIQ")
+offset = fixed_header.size
+for expected in depth_records:
+    depth, first_record, stored_count, reserved, exact_pair_count = (
+        depth_struct.unpack_from(data, offset)
+    )
+    offset += depth_struct.size
+    assert reserved == 0
+    assert depth == expected["depth"]
+    assert first_record == expected["first_record"]
+    assert stored_count == expected["stored_count"]
+    assert exact_pair_count == expected["exact_pair_count"]
+
+record_struct = struct.Struct("<HHQBBH")
+records_start = header_size
+first_start, first_target, first_packed, first_length, first_depth, reserved = (
+    record_struct.unpack_from(data, records_start)
+)
+assert reserved == 0
+assert first_start <= 0xffff
+assert first_target <= 0xffff
+assert first_length == first_depth
+assert first_depth in {2, 4, 5, 6, 8, 16}
+assert first_packed >= 0
+
+seed_42_manifest = json.loads(seed_42_json_path.read_text())
+seed_69_manifest = json.loads(seed_69_json_path.read_text())
+assert seed_42_manifest["sample_seed"] == 42
+assert seed_69_manifest["sample_seed"] == 69
+assert seed_42_manifest["sample_per_depth"] == 8
+assert seed_69_manifest["sample_per_depth"] == 8
+assert seed_42_manifest["depth_records"] == seed_69_manifest["depth_records"]
+
+seed_42_data = seed_42_bin_path.read_bytes()
+seed_69_data = seed_69_bin_path.read_bytes()
+
+def record_span(table_manifest, depth):
+    """Return (start, end) byte offsets of the records stored for ``depth``."""
+    # next() raises StopIteration if the manifest lists no such depth.
+    entry = next(
+        item for item in table_manifest["depth_records"]
+        if item["depth"] == depth
+    )
+    size = table_manifest["record_size"]
+    start = table_manifest["header_size"] + entry["first_record"] * size
+    end = start + entry["stored_count"] * size
+    return start, end
+
+sampled_depths_changed = False
+for depth in (2, 4, 5, 6, 8):
+    start, end = record_span(seed_42_manifest, depth)
+    if seed_42_data[start:end] != seed_69_data[start:end]:
+        sampled_depths_changed = True
+assert sampled_depths_changed
+
+start, end = record_span(seed_42_manifest, 16)
+assert seed_42_data[start:end] == seed_69_data[start:end]
+
+four_manifest = json.loads(four_json_path.read_text())
+assert four_manifest["action_set"] == "affine_lock_4action_v1"
+assert four_manifest["action_id_to_name"] == [
+    "shift_right",
+    "mirror",
+    "invert_right_7",
+    "swap_adjacent_bits",
+]
+assert four_manifest["bits"] == 16
+assert four_manifest["num_actions"] == 4
+assert four_manifest["depths"] == [2, 4, 5, 6, 8, 16]
+assert four_manifest["sample_per_depth"] == 16
+assert four_manifest["sample_seed"] == 0
+assert four_manifest["stored_all_depths"] == []
+assert four_manifest["max_distance"] == 19
+assert four_manifest["disconnected_starts"] == 0
+assert four_manifest["visible_distance_histogram"]["16"] == 2434606
+assert [record["stored_count"] for record in four_manifest["depth_records"]] == [
+    16,
+    16,
+    16,
+    16,
+    16,
+    16,
+]
+assert four_bin_path.stat().st_size == (
+    four_manifest["header_size"] +
+    four_manifest["record_count"] * four_manifest["record_size"]
+)
+PY
diff --git a/ocean/affine_lock/tests/test_affine_lock.c b/ocean/affine_lock/tests/test_affine_lock.c
new file mode 100644
index 0000000000..ce57906efa
--- /dev/null
+++ b/ocean/affine_lock/tests/test_affine_lock.c
@@ -0,0 +1,1262 @@
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define AFFINE_LOCK_NO_RENDER
+#include "../affine_lock.h"
+
+#define EXPECT_TRUE(cond) do { /* report file:line plus the condition text, then exit(1), when cond is false */ \
+    if (!(cond)) { \
+        fprintf(stderr, "%s:%d: expected true: %s\n", __FILE__, __LINE__, #cond); \
+        exit(1); /* the first failure ends the whole test binary */ \
+    } \
+} while (0) /* do/while(0) lets the macro be used like a single statement */
+
+#define EXPECT_EQ_INT(actual, expected) do { /* compare as int; each operand is evaluated exactly once */ \
+    int _a = (int)(actual); \
+    int _e = (int)(expected); \
+    if (_a != _e) { \
+        fprintf(stderr, "%s:%d: expected %s == %d, got %d\n", \
+            __FILE__, __LINE__, #actual, _e, _a); \
+        exit(1); /* the first mismatch ends the whole test binary */ \
+    } \
+} while (0)
+
+#define EXPECT_EQ_U32(actual, expected) do { /* compare as uint32_t; values are printed in hex on failure */ \
+    uint32_t _a = (uint32_t)(actual); \
+    uint32_t _e = (uint32_t)(expected); \
+    if (_a != _e) { \
+        fprintf(stderr, "%s:%d: expected %s == 0x%x, got 0x%x\n", \
+            __FILE__, __LINE__, #actual, _e, _a); \
+        exit(1); /* the first mismatch ends the whole test binary */ \
+    } \
+} while (0)
+
+#define EXPECT_EQ_U64(actual, expected) do { /* compare as uint64_t; values are printed in hex on failure */ \
+    uint64_t _a = (uint64_t)(actual); \
+    uint64_t _e = (uint64_t)(expected); \
+    if (_a != _e) { \
+        fprintf(stderr, "%s:%d: expected %s == 0x%llx, got 0x%llx\n", \
+            __FILE__, __LINE__, #actual, \
+            (unsigned long long)_e, (unsigned long long)_a); /* casts match the %llx conversions */ \
+        exit(1); \
+    } \
+} while (0)
+
+#define EXPECT_NE_U32(actual, expected) do { /* fail when the two values are equal after casting to uint32_t */ \
+    uint32_t _a = (uint32_t)(actual); \
+    uint32_t _e = (uint32_t)(expected); \
+    if (_a == _e) { \
+        fprintf(stderr, "%s:%d: expected %s != 0x%x\n", \
+            __FILE__, __LINE__, #actual, _e); \
+        exit(1); /* the first failure ends the whole test binary */ \
+    } \
+} while (0)
+
+#define EXPECT_NEAR(actual, expected, eps) do { /* float compare: passes iff equal or |actual - expected| <= eps */ \
+    float _a = (float)(actual); /* each operand is evaluated once and narrowed to float */ \
+    float _e = (float)(expected); \
+    if (_a != _e && !(fabsf(_a - _e) <= (eps))) { /* !(<=) makes NaN fail; != keeps inf == inf passing */ \
+        fprintf(stderr, "%s:%d: expected %s ~= %.6f, got %.6f\n", \
+            __FILE__, __LINE__, #actual, _e, _a); \
+        exit(1); /* the first mismatch ends the whole test binary */ \
+    } \
+} while (0)
+
+static AffineLockShared make_shared( /* build a zeroed, initialised shared table; exits the test on failure */
+        int start_depth, int max_depth,
+        int step_grace) {
+    AffineLockShared shared;
+    memset(&shared, 0, sizeof(shared));
+    int rc = affine_lock_init_shared(
+        &shared, start_depth, max_depth, step_grace);
+    EXPECT_EQ_INT(rc, 0); /* init must return 0 */
+    EXPECT_EQ_INT(affine_lock_prepare_visible_targets(&shared), 0); /* so must target preparation */
+    return shared; /* returned by value; the tests release it with affine_lock_free_shared */
+}
+
+static void make_env( /* zero env and the caller-provided buffers, init env, then attach the buffers */
+        AffineLock* env,
+        AffineLockShared* shared,
+        unsigned int seed,
+        float observations[AFFINE_LOCK_OBS_SIZE],
+        float actions[AFFINE_LOCK_NUM_ATNS],
+        float rewards[1],
+        float terminals[1]) {
+    memset(env, 0, sizeof(*env));
+    memset(observations, 0, AFFINE_LOCK_OBS_SIZE * sizeof(float));
+    actions[0] = 0.0f; /* only element 0 of each small buffer is cleared */
+    rewards[0] = 0.0f;
+    terminals[0] = 0.0f;
+    affine_lock_init_env(env, shared, seed);
+    env->observations = observations; /* buffer pointers are attached after init, not before */
+    env->actions = actions;
+    env->rewards = rewards;
+    env->terminals = terminals;
+}
+
+static uint32_t bits_from_text(const char* bits) { /* parse a string of '0'/'1' characters into a bit word */
+    EXPECT_EQ_INT(strlen(bits), AFFINE_LOCK_BITS); /* exactly one character per bit */
+    uint32_t value = 0u;
+    for (int i = 0; i < AFFINE_LOCK_BITS; i++) {
+        EXPECT_TRUE(bits[i] == '0' || bits[i] == '1');
+        if (bits[i] == '1') {
+            value |= 1u << i; /* character i sets bit i, so the leftmost character is bit 0 */
+        }
+    }
+    return value;
+}
+
+static uint32_t test_apply_action(uint32_t state, int action) {
+    /*
+     * Test-local model of the eight moves on a 16-bit word, kept in this
+     * file so the BFS and solution replay helpers do not depend on the
+     * environment's own action code.
+     * Only the low 16 bits of state take part; any action id outside
+     * 0..7 returns the masked state unchanged.
+     */
+    const uint32_t word = state & 0xffffu;
+    if (action == 0) /* every bit moves down one place; bit 0 wraps to bit 15 */
+        return ((word >> 1) | ((word & 1u) << 15)) & 0xffffu;
+    if (action == 1) /* every bit moves up one place; bit 15 wraps to bit 0 */
+        return ((word << 1) & 0xffffu) | ((word >> 15) & 1u);
+    if (action == 2) /* flip bits 9..15 */
+        return word ^ 0xfe00u;
+    if (action == 3) /* exchange neighbouring bits */
+        return ((word & 0x5555u) << 1) | ((word & 0xaaaau) >> 1);
+    if (action == 4) /* exchange neighbouring 2-bit pairs */
+        return ((word & 0x3333u) << 2) | ((word & 0xccccu) >> 2);
+    if (action == 5) /* exchange the two nibbles of each byte */
+        return ((word & 0x0f0fu) << 4) | ((word & 0xf0f0u) >> 4);
+    if (action == 6) /* action 3 followed by action 4 */
+        return test_apply_action(test_apply_action(word, 3), 4);
+    if (action == 7) /* action 6 followed by action 5 */
+        return test_apply_action(test_apply_action(word, 6), 5);
+    return word;
+}
+
+typedef struct TestBfsStats { /* summary filled in by compute_test_bfs_stats for one start state */
+    int reachable_count; /* number of states dequeued by the search, start included */
+    int distance_histogram[128]; /* [d] = number of states at distance d; distances >= 128 are not binned */
+    int farthest_distance; /* largest distance of any dequeued state */
+    int shortest_distance; /* distance from start to target, or -1 if the target was never reached */
+} TestBfsStats;
+
+static void compute_test_bfs_stats( /* breadth-first search from start using test_apply_action; fills *stats */
+        const AffineLockShared* shared,
+        uint32_t start,
+        uint32_t target,
+        TestBfsStats* stats) {
+    memset(stats, 0, sizeof(*stats));
+    stats->shortest_distance = -1;
+
+    int* distances = (int*)malloc((size_t)shared->num_states * sizeof(int)); /* one slot per state */
+    uint32_t* queue =
+        (uint32_t*)malloc((size_t)shared->num_states * sizeof(uint32_t));
+    EXPECT_TRUE(distances != NULL);
+    EXPECT_TRUE(queue != NULL);
+
+    for (int i = 0; i < shared->num_states; i++) {
+        distances[i] = -1; /* -1 marks a state that has not been reached yet */
+    }
+
+    int head = 0;
+    int tail = 0;
+    start &= shared->mask; /* assumes mask < num_states so masked values are valid indices - TODO confirm */
+    target &= shared->mask;
+    distances[start] = 0;
+    queue[tail++] = start;
+
+    while (head < tail) {
+        uint32_t state = queue[head++];
+        int distance = distances[state];
+        stats->reachable_count += 1;
+        if (distance >= 0 && distance < (int)(sizeof(stats->distance_histogram) /
+                sizeof(stats->distance_histogram[0]))) {
+            stats->distance_histogram[distance] += 1; /* out-of-range distances are counted but not binned */
+        }
+        if (distance > stats->farthest_distance) {
+            stats->farthest_distance = distance;
+        }
+
+        for (int action = 0; action < AFFINE_LOCK_NUM_ACTIONS; action++) {
+            uint32_t next = test_apply_action(state, action) & shared->mask;
+            if (distances[next] >= 0) {
+                continue; /* already reached at an equal or shorter distance */
+            }
+            distances[next] = distance + 1;
+            queue[tail++] = next; /* each state is enqueued at most once, when its distance is first set */
+        }
+    }
+
+    stats->shortest_distance = distances[target]; /* stays -1 when the target was never reached */
+    free(distances);
+    free(queue);
+}
+
+static float expected_solve_credit(const AffineLockShared* shared, int depth); /* forward declaration; defined further down */
+
+static void test_log_solve_credit_uses_known_target_distance(void) { /* credit must follow target_distance (8), not scramble_depth (16) */
+    AffineLockShared shared = make_shared(2, 16, 0);
+    AffineLock env;
+    memset(&env, 0, sizeof(env)); /* env is filled in by hand; no reset or step is run */
+    env.shared = &shared;
+    env.scramble_depth = 16;
+    env.target_distance = 8;
+    env.step_count = 8;
+
+    affine_lock_add_log(&env, 1, 0); /* presumably (solved = 1, invalid = 0) - TODO confirm the argument meaning */
+
+    EXPECT_NEAR(env.log.perf, expected_solve_credit(&shared, 8), 0.0f);
+    EXPECT_NEAR(env.log.score, expected_solve_credit(&shared, 8), 0.0f);
+    EXPECT_NEAR(env.log.max_depth_solve, 0.0f, 0.0f);
+    EXPECT_NEAR(env.log.solve_efficiency, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.target_distance, 8.0f, 0.0f);
+    EXPECT_NEAR(env.log.solved_target_distance, 8.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_5_rate, 0.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_5_solve_rate, 0.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_6_rate, 0.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_6_solve_rate, 0.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_8_rate, 1.0f, 0.0f); /* only the depth-8 counters are expected to move */
+    EXPECT_NEAR(env.log.depth_8_solve_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_16_rate, 0.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_16_solve_rate, 0.0f, 0.0f);
+
+    affine_lock_free_shared(&shared);
+}
+
+static void expect_observation_matches(const AffineLock* env) { /* check env->observations against state, target and timer */
+    for (int bit = 0; bit < AFFINE_LOCK_BITS; bit++) {
+        uint32_t bit_mask = 1u << bit;
+        float expected_current = (env->state & bit_mask) ? 1.0f : -1.0f; /* set bit -> +1, clear bit -> -1 */
+        float expected_target = (env->target & bit_mask) ? 1.0f : -1.0f;
+        EXPECT_NEAR(env->observations[bit], expected_current, 0.0f); /* first AFFINE_LOCK_BITS slots: state */
+        EXPECT_NEAR(env->observations[AFFINE_LOCK_BITS + bit], expected_target, 0.0f); /* next slots: target */
+    }
+
+    for (int i = 0; i < AFFINE_LOCK_TIMER_INDEX; i++) {
+        EXPECT_TRUE(env->observations[i] == -1.0f || env->observations[i] == 1.0f);
+    }
+
+    float expected_timer = env->max_steps > 0 ?
+        (float)env->step_count / (float)env->max_steps : 0.0f; /* 0 when max_steps is not positive */
+    EXPECT_TRUE(env->observations[AFFINE_LOCK_TIMER_INDEX] >= 0.0f);
+    EXPECT_TRUE(env->observations[AFFINE_LOCK_TIMER_INDEX] <= 1.0f);
+    EXPECT_NEAR(env->observations[AFFINE_LOCK_TIMER_INDEX], expected_timer, 0.000001f);
+}
+
+static int find_non_solving_action(AffineLock* env) {
+    /* Lowest action id whose successor state is not the target, or -1 if none. */
+    int chosen = -1;
+    for (int id = 0; chosen < 0 && id < AFFINE_LOCK_NUM_ACTIONS; id++) {
+        uint32_t successor = affine_lock_apply_action(env->shared, env->state, id);
+        if (successor != env->target) chosen = id;
+    }
+    return chosen;
+}
+
+static float expected_solve_credit(const AffineLockShared* shared, int depth) { /* credit = depth / max_depth */
+    return (float)depth / (float)shared->max_depth; /* both operands are converted first, so this is a float division */
+}
+
+static uint64_t mix_u64(uint64_t hash, uint64_t value) {
+    /* One FNV-1a style round on a whole 64-bit value: xor it in, then
+     * multiply by the 64-bit FNV prime (unsigned arithmetic wraps). */
+    return (hash ^ value) * 1099511628211ull;
+}
+
+static uint64_t mix_float(uint64_t hash, float value) { /* fold the raw bit pattern of a float into hash */
+    uint32_t bits;
+    memcpy(&bits, &value, sizeof(bits)); /* copies sizeof(uint32_t) bytes; assumes float is 32 bits wide */
+    return mix_u64(hash, bits); /* bit pattern, not numeric value: 0.0f and -0.0f hash differently */
+}
+
+static uint64_t log_snapshot_checksum(uint64_t hash, const Log* log) { /* fold the Log fields below into hash */
+    hash = mix_float(hash, log->perf); /* the order is part of the checksum; do not reorder */
+    hash = mix_float(hash, log->score);
+    hash = mix_float(hash, log->solve_rate);
+    hash = mix_float(hash, log->max_depth_solve);
+    hash = mix_float(hash, log->episode_return);
+    hash = mix_float(hash, log->episode_length);
+    hash = mix_float(hash, log->solve_steps);
+    hash = mix_float(hash, log->timeout_rate);
+    hash = mix_float(hash, log->invalid_rate);
+    hash = mix_float(hash, log->solve_efficiency);
+    hash = mix_float(hash, log->target_distance);
+    hash = mix_float(hash, log->solved_target_distance);
+    hash = mix_float(hash, log->depth_2_rate); /* per-depth counters: 2, 4, 5, 6, 8, 16 */
+    hash = mix_float(hash, log->depth_2_solve_rate);
+    hash = mix_float(hash, log->depth_4_rate);
+    hash = mix_float(hash, log->depth_4_solve_rate);
+    hash = mix_float(hash, log->depth_5_rate);
+    hash = mix_float(hash, log->depth_5_solve_rate);
+    hash = mix_float(hash, log->depth_6_rate);
+    hash = mix_float(hash, log->depth_6_solve_rate);
+    hash = mix_float(hash, log->depth_8_rate);
+    hash = mix_float(hash, log->depth_8_solve_rate);
+    hash = mix_float(hash, log->depth_16_rate);
+    hash = mix_float(hash, log->depth_16_solve_rate);
+    hash = mix_float(hash, log->n);
+    return hash;
+}
+
+static uint64_t reset_snapshot_checksum(const AffineLock* env) { /* one 64-bit digest of the env fields listed below */
+    uint64_t hash = 1469598103934665603ull; /* the 64-bit FNV offset basis */
+    hash = mix_u64(hash, env->state);
+    hash = mix_u64(hash, env->target);
+    hash = mix_u64(hash, (uint64_t)env->step_count);
+    hash = mix_u64(hash, (uint64_t)env->max_steps);
+    hash = mix_u64(hash, (uint64_t)env->scramble_depth);
+    hash = mix_u64(hash, (uint64_t)env->curriculum_depth);
+    hash = mix_u64(hash, (uint64_t)env->solution_length);
+    hash = mix_u64(hash, (uint64_t)(env->target_distance + 1)); /* +1 presumably maps a -1 sentinel to 0 - TODO confirm */
+    hash = mix_float(hash, env->rewards[0]);
+    hash = mix_float(hash, env->terminals[0]);
+    for (int i = 0; i < AFFINE_LOCK_OBS_SIZE; i++) {
+        hash = mix_float(hash, env->observations[i]); /* reads through the attached buffer pointer */
+    }
+    hash = log_snapshot_checksum(hash, &env->log);
+    for (int i = 0; i < AFFINE_LOCK_MAX_SOLUTION_DEPTH; i++) {
+        hash = mix_u64(hash, (uint64_t)(env->solution_actions[i] + 1)); /* every slot is hashed, not just solution_length */
+    }
+    return hash;
+}
+
+static void expect_env_snapshots_equal( /* assert that two envs and their observation buffers are in the same state */
+        const AffineLock* a,
+        const AffineLock* b,
+        const float obs_a[AFFINE_LOCK_OBS_SIZE],
+        const float obs_b[AFFINE_LOCK_OBS_SIZE]) {
+    EXPECT_EQ_U64(reset_snapshot_checksum(a), reset_snapshot_checksum(b)); /* digest first, then field-by-field checks */
+    EXPECT_TRUE(memcmp(obs_a, obs_b, AFFINE_LOCK_OBS_SIZE * sizeof(float)) == 0); /* bytewise comparison */
+    EXPECT_EQ_U32(a->state, b->state);
+    EXPECT_EQ_U32(a->target, b->target);
+    EXPECT_EQ_INT(a->scramble_depth, b->scramble_depth);
+    EXPECT_EQ_INT(a->max_steps, b->max_steps);
+    EXPECT_EQ_INT(a->solution_length, b->solution_length);
+    EXPECT_TRUE(memcmp(a->solution_actions, b->solution_actions,
+        sizeof(a->solution_actions)) == 0);
+}
+
+static void expect_solution_reaches_target( /* cross-check env's distance and stored solution against a test-side BFS */
+        const AffineLockShared* shared, const AffineLock* env) {
+    TestBfsStats stats;
+    compute_test_bfs_stats(shared, env->state, env->target, &stats);
+    EXPECT_EQ_INT(env->target_distance, stats.shortest_distance);
+    EXPECT_EQ_INT(env->solution_length, stats.shortest_distance);
+    EXPECT_EQ_INT(env->max_steps, stats.shortest_distance + shared->step_grace);
+    EXPECT_TRUE(stats.reachable_count > 0);
+    EXPECT_TRUE(stats.farthest_distance >= stats.shortest_distance);
+    /* An unreachable target yields -1; reject it and any distance past the histogram before indexing. */
+    EXPECT_TRUE(env->target_distance >= 0 && (size_t)env->target_distance <
+        sizeof(stats.distance_histogram) / sizeof(stats.distance_histogram[0]));
+    EXPECT_TRUE(stats.distance_histogram[env->target_distance] > 0);
+    uint32_t simulated = env->state; /* replay the stored actions with the test-side action model */
+    for (int i = 0; i < env->solution_length; i++) {
+        int action = env->solution_actions[i];
+        EXPECT_TRUE(action >= 0 && action < AFFINE_LOCK_NUM_ACTIONS);
+        simulated = test_apply_action(simulated, action) & shared->mask;
+    }
+    EXPECT_EQ_U32(simulated, env->target); /* the replay must end exactly on the target */
+}
+
+static void solve_with_stored_solution(AffineLock* env) {
+    /* Feed env->solution_actions to c_step one by one, stopping as soon
+     * as a step reports a terminal. The length is read once up front. */
+    const int planned = env->solution_length;
+    for (int i = 0; i < planned; i++) {
+        env->actions[0] = (float)env->solution_actions[i];
+        c_step(env);
+        if (env->terminals[0] != 0.0f) break;
+    }
+}
+
+static void expect_depth_log_delta( /* between before and after, only the counters of `depth` may have advanced */
+        const Log* before,
+        const Log* after,
+        int depth,
+        int solved) {
+    EXPECT_NEAR(after->depth_2_rate,
+        before->depth_2_rate + (depth == 2 ? 1.0f : 0.0f), 0.0f); /* rate counter: +1 only for the matching depth */
+    EXPECT_NEAR(after->depth_2_solve_rate,
+        before->depth_2_solve_rate + (solved && depth == 2 ? 1.0f : 0.0f), 0.0f); /* solve counter: also needs solved != 0 */
+    EXPECT_NEAR(after->depth_4_rate,
+        before->depth_4_rate + (depth == 4 ? 1.0f : 0.0f), 0.0f);
+    EXPECT_NEAR(after->depth_4_solve_rate,
+        before->depth_4_solve_rate + (solved && depth == 4 ? 1.0f : 0.0f), 0.0f);
+    EXPECT_NEAR(after->depth_5_rate,
+        before->depth_5_rate + (depth == 5 ? 1.0f : 0.0f), 0.0f);
+    EXPECT_NEAR(after->depth_5_solve_rate,
+        before->depth_5_solve_rate + (solved && depth == 5 ? 1.0f : 0.0f), 0.0f);
+    EXPECT_NEAR(after->depth_6_rate,
+        before->depth_6_rate + (depth == 6 ? 1.0f : 0.0f), 0.0f);
+    EXPECT_NEAR(after->depth_6_solve_rate,
+        before->depth_6_solve_rate + (solved && depth == 6 ? 1.0f : 0.0f), 0.0f);
+    EXPECT_NEAR(after->depth_8_rate,
+        before->depth_8_rate + (depth == 8 ? 1.0f : 0.0f), 0.0f);
+    EXPECT_NEAR(after->depth_8_solve_rate,
+        before->depth_8_solve_rate + (solved && depth == 8 ? 1.0f : 0.0f), 0.0f);
+    EXPECT_NEAR(after->depth_16_rate,
+        before->depth_16_rate + (depth == 16 ? 1.0f : 0.0f), 0.0f);
+    EXPECT_NEAR(after->depth_16_solve_rate,
+        before->depth_16_solve_rate + (solved && depth == 16 ? 1.0f : 0.0f), 0.0f);
+}
+
+static void expect_oracle_episode_win(AffineLock* env, int depth) { /* play the stored solution and check the win bookkeeping */
+    AffineLockShared* shared = env->shared;
+    EXPECT_EQ_INT(env->scramble_depth, depth);
+    EXPECT_TRUE(env->solution_length > 0);
+    expect_solution_reaches_target(shared, env);
+
+    Log before = env->log; /* by-value copy so deltas can be checked after the episode */
+    int target_distance = env->target_distance; /* captured now; the env is checked for a new episode afterwards */
+    int solution_length = env->solution_length;
+    EXPECT_TRUE(solution_length > 0);
+    EXPECT_EQ_INT(env->max_steps, target_distance + shared->step_grace);
+
+    for (int step = 0; step < solution_length; step++) {
+        env->actions[0] = (float)env->solution_actions[step];
+        c_step(env);
+        if (step + 1 < solution_length) { /* every step except the last must be a plain non-terminal step */
+            EXPECT_NEAR(env->rewards[0], AFFINE_LOCK_STEP_REWARD, 0.0f);
+            EXPECT_NEAR(env->terminals[0], 0.0f, 0.0f);
+            EXPECT_EQ_INT(env->step_count, step + 1);
+            expect_observation_matches(env);
+        }
+    }
+
+    EXPECT_NEAR(env->rewards[0], 1.0f, 0.0f); /* the final step must report the win reward and a terminal */
+    EXPECT_NEAR(env->terminals[0], 1.0f, 0.0f);
+    EXPECT_EQ_INT(env->step_count, 0); /* step_count is expected to be back at 0 after the terminal step */
+    EXPECT_NEAR(env->log.n, before.n + 1.0f, 0.0f);
+    EXPECT_NEAR(env->log.perf,
+        before.perf + expected_solve_credit(shared, depth), 0.0f);
+    EXPECT_NEAR(env->log.score,
+        before.score + expected_solve_credit(shared, depth), 0.0f);
+    EXPECT_NEAR(env->log.solve_rate, before.solve_rate + 1.0f, 0.0f);
+    EXPECT_NEAR(env->log.timeout_rate, before.timeout_rate, 0.0f);
+    EXPECT_NEAR(env->log.invalid_rate, before.invalid_rate, 0.0f);
+    EXPECT_NEAR(env->log.episode_length,
+        before.episode_length + (float)solution_length, 0.0f);
+    EXPECT_NEAR(env->log.solve_steps,
+        before.solve_steps + (float)solution_length, 0.0f);
+    EXPECT_NEAR(env->log.target_distance,
+        before.target_distance + (float)target_distance, 0.0f);
+    EXPECT_NEAR(env->log.solved_target_distance,
+        before.solved_target_distance + (float)target_distance, 0.0f);
+    expect_depth_log_delta(&before, &env->log, depth, 1);
+
+    int next_depth = affine_lock_next_curriculum_depth(shared, depth);
+    EXPECT_EQ_INT(env->scramble_depth, next_depth); /* a win must move the env on to the next curriculum depth */
+    expect_observation_matches(env);
+}
+
+static void expect_non_solving_episode_timeout(AffineLock* env, int depth) { /* avoid the target until the step budget runs out */
+    AffineLockShared* shared = env->shared;
+    EXPECT_EQ_INT(env->scramble_depth, depth);
+    EXPECT_TRUE(env->solution_length > 0);
+    expect_solution_reaches_target(shared, env);
+
+    Log before = env->log; /* by-value copy so deltas can be checked after the episode */
+    int target_distance = env->target_distance;
+    int max_steps = env->max_steps; /* captured now; the env is checked for a new episode afterwards */
+    EXPECT_TRUE(max_steps > 0);
+
+    for (int step = 0; step < max_steps; step++) {
+        int action = find_non_solving_action(env); /* re-chosen every step from the current state */
+        EXPECT_TRUE(action >= 0);
+        env->actions[0] = (float)action;
+        c_step(env);
+        if (step + 1 < max_steps) { /* every step except the last must be a plain non-terminal step */
+            EXPECT_NEAR(env->rewards[0], AFFINE_LOCK_STEP_REWARD, 0.0f);
+            EXPECT_NEAR(env->terminals[0], 0.0f, 0.0f);
+            EXPECT_EQ_INT(env->step_count, step + 1);
+            expect_observation_matches(env);
+        }
+    }
+
+    EXPECT_NEAR(env->rewards[0], -1.0f, 0.0f); /* the final step must report the timeout penalty and a terminal */
+    EXPECT_NEAR(env->terminals[0], 1.0f, 0.0f);
+    EXPECT_EQ_INT(env->step_count, 0);
+    EXPECT_NEAR(env->log.n, before.n + 1.0f, 0.0f);
+    EXPECT_NEAR(env->log.perf, before.perf, 0.0f); /* no solve credit on a timeout */
+    EXPECT_NEAR(env->log.score, before.score, 0.0f);
+    EXPECT_NEAR(env->log.solve_rate, before.solve_rate, 0.0f);
+    EXPECT_NEAR(env->log.timeout_rate, before.timeout_rate + 1.0f, 0.0f);
+    EXPECT_NEAR(env->log.invalid_rate, before.invalid_rate, 0.0f);
+    EXPECT_NEAR(env->log.episode_length,
+        before.episode_length + (float)max_steps, 0.0f);
+    EXPECT_NEAR(env->log.solve_steps, before.solve_steps, 0.0f);
+    EXPECT_NEAR(env->log.target_distance,
+        before.target_distance + (float)target_distance, 0.0f);
+    EXPECT_NEAR(env->log.solved_target_distance,
+        before.solved_target_distance, 0.0f);
+    expect_depth_log_delta(&before, &env->log, depth, 0);
+    EXPECT_EQ_INT(env->scramble_depth, shared->start_depth); /* a timeout must send the env back to start_depth */
+    expect_observation_matches(env);
+}
+
+static size_t read_text_file(const char* path, char* buffer, size_t capacity) { /* load a file as a NUL-terminated string */
+    FILE* file = fopen(path, "r");
+    EXPECT_TRUE(file != NULL); /* a file that cannot be opened fails the test */
+    size_t nread = fread(buffer, 1, capacity - 1, file); /* capacity must be >= 1; longer files are silently truncated */
+    buffer[nread] = '\0';
+    fclose(file);
+    return nread; /* number of bytes read, terminator not included */
+}
+
+static void test_metadata_contract(void) { /* pin the header's compile-time sizes */
+    EXPECT_EQ_INT(AFFINE_LOCK_BITS, 16);
+    EXPECT_EQ_INT(AFFINE_LOCK_TIMER_INDEX, 32); /* 2 * 16 bit slots come before the timer */
+    EXPECT_EQ_INT(AFFINE_LOCK_OBS_SIZE, 33); /* 32 bit slots + 1 timer slot */
+    EXPECT_EQ_INT(AFFINE_LOCK_NUM_ATNS, 1);
+    EXPECT_EQ_INT(AFFINE_LOCK_NUM_ACTIONS, 8);
+}
+
+static void test_config_and_binding_metadata_contract(void) { /* substring checks on the .ini config and binding.c */
+    char config[16384];
+    read_text_file("config/affine_lock.ini", config, sizeof(config)); /* relative path: presumably run from the repo root - TODO confirm */
+    EXPECT_TRUE(strstr(config, "[base]") != NULL); /* plain substring search; section membership is not verified */
+    EXPECT_TRUE(strstr(config, "env_name = affine_lock") != NULL);
+    EXPECT_TRUE(strstr(config, "[env]") != NULL);
+    EXPECT_TRUE(strstr(config, "start_depth = 2") != NULL);
+    EXPECT_TRUE(strstr(config, "max_depth = 16") != NULL);
+    EXPECT_TRUE(strstr(config, "[sweep]") != NULL);
+    EXPECT_TRUE(strstr(config, "metric = perf") != NULL);
+    EXPECT_TRUE(strstr(config, "goal = maximize") != NULL);
+    EXPECT_TRUE(strstr(config, "min = 100_000_000") != NULL);
+    EXPECT_TRUE(strstr(config, "max = 200_000_000") != NULL);
+    EXPECT_TRUE(strstr(config, "[sweep.policy.num_layers]") != NULL);
+    EXPECT_TRUE(strstr(config, "max = 131072") != NULL);
+    EXPECT_TRUE(strstr(config, "max = 4.0") != NULL);
+
+    char binding[8192]; /* read_text_file truncates anything beyond 8191 bytes */
+    read_text_file("ocean/affine_lock/binding.c", binding, sizeof(binding));
+    EXPECT_TRUE(strstr(binding, "#define OBS_SIZE AFFINE_LOCK_OBS_SIZE") != NULL);
+    EXPECT_TRUE(strstr(binding, "#define ACT_SIZES {AFFINE_LOCK_NUM_ACTIONS}") != NULL);
+    EXPECT_TRUE(strstr(binding, "#define OBS_TENSOR_T FloatTensor") != NULL);
+}
+
+static void test_global_action_examples(void) { /* one hand-written expected result per action, from a fixed start word */
+    AffineLockShared shared = make_shared(2, 16, 0);
+    uint32_t start = bits_from_text("0011011000010111");
+
+    const char* expected[AFFINE_LOCK_NUM_ACTIONS] = { /* expected[action], in bits_from_text order (leftmost char = bit 0) */
+        "0110110000101110",
+        "1001101100001011",
+        "0011011001101000",
+        "0011100100101011",
+        "1100100101001101",
+        "0110001101110001",
+        "1100011010001110",
+        "0110110011101000",
+    };
+
+    for (int action = 0; action < AFFINE_LOCK_NUM_ACTIONS; action++) {
+        uint32_t next = affine_lock_apply_action(&shared, start, action); /* always applied to start, never chained */
+        EXPECT_EQ_U32(next, bits_from_text(expected[action]));
+    }
+
+    affine_lock_free_shared(&shared);
+}
+
+static void test_actions_round_trip_for_all_states(void) { /* action followed by its listed inverse must restore every state */
+    AffineLockShared shared = make_shared(2, 16, 0);
+    const int inverse_actions[AFFINE_LOCK_NUM_ACTIONS] = { /* inverse_actions[a] = the action expected to undo a */
+        AFFINE_LOCK_ACTION_SHIFT_RIGHT,
+        AFFINE_LOCK_ACTION_SHIFT_LEFT,
+        AFFINE_LOCK_ACTION_INVERT_RIGHT_7,
+        AFFINE_LOCK_ACTION_SWAP_ADJACENT_BITS,
+        AFFINE_LOCK_ACTION_SWAP_ADJACENT_PAIRS,
+        AFFINE_LOCK_ACTION_SWAP_NIBBLES_EACH_BYTE,
+        AFFINE_LOCK_ACTION_REVERSE_EACH_NIBBLE,
+        AFFINE_LOCK_ACTION_REVERSE_EACH_BYTE,
+    };
+    EXPECT_EQ_INT(shared.num_states, 1 << 16);
+    EXPECT_EQ_U32(shared.mask, 0xffffu);
+
+    for (int action = 0; action < AFFINE_LOCK_NUM_ACTIONS; action++) {
+        int inverse = inverse_actions[action];
+        EXPECT_TRUE(inverse >= 0 && inverse < AFFINE_LOCK_NUM_ACTIONS);
+        EXPECT_EQ_INT(inverse_actions[inverse], action); /* the inverse table must be symmetric */
+
+        for (uint32_t state = 0; state < (uint32_t)shared.num_states; state++) { /* exhaustive over all states */
+            uint32_t next = affine_lock_apply_action(&shared, state, action);
+            EXPECT_EQ_U32(next & ~shared.mask, 0u); /* no bits may appear outside the mask */
+            uint32_t round_trip = affine_lock_apply_action(&shared, next, inverse);
+            EXPECT_EQ_U32(round_trip, state);
+        }
+    }
+
+    affine_lock_free_shared(&shared);
+}
+
+static void test_reset_randomizes_target_and_current(void) { /* 16 resets must not all yield the same state or target */
+    AffineLockShared shared = make_shared(2, 16, 0);
+    AffineLock env;
+    float observations[AFFINE_LOCK_OBS_SIZE];
+    float actions[AFFINE_LOCK_NUM_ATNS];
+    float rewards[1];
+    float terminals[1];
+    make_env(&env, &shared, 123, observations, actions, rewards, terminals);
+
+    uint32_t first_target = 0;
+    uint32_t first_state = 0;
+    int target_changed = 0;
+    int state_changed = 0;
+
+    for (int i = 0; i < 16; i++) {
+        c_reset(&env);
+        EXPECT_EQ_INT(env.scramble_depth, shared.start_depth);
+        EXPECT_EQ_INT(env.max_steps, shared.start_depth); /* step_grace is 0 here, so the budget equals the depth */
+        EXPECT_EQ_U32(env.target & ~shared.mask, 0u);
+        EXPECT_EQ_U32(env.state & ~shared.mask, 0u);
+        EXPECT_NE_U32(env.state, env.target); /* an episode must never start already solved */
+        expect_observation_matches(&env);
+
+        if (i == 0) {
+            first_target = env.target; /* baseline taken from the first reset */
+            first_state = env.state;
+        } else {
+            if (env.target != first_target) {
+                target_changed = 1;
+            }
+            if (env.state != first_state) {
+                state_changed = 1;
+            }
+        }
+    }
+
+    EXPECT_TRUE(target_changed);
+    EXPECT_TRUE(state_changed);
+    affine_lock_free_shared(&shared);
+}
+
+static void test_visible_target_table_initialization_samples_reachable_target(void) { /* first reset at start_depth 8 */
+    AffineLockShared shared = make_shared(8, 16, 0);
+
+    AffineLock env;
+    float observations[AFFINE_LOCK_OBS_SIZE];
+    float actions[AFFINE_LOCK_NUM_ATNS];
+    float rewards[1];
+    float terminals[1];
+    make_env(&env, &shared, 777, observations, actions, rewards, terminals);
+    c_reset(&env);
+
+    EXPECT_EQ_INT(env.scramble_depth, shared.start_depth);
+    EXPECT_EQ_INT(env.target_distance, shared.start_depth); /* distance must equal the requested depth */
+    EXPECT_EQ_INT(env.max_steps, env.target_distance); /* step_grace is 0 in this setup */
+    EXPECT_EQ_INT(env.solution_length, env.target_distance);
+    EXPECT_NE_U32(env.state, env.target);
+    expect_solution_reaches_target(&shared, &env); /* independent BFS and replay of the stored solution */
+    expect_observation_matches(&env);
+
+    affine_lock_free_shared(&shared);
+}
+
+static void test_visible_target_table_depths_have_expected_distances(void) { /* one fresh table and env per depth */
+    const int depths[] = {2, 4, 5, 6, 8, 16};
+    for (int i = 0; i < (int)(sizeof(depths) / sizeof(depths[0])); i++) { /* bound follows the array, not a literal */
+        int depth = depths[i];
+        AffineLockShared shared = make_shared(depth, 16, 0);
+
+        AffineLock env;
+        float observations[AFFINE_LOCK_OBS_SIZE];
+        float actions[AFFINE_LOCK_NUM_ATNS];
+        float rewards[1];
+        float terminals[1];
+        make_env(&env, &shared, (unsigned int)(1900 + depth), observations,
+            actions, rewards, terminals);
+        c_reset(&env);
+
+        TestBfsStats stats;
+        compute_test_bfs_stats(&shared, env.state, env.target, &stats);
+        int expected_distance = depth <= stats.farthest_distance ?
+            depth : stats.farthest_distance; /* clamp to the farthest distance the BFS found from this state */
+        EXPECT_EQ_INT(env.target_distance, expected_distance);
+        EXPECT_EQ_INT(env.solution_length, expected_distance);
+        EXPECT_EQ_INT(env.max_steps, expected_distance);
+        expect_solution_reaches_target(&shared, &env);
+
+        solve_with_stored_solution(&env); /* the stored solution must end the episode with the win reward */
+        EXPECT_NEAR(rewards[0], 1.0f, 0.0f);
+        EXPECT_NEAR(terminals[0], 1.0f, 0.0f);
+
+        affine_lock_free_shared(&shared);
+    }
+}
+
+static void test_visible_target_table_reset_uses_stored_records(void) { /* per depth: pool size, then 8 verified resets */
+    const int requested_depths[] = {2, 4, 5, 6, 8, 16};
+    const int expected_pool_sizes[] = {65536, 65536, 65536, 65536, 65536, 100548}; /* parallel to requested_depths */
+    const int depth_count = (int)(sizeof(requested_depths) / sizeof(requested_depths[0]));
+    for (int depth_index = 0; depth_index < depth_count; depth_index++) { /* bound follows the array, not a literal */
+        int requested_depth = requested_depths[depth_index];
+        AffineLockShared shared = make_shared(requested_depth, 16, 0);
+        const AffineLockVisibleTargetDepth* table_depth =
+            affine_lock_visible_target_depth(&shared, requested_depth);
+        EXPECT_TRUE(table_depth != NULL);
+        EXPECT_EQ_INT((int)table_depth->stored_count,
+            expected_pool_sizes[depth_index]);
+
+        AffineLock env;
+        float observations[AFFINE_LOCK_OBS_SIZE];
+        float actions[AFFINE_LOCK_NUM_ATNS];
+        float rewards[1];
+        float terminals[1];
+        make_env(&env, &shared, (unsigned int)(2500 + requested_depth),
+            observations, actions, rewards, terminals);
+
+        for (int reset = 0; reset < 8; reset++) {
+            c_reset(&env);
+
+            EXPECT_EQ_INT(env.target_distance, requested_depth);
+            EXPECT_EQ_INT(env.solution_length, requested_depth);
+            EXPECT_EQ_INT(env.max_steps, requested_depth); /* step_grace is 0 in this setup */
+            TestBfsStats stats;
+            compute_test_bfs_stats(&shared, env.state, env.target, &stats);
+            EXPECT_EQ_INT(stats.shortest_distance, requested_depth); /* the true distance must be exactly the depth */
+            expect_solution_reaches_target(&shared, &env);
+        }
+
+        affine_lock_free_shared(&shared);
+    }
+}
+
+static void test_visible_target_table_matches_independent_bfs_over_repeated_resets(void) { /* 12 resets per depth */
+    const int depths[] = {2, 4, 5, 6, 8, 16};
+    const int depth_count = (int)(sizeof(depths) / sizeof(depths[0])); /* bound follows the array, not a literal */
+    for (int depth_index = 0; depth_index < depth_count; depth_index++) {
+        int depth = depths[depth_index];
+        AffineLockShared shared = make_shared(depth, 16, 0);
+
+        AffineLock env;
+        float observations[AFFINE_LOCK_OBS_SIZE];
+        float actions[AFFINE_LOCK_NUM_ATNS];
+        float rewards[1];
+        float terminals[1];
+        make_env(&env, &shared, (unsigned int)(1000 + depth),
+            observations, actions, rewards, terminals);
+
+        for (int reset = 0; reset < 12; reset++) {
+            c_reset(&env);
+            EXPECT_TRUE(env.target_distance > 0);
+            EXPECT_TRUE(env.solution_length > 0);
+            expect_solution_reaches_target(&shared, &env); /* each reset is re-verified with the test-side BFS */
+            expect_observation_matches(&env);
+        }
+
+        affine_lock_free_shared(&shared);
+    }
+}
+
+static void test_observation_encoding_is_32_signed_bit_floats_plus_timer(void) { /* hand-set fields, then recompute obs */
+    AffineLockShared shared = make_shared(2, 16, 0);
+    AffineLock env;
+    float observations[AFFINE_LOCK_OBS_SIZE];
+    float actions[AFFINE_LOCK_NUM_ATNS];
+    float rewards[1];
+    float terminals[1];
+    make_env(&env, &shared, 7, observations, actions, rewards, terminals);
+
+    env.state = 0xa55au; /* fixed patterns; no c_reset is involved in this test */
+    env.target = 0x0f0fu;
+    env.step_count = 3;
+    env.max_steps = 12; /* expected timer value is 3 / 12 */
+    affine_lock_compute_observations(&env);
+
+    expect_observation_matches(&env);
+    affine_lock_free_shared(&shared);
+}
+
+static void test_timer_observation_progresses_and_resets_after_timeout(void) { /* timer goes 0, .25, .5, .75, then back to 0 */
+    AffineLockShared shared = make_shared(2, 16, 0);
+    AffineLock env;
+    float observations[AFFINE_LOCK_OBS_SIZE];
+    float actions[AFFINE_LOCK_NUM_ATNS];
+    float rewards[1];
+    float terminals[1];
+    make_env(&env, &shared, 19, observations, actions, rewards, terminals);
+    c_reset(&env);
+    EXPECT_NEAR(observations[AFFINE_LOCK_TIMER_INDEX], 0.0f, 0.0f);
+
+    env.target = 0u; /* override the episode: all-zero target, all-one state, 4-step budget */
+    env.state = shared.mask;
+    env.step_count = 0;
+    env.max_steps = 4;
+    affine_lock_compute_observations(&env);
+    EXPECT_NEAR(observations[AFFINE_LOCK_TIMER_INDEX], 0.0f, 0.0f);
+
+    actions[0] = 1.0f; /* the same action value is reused for all four steps below */
+    c_step(&env);
+    EXPECT_NEAR(terminals[0], 0.0f, 0.0f);
+    EXPECT_NEAR(observations[AFFINE_LOCK_TIMER_INDEX], 0.25f, 0.000001f);
+
+    c_step(&env);
+    EXPECT_NEAR(terminals[0], 0.0f, 0.0f);
+    EXPECT_NEAR(observations[AFFINE_LOCK_TIMER_INDEX], 0.5f, 0.000001f);
+
+    c_step(&env);
+    EXPECT_NEAR(terminals[0], 0.0f, 0.0f);
+    EXPECT_NEAR(observations[AFFINE_LOCK_TIMER_INDEX], 0.75f, 0.000001f);
+
+    c_step(&env); /* fourth step uses up the budget: timeout penalty and terminal expected */
+    EXPECT_NEAR(rewards[0], -1.0f, 0.0f);
+    EXPECT_NEAR(terminals[0], 1.0f, 0.0f);
+    EXPECT_EQ_INT(env.step_count, 0);
+    EXPECT_NEAR(observations[AFFINE_LOCK_TIMER_INDEX], 0.0f, 0.0f); /* timer must read 0 again after the terminal */
+
+    affine_lock_free_shared(&shared);
+}
+
+static void test_actions_apply_to_current_state_directly(void) { /* one c_step must change state only, per apply_action */
+    AffineLockShared shared = make_shared(2, 16, 0);
+    AffineLock env;
+    float observations[AFFINE_LOCK_OBS_SIZE];
+    float actions[AFFINE_LOCK_NUM_ATNS];
+    float rewards[1];
+    float terminals[1];
+    make_env(&env, &shared, 55, observations, actions, rewards, terminals);
+    c_reset(&env);
+
+    uint32_t target = bits_from_text("1111000011110000");
+    uint32_t state = bits_from_text("0011011000010111");
+    int action = 1;
+    uint32_t expected_state = affine_lock_apply_action(&shared, state, action);
+    EXPECT_NE_U32(expected_state, target); /* precondition: this step must not solve the puzzle */
+
+    env.target = target; /* override the reset episode with the hand-picked pair */
+    env.state = state;
+    env.step_count = 0;
+    env.max_steps = 16;
+    env.actions[0] = (float)action;
+    c_step(&env);
+
+    EXPECT_NEAR(rewards[0], -0.01f, 0.0f); /* NOTE(review): literal; other tests use AFFINE_LOCK_STEP_REWARD - presumably equal */
+    EXPECT_NEAR(terminals[0], 0.0f, 0.0f);
+    EXPECT_EQ_U32(env.target, target); /* the target must be untouched by a step */
+    EXPECT_EQ_U32(env.state, expected_state);
+
+    affine_lock_free_shared(&shared);
+}
+
+static void test_action_float_validation_rejects_non_discrete_values(void) { /* each bad action must end the episode as invalid */
+    AffineLockShared shared = make_shared(2, 16, 0);
+    AffineLock env;
+    float observations[AFFINE_LOCK_OBS_SIZE];
+    float actions[AFFINE_LOCK_NUM_ATNS];
+    float rewards[1];
+    float terminals[1];
+    make_env(&env, &shared, 57, observations, actions, rewards, terminals);
+
+    const float invalid_actions[] = { /* negative, one past the 8 action ids, fractional, and non-finite values */
+        -1.0f,
+        8.0f,
+        1.5f,
+        NAN,
+        INFINITY,
+        -INFINITY,
+    };
+    int count = (int)(sizeof(invalid_actions) / sizeof(invalid_actions[0]));
+    for (int i = 0; i < count; i++) {
+        c_reset(&env);
+        float prev_invalid = env.log.invalid_rate; /* sampled after the reset so only the step's effect is measured */
+        float prev_n = env.log.n;
+
+        actions[0] = invalid_actions[i];
+        c_step(&env);
+
+        EXPECT_NEAR(rewards[0], -1.0f, 0.0f);
+        EXPECT_NEAR(terminals[0], 1.0f, 0.0f);
+        EXPECT_NEAR(env.log.invalid_rate, prev_invalid + 1.0f, 0.0f);
+        EXPECT_NEAR(env.log.n, prev_n + 1.0f, 0.0f); /* exactly one episode is logged per invalid action */
+        EXPECT_EQ_INT(env.step_count, 0);
+    }
+
+    affine_lock_free_shared(&shared);
+}
+/* Solve six episodes in a row, checking depth progression and per-episode log deltas, then lose once at max depth. */
+static void test_visible_target_table_curriculum_and_logging(void) {
+    AffineLockShared shared = make_shared(2, 16, 0);
+
+    AffineLock env;
+    float observations[AFFINE_LOCK_OBS_SIZE];
+    float actions[AFFINE_LOCK_NUM_ATNS];
+    float rewards[1];
+    float terminals[1];
+    make_env(&env, &shared, 42, observations, actions, rewards, terminals);
+    c_reset(&env);
+    /* Scramble depths the env is expected to present while every episode is solved. */
+    const int expected_depths[] = {2, 4, 5, 6, 8, 16};
+    for (int episode = 0; episode < 6; episode++) {
+        int depth = expected_depths[episode];
+        EXPECT_EQ_INT(env.scramble_depth, depth);
+        expect_solution_reaches_target(&shared, &env);
+        /* Snapshot the accumulated log so each assertion below checks this episode's delta only. */
+        float prev_n = env.log.n;
+        float prev_perf = env.log.perf;
+        float prev_max_depth_solve = env.log.max_depth_solve;
+        float prev_target_distance = env.log.target_distance;
+        float prev_solved_target_distance = env.log.solved_target_distance;
+        float prev_depth_2 = env.log.depth_2_rate;
+        float prev_depth_2_solve = env.log.depth_2_solve_rate;
+        float prev_depth_4 = env.log.depth_4_rate;
+        float prev_depth_4_solve = env.log.depth_4_solve_rate;
+        float prev_depth_5 = env.log.depth_5_rate;
+        float prev_depth_5_solve = env.log.depth_5_solve_rate;
+        float prev_depth_6 = env.log.depth_6_rate;
+        float prev_depth_6_solve = env.log.depth_6_solve_rate;
+        float prev_depth_8 = env.log.depth_8_rate;
+        float prev_depth_8_solve = env.log.depth_8_solve_rate;
+        float prev_depth_16 = env.log.depth_16_rate;
+        float prev_depth_16_solve = env.log.depth_16_solve_rate;
+        int target_distance = env.target_distance;
+        int metric_depth = target_distance > 0 ? target_distance : depth;
+        /* Replay the stored solution; only the bucket matching metric_depth should gain an attempt and a solve. */
+        solve_with_stored_solution(&env);
+        EXPECT_NEAR(rewards[0], 1.0f, 0.0f);
+        EXPECT_NEAR(terminals[0], 1.0f, 0.0f);
+        EXPECT_NEAR(env.log.n, prev_n + 1.0f, 0.0f);
+        EXPECT_NEAR(env.log.perf,
+            prev_perf + expected_solve_credit(&shared, metric_depth), 0.0f);
+        EXPECT_NEAR(env.log.max_depth_solve,
+            prev_max_depth_solve + (metric_depth == shared.max_depth ? 1.0f : 0.0f),
+            0.0f);
+        EXPECT_NEAR(env.log.target_distance,
+            prev_target_distance + (float)target_distance, 0.0f);
+        EXPECT_NEAR(env.log.solved_target_distance,
+            prev_solved_target_distance + (float)target_distance, 0.0f);
+        EXPECT_NEAR(env.log.depth_2_rate,
+            prev_depth_2 + (metric_depth == 2 ? 1.0f : 0.0f), 0.0f);
+        EXPECT_NEAR(env.log.depth_2_solve_rate,
+            prev_depth_2_solve + (metric_depth == 2 ? 1.0f : 0.0f), 0.0f);
+        EXPECT_NEAR(env.log.depth_4_rate,
+            prev_depth_4 + (metric_depth == 4 ? 1.0f : 0.0f), 0.0f);
+        EXPECT_NEAR(env.log.depth_4_solve_rate,
+            prev_depth_4_solve + (metric_depth == 4 ? 1.0f : 0.0f), 0.0f);
+        EXPECT_NEAR(env.log.depth_5_rate,
+            prev_depth_5 + (metric_depth == 5 ? 1.0f : 0.0f), 0.0f);
+        EXPECT_NEAR(env.log.depth_5_solve_rate,
+            prev_depth_5_solve + (metric_depth == 5 ? 1.0f : 0.0f), 0.0f);
+        EXPECT_NEAR(env.log.depth_6_rate,
+            prev_depth_6 + (metric_depth == 6 ? 1.0f : 0.0f), 0.0f);
+        EXPECT_NEAR(env.log.depth_6_solve_rate,
+            prev_depth_6_solve + (metric_depth == 6 ? 1.0f : 0.0f), 0.0f);
+        EXPECT_NEAR(env.log.depth_8_rate,
+            prev_depth_8 + (metric_depth == 8 ? 1.0f : 0.0f), 0.0f);
+        EXPECT_NEAR(env.log.depth_8_solve_rate,
+            prev_depth_8_solve + (metric_depth == 8 ? 1.0f : 0.0f), 0.0f);
+        EXPECT_NEAR(env.log.depth_16_rate,
+            prev_depth_16 + (metric_depth == 16 ? 1.0f : 0.0f), 0.0f);
+        EXPECT_NEAR(env.log.depth_16_solve_rate,
+            prev_depth_16_solve + (metric_depth == 16 ? 1.0f : 0.0f), 0.0f);
+        /* A win moves the curriculum to the next listed depth; after the last entry it stays at 16. */
+        int next_depth = episode < 5 ? expected_depths[episode + 1] : 16;
+        EXPECT_EQ_INT(env.scramble_depth, next_depth);
+    }
+    /* At max depth an invalid action must add to n and invalid_rate, leave perf alone and reset the depth. */
+    float prev_n = env.log.n;
+    float prev_perf = env.log.perf;
+    float prev_max_depth_solve = env.log.max_depth_solve;
+    float prev_invalid = env.log.invalid_rate;
+    EXPECT_EQ_INT(env.scramble_depth, shared.max_depth);
+    actions[0] = 999.0f;
+    c_step(&env);
+    EXPECT_NEAR(rewards[0], -1.0f, 0.0f);
+    EXPECT_NEAR(terminals[0], 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.n, prev_n + 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.perf, prev_perf, 0.0f);
+    EXPECT_NEAR(env.log.max_depth_solve, prev_max_depth_solve, 0.0f);
+    EXPECT_NEAR(env.log.invalid_rate, prev_invalid + 1.0f, 0.0f);
+    EXPECT_EQ_INT(env.scramble_depth, shared.start_depth);
+
+    affine_lock_free_shared(&shared);
+}
+/* One oracle win per curriculum depth must leave every depth bucket at one attempt and one solve. */
+static void test_visible_target_table_oracle_wins_all_curriculum_depths_end_to_end(void) {
+    AffineLockShared shared = make_shared(2, 16, 0);
+
+    AffineLock env;
+    float observations[AFFINE_LOCK_OBS_SIZE];
+    float actions[AFFINE_LOCK_NUM_ATNS];
+    float rewards[1];
+    float terminals[1];
+    make_env(&env, &shared, 4242, observations, actions, rewards, terminals);
+    c_reset(&env);
+    /* Win exactly once at each depth, in the order listed. */
+    const int depths[] = {2, 4, 5, 6, 8, 16};
+    for (int i = 0; i < 6; i++) {
+        expect_oracle_episode_win(&env, depths[i]);
+    }
+    /* Six episodes logged, depth parked at max_depth, no timeouts and no invalid moves. */
+    EXPECT_EQ_INT(env.scramble_depth, shared.max_depth);
+    EXPECT_NEAR(env.log.n, 6.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_2_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_2_solve_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_4_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_4_solve_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_5_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_5_solve_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_6_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_6_solve_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_8_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_8_solve_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_16_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.depth_16_solve_rate, 1.0f, 0.0f);
+    EXPECT_NEAR(env.log.timeout_rate, 0.0f, 0.0f);
+    EXPECT_NEAR(env.log.invalid_rate, 0.0f, 0.0f);
+
+    affine_lock_free_shared(&shared);
+}
+/* For each curriculum depth: climb to it with oracle wins, time out once, and expect a fall back to start_depth. */
+static void test_visible_target_table_timeouts_at_all_curriculum_depths_end_to_end(void) {
+    const int loss_depths[] = {2, 4, 5, 6, 8, 16};
+
+    for (int i = 0; i < 6; i++) {
+        int loss_depth = loss_depths[i];
+        AffineLockShared shared = make_shared(2, 16, 0);
+        /* Fresh shared state and env per loss depth; the seed is 5200 + loss_depth. */
+        AffineLock env;
+        float observations[AFFINE_LOCK_OBS_SIZE];
+        float actions[AFFINE_LOCK_NUM_ATNS];
+        float rewards[1];
+        float terminals[1];
+        make_env(&env, &shared, (unsigned int)(5200 + loss_depth),
+            observations, actions, rewards, terminals);
+        c_reset(&env);
+        /* Keep winning until the env's scramble depth reaches the depth this run should lose at. */
+        while (env.scramble_depth < loss_depth) {
+            expect_oracle_episode_win(&env, env.scramble_depth);
+        }
+        expect_non_solving_episode_timeout(&env, loss_depth);
+        /* After the loss: depth back at start_depth, timeout_rate at least 1, invalid_rate still 0. */
+        EXPECT_EQ_INT(env.scramble_depth, shared.start_depth);
+        EXPECT_TRUE(env.log.timeout_rate >= 1.0f);
+        EXPECT_TRUE(env.log.solve_rate >= 0.0f);
+        EXPECT_NEAR(env.log.invalid_rate, 0.0f, 0.0f);
+
+        affine_lock_free_shared(&shared);
+    }
+}
+/* Scripted action for (episode, step): a fixed linear mix reduced modulo the action count. */
+static int deterministic_stream_action(int episode, int step) {
+    return (3 * episode + 7 * step) % AFFINE_LOCK_NUM_ACTIONS;
+}
+/* Play 16 scripted episodes from the given seed and hash a snapshot after every reset and step. */
+static uint64_t run_seed_sequence_checksum(unsigned int seed) {
+    AffineLockShared shared = make_shared(2, 16, 0);
+
+    AffineLock env;
+    float observations[AFFINE_LOCK_OBS_SIZE];
+    float actions[AFFINE_LOCK_NUM_ATNS];
+    float rewards[1];
+    float terminals[1];
+    make_env(&env, &shared, seed, observations, actions, rewards, terminals);
+    /* Initial hash value; each snapshot checksum is folded in with mix_u64. */
+    uint64_t checksum = 1469598103934665603ull;
+    for (int episode = 0; episode < 16; episode++) {
+        c_reset(&env);
+        checksum = mix_u64(checksum, reset_snapshot_checksum(&env));
+        int max_steps = env.max_steps;  /* read once, before any step of this episode */
+        for (int step = 0; step < max_steps + 1; step++) {
+            int action = deterministic_stream_action(episode, step);
+            if (step < env.solution_length) {  /* replay the stored solution while it lasts */
+                action = env.solution_actions[step];
+            }
+            actions[0] = (float)action;
+            c_step(&env);
+            checksum = mix_u64(checksum, reset_snapshot_checksum(&env));
+            if (terminals[0] != 0.0f) {  /* episode ended: move on to the next reset */
+                break;
+            }
+        }
+    }
+
+    affine_lock_free_shared(&shared);
+    return checksum;
+}
+/* Two envs with one seed must stay identical step for step; run checksums must repeat per seed and differ across seeds. */
+static void test_deterministic_seed_sequences(void) {
+    AffineLockShared shared = make_shared(2, 16, 0);
+
+    AffineLock env_a;
+    AffineLock env_b;
+    float obs_a[AFFINE_LOCK_OBS_SIZE], obs_b[AFFINE_LOCK_OBS_SIZE];
+    float atn_a[AFFINE_LOCK_NUM_ATNS], atn_b[AFFINE_LOCK_NUM_ATNS];
+    float rew_a[1], rew_b[1];
+    float term_a[1], term_b[1];
+    make_env(&env_a, &shared, 12345, obs_a, atn_a, rew_a, term_a);
+    make_env(&env_b, &shared, 12345, obs_b, atn_b, rew_b, term_b);
+    /* Feed both envs the same actions and compare rewards, terminals and snapshots after every call. */
+    for (int episode = 0; episode < 16; episode++) {
+        c_reset(&env_a);
+        c_reset(&env_b);
+        expect_env_snapshots_equal(&env_a, &env_b, obs_a, obs_b);
+        int max_steps = env_a.max_steps;
+        for (int step = 0; step < max_steps + 1; step++) {
+            int action = deterministic_stream_action(episode, step);
+            if (step < env_a.solution_length) {
+                action = env_a.solution_actions[step];
+            }
+            atn_a[0] = (float)action;
+            atn_b[0] = (float)action;
+            c_step(&env_a);
+            c_step(&env_b);
+            EXPECT_NEAR(rew_a[0], rew_b[0], 0.0f);
+            EXPECT_NEAR(term_a[0], term_b[0], 0.0f);
+            expect_env_snapshots_equal(&env_a, &env_b, obs_a, obs_b);
+            if (term_a[0] != 0.0f) {
+                break;
+            }
+        }
+    }
+
+    affine_lock_free_shared(&shared);
+    /* Checksum level: each seed reproduces its own value, and seeds 1 and 2 give different values. */
+    uint64_t seed_1 = run_seed_sequence_checksum(1);
+    uint64_t seed_1_repeat = run_seed_sequence_checksum(1);
+    uint64_t seed_2 = run_seed_sequence_checksum(2);
+    uint64_t seed_2_repeat = run_seed_sequence_checksum(2);
+    EXPECT_EQ_U64(seed_1, seed_1_repeat);
+    EXPECT_EQ_U64(seed_2, seed_2_repeat);
+    EXPECT_TRUE(seed_1 != seed_2);
+}
+/* Seed-42 scripted run: five stored-solution episodes, then one invalid action, hashed after the reset and every step. */
+static uint64_t run_visible_table_seed_42_golden_sequence(void) {
+    AffineLockShared shared = make_shared(2, 16, 0);
+
+    AffineLock env;
+    float observations[AFFINE_LOCK_OBS_SIZE];
+    float actions[AFFINE_LOCK_NUM_ATNS];
+    float rewards[1];
+    float terminals[1];
+    make_env(&env, &shared, 42, observations, actions, rewards, terminals);
+    /* c_reset is called only once; later episodes continue from the state c_step leaves after a terminal. */
+    uint64_t checksum = 1469598103934665603ull;
+    c_reset(&env);
+    checksum = mix_u64(checksum, reset_snapshot_checksum(&env));
+    for (int episode = 0; episode < 5; episode++) {
+        int length = env.solution_length;  /* captured before stepping this episode */
+        for (int step = 0; step < length; step++) {
+            actions[0] = (float)env.solution_actions[step];
+            c_step(&env);
+            checksum = mix_u64(checksum, reset_snapshot_checksum(&env));
+            if (terminals[0] != 0.0f) {
+                break;
+            }
+        }
+    }
+    EXPECT_EQ_INT(env.scramble_depth, 16);
+    actions[0] = 999.0f;  /* deliberately invalid action */
+    c_step(&env);
+    checksum = mix_u64(checksum, reset_snapshot_checksum(&env));
+    EXPECT_EQ_INT(env.scramble_depth, 2);
+
+    affine_lock_free_shared(&shared);
+    return checksum;
+}
+/* Compare the seed-42 scripted run against its recorded golden checksum. */
+static void test_visible_table_seed_42_golden_checksum(void) {
+    uint64_t checksum = run_visible_table_seed_42_golden_sequence();
+    EXPECT_EQ_U64(checksum, 0x1b6d67bf767fd010ull);
+}
+/* Equal seeds must give identical puzzles and outcomes; seeds 1 and 2 must differ in at least one of 8 resets. */
+static void test_deterministic_seed_sequences_and_distinct_env_ids(void) {
+    AffineLockShared shared = make_shared(2, 16, 0);
+
+    AffineLock env_a;
+    AffineLock env_b;
+    float obs_a[AFFINE_LOCK_OBS_SIZE], obs_b[AFFINE_LOCK_OBS_SIZE];
+    float atn_a[AFFINE_LOCK_NUM_ATNS], atn_b[AFFINE_LOCK_NUM_ATNS];
+    float rew_a[1], rew_b[1];
+    float term_a[1], term_b[1];
+    make_env(&env_a, &shared, 12345, obs_a, atn_a, rew_a, term_a);
+    make_env(&env_b, &shared, 12345, obs_b, atn_b, rew_b, term_b);
+    /* First half: two envs sharing seed 12345 are compared after every reset and every solve. */
+    for (int episode = 0; episode < 8; episode++) {
+        c_reset(&env_a);
+        c_reset(&env_b);
+        EXPECT_EQ_U32(env_a.target, env_b.target);
+        EXPECT_EQ_U32(env_a.state, env_b.state);
+        EXPECT_EQ_INT(env_a.scramble_depth, env_b.scramble_depth);
+        EXPECT_EQ_INT(env_a.solution_length, env_b.solution_length);
+        EXPECT_TRUE(memcmp(env_a.solution_actions, env_b.solution_actions,
+            sizeof(env_a.solution_actions)) == 0);
+        EXPECT_TRUE(memcmp(obs_a, obs_b, sizeof(obs_a)) == 0);
+        /* Solve both with their stored solutions and confirm they are still in lock-step. */
+        solve_with_stored_solution(&env_a);
+        solve_with_stored_solution(&env_b);
+        EXPECT_EQ_U32(env_a.target, env_b.target);
+        EXPECT_EQ_U32(env_a.state, env_b.state);
+        EXPECT_NEAR(rew_a[0], rew_b[0], 0.0f);
+        EXPECT_NEAR(term_a[0], term_b[0], 0.0f);
+        EXPECT_TRUE(memcmp(obs_a, obs_b, sizeof(obs_a)) == 0);
+    }
+    /* Second half: four envs, seeds 1 and 2 plus a repeat of each, only ever reset (never stepped). */
+    AffineLock env_1;
+    AffineLock env_2;
+    AffineLock env_1_repeat;
+    AffineLock env_2_repeat;
+    float obs_1[AFFINE_LOCK_OBS_SIZE], obs_2[AFFINE_LOCK_OBS_SIZE];
+    float obs_1r[AFFINE_LOCK_OBS_SIZE], obs_2r[AFFINE_LOCK_OBS_SIZE];
+    float atn_1[AFFINE_LOCK_NUM_ATNS], atn_2[AFFINE_LOCK_NUM_ATNS];
+    float atn_1r[AFFINE_LOCK_NUM_ATNS], atn_2r[AFFINE_LOCK_NUM_ATNS];
+    float rew_1[1], rew_2[1], rew_1r[1], rew_2r[1];
+    float term_1[1], term_2[1], term_1r[1], term_2r[1];
+    make_env(&env_1, &shared, 1, obs_1, atn_1, rew_1, term_1);
+    make_env(&env_2, &shared, 2, obs_2, atn_2, rew_2, term_2);
+    make_env(&env_1_repeat, &shared, 1, obs_1r, atn_1r, rew_1r, term_1r);
+    make_env(&env_2_repeat, &shared, 2, obs_2r, atn_2r, rew_2r, term_2r);
+
+    int differs = 0;
+    for (int i = 0; i < 8; i++) {
+        c_reset(&env_1);
+        c_reset(&env_2);
+        c_reset(&env_1_repeat);
+        c_reset(&env_2_repeat);
+        /* Each seed must reproduce its own puzzle exactly. */
+        EXPECT_EQ_U32(env_1.target, env_1_repeat.target);
+        EXPECT_EQ_U32(env_1.state, env_1_repeat.state);
+        EXPECT_EQ_U32(env_2.target, env_2_repeat.target);
+        EXPECT_EQ_U32(env_2.state, env_2_repeat.state);
+        EXPECT_TRUE(memcmp(env_1.solution_actions, env_1_repeat.solution_actions,
+            sizeof(env_1.solution_actions)) == 0);
+        EXPECT_TRUE(memcmp(env_2.solution_actions, env_2_repeat.solution_actions,
+            sizeof(env_2.solution_actions)) == 0);
+        /* Seeds 1 and 2 only need to differ in some reset, not in every one. */
+        if (env_1.target != env_2.target || env_1.state != env_2.state ||
+                memcmp(env_1.solution_actions, env_2.solution_actions,
+                    sizeof(env_1.solution_actions)) != 0) {
+            differs = 1;
+        }
+    }
+    EXPECT_TRUE(differs);
+
+    affine_lock_free_shared(&shared);
+}
+/* Test driver: runs each test function in the order listed, then prints a success line and returns 0. */
+int main(void) {
+    test_metadata_contract();
+    test_config_and_binding_metadata_contract();
+    test_global_action_examples();
+    test_actions_round_trip_for_all_states();
+    test_reset_randomizes_target_and_current();
+    test_visible_target_table_initialization_samples_reachable_target();
+    test_visible_target_table_depths_have_expected_distances();
+    test_visible_target_table_reset_uses_stored_records();
+    test_visible_target_table_matches_independent_bfs_over_repeated_resets();
+    test_log_solve_credit_uses_known_target_distance();
+    test_observation_encoding_is_32_signed_bit_floats_plus_timer();
+    test_timer_observation_progresses_and_resets_after_timeout();
+    test_actions_apply_to_current_state_directly();
+    test_action_float_validation_rejects_non_discrete_values();
+    test_visible_target_table_curriculum_and_logging();
+    test_visible_target_table_oracle_wins_all_curriculum_depths_end_to_end();
+    test_visible_target_table_timeouts_at_all_curriculum_depths_end_to_end();
+    test_deterministic_seed_sequences();
+    test_visible_table_seed_42_golden_checksum();
+    test_deterministic_seed_sequences_and_distinct_env_ids();
+    printf("affine_lock tests passed\n");
+    return 0;
+}
diff --git a/ocean/affine_lock/tests/test_affine_lock_log_export.c b/ocean/affine_lock/tests/test_affine_lock_log_export.c
new file mode 100644
index 0000000000..773d0279f4
--- /dev/null
+++ b/ocean/affine_lock/tests/test_affine_lock_log_export.c
@@ -0,0 +1,276 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define AFFINE_LOCK_NO_RENDER
+#include "../binding.c"
+/* Exit 1 with file:line unless actual == expected or |actual - expected| <= tolerance; a NaN operand always fails. */
+#define EXPECT_NEAR(actual, expected, tolerance) do { \
+    double _actual = (double)(actual); \
+    double _expected = (double)(expected); \
+    double _tolerance = (double)(tolerance); \
+    if (!(_actual == _expected || fabs(_actual - _expected) <= _tolerance)) { \
+        fprintf(stderr, "%s:%d: expected %.9f ~= %.9f\n", \
+            __FILE__, __LINE__, _actual, _expected); \
+        exit(1); \
+    } \
+} while (0)
+/* Exit 1 with file:line unless both arguments are equal after conversion to int. */
+#define EXPECT_EQ_INT(actual, expected) do { \
+    const int _got = (int)(actual); \
+    const int _want = (int)(expected); \
+    if (!(_got == _want)) { \
+        fprintf(stderr, "%s:%d: expected %d == %d\n", \
+            __FILE__, __LINE__, _got, _want); \
+        exit(1); \
+    } \
+} while (0)
+/* Exit 1 with file:line (values printed in hex) unless both arguments are equal as uint32_t. */
+#define EXPECT_EQ_U32(actual, expected) do { \
+    const uint32_t _got = (uint32_t)(actual); \
+    const uint32_t _want = (uint32_t)(expected); \
+    if (!(_got == _want)) { \
+        fprintf(stderr, "%s:%d: expected 0x%x == 0x%x\n", \
+            __FILE__, __LINE__, _got, _want); \
+        exit(1); \
+    } \
+} while (0)
+/* Exit 1 with file:line (values printed in hex) when both arguments are equal as uint32_t. */
+#define EXPECT_NE_U32(actual, expected) do { \
+    const uint32_t _got = (uint32_t)(actual); \
+    const uint32_t _other = (uint32_t)(expected); \
+    if (!(_got != _other)) { \
+        fprintf(stderr, "%s:%d: expected 0x%x != 0x%x\n", \
+            __FILE__, __LINE__, _got, _other); \
+        exit(1); \
+    } \
+} while (0)
+/* Exit 1 after printing file:line and the condition's source text when cond evaluates false. */
+#define EXPECT_TRUE(cond) do { \
+    if (!(cond)) { \
+        fprintf(stderr, "%s:%d: expected true: %s\n", \
+            __FILE__, __LINE__, #cond); \
+        exit(1); \
+    } \
+} while (0)
+/* Value field of the entry dict_get returns for key. */
+static double dict_value(Dict* dict, const char* key) {
+    return (*dict_get(dict, key)).value;
+}
+/* 1 when dict_get_unsafe returns a non-NULL entry for key, otherwise 0. */
+static int dict_has_key(Dict* dict, const char* key) {
+    return dict_get_unsafe(dict, key) ? 1 : 0;
+}
+/* New two-entry Dict carrying total_agents and num_buffers = 1; release it with free_dict. */
+static Dict* make_vec_kwargs_for_agents(int total_agents) {
+    Dict* kwargs = create_dict(2);
+    dict_set(kwargs, "total_agents", total_agents);
+    dict_set(kwargs, "num_buffers", 1);
+    return kwargs;
+}
+/* Vec kwargs for the default two-agent batch. */
+static Dict* make_vec_kwargs(void) {
+    return make_vec_kwargs_for_agents(2);
+}
+/* New four-entry Dict: start_depth 2, max_depth 16, step_grace 0 and the caller's seed. */
+static Dict* make_env_kwargs(int seed) {
+    Dict* kwargs = create_dict(4);
+    dict_set(kwargs, "start_depth", 2);
+    dict_set(kwargs, "max_depth", 16);
+    dict_set(kwargs, "step_grace", 0);
+    dict_set(kwargs, "seed", seed);
+    return kwargs;
+}
+/* Free a Dict's items array, then the Dict struct itself. */
+static void free_dict(Dict* dict) {
+    free(dict->items);
+    free(dict);
+}
+/* Build the default two-agent batch through my_vec_init and check the counts it reports back. */
+static Env* make_binding_envs(int seed) {
+    Dict* vec_args = make_vec_kwargs();
+    Dict* env_args = make_env_kwargs(seed);
+    int starts_out[] = {0};
+    int counts_out[] = {0};
+    int env_total = 0;
+    Env* batch = my_vec_init(&env_total, starts_out, counts_out, vec_args, env_args);
+    EXPECT_EQ_INT(env_total, 2);
+    EXPECT_EQ_INT(*starts_out, 0);
+    EXPECT_EQ_INT(*counts_out, 2);
+    free_dict(env_args);
+    free_dict(vec_args);
+    return batch;
+}
+/* Build a total_agents-sized batch through my_vec_init and check the counts it reports back. */
+static Env* make_binding_env_batch(int seed, int total_agents) {
+    Dict* vec_args = make_vec_kwargs_for_agents(total_agents);
+    Dict* env_args = make_env_kwargs(seed);
+    int starts_out[] = {0};
+    int counts_out[] = {0};
+    int env_total = 0;
+    Env* batch = my_vec_init(&env_total, starts_out, counts_out, vec_args, env_args);
+    EXPECT_EQ_INT(env_total, total_agents);
+    EXPECT_EQ_INT(*starts_out, 0);
+    EXPECT_EQ_INT(*counts_out, total_agents);
+    free_dict(env_args);
+    free_dict(vec_args);
+    return batch;
+}
+/* Tear down a batch: my_vec_close first, then free the Env array itself. */
+static void free_binding_envs(Env* envs) {
+    my_vec_close(envs);
+    free(envs);
+}
+/* Per-env rng must repeat for the same (seed, env index) and change with either the index or the seed. */
+static void test_vec_init_mixes_base_seed_and_env_id(void) {
+    Env* base = make_binding_envs(123);
+    Env* repeat = make_binding_envs(123);
+    Env* different_seed = make_binding_envs(124);
+    /* Same seed: matching rng per index. Env 0 vs env 1, and seed 123 vs 124, must not match. */
+    EXPECT_EQ_U32(base[0].rng, repeat[0].rng);
+    EXPECT_EQ_U32(base[1].rng, repeat[1].rng);
+    EXPECT_NE_U32(base[0].rng, base[1].rng);
+    EXPECT_NE_U32(base[0].rng, different_seed[0].rng);
+
+    free_binding_envs(base);
+    free_binding_envs(repeat);
+    free_binding_envs(different_seed);
+}
+/* One FNV-1a-style round on a whole 64-bit word: XOR the value in, then multiply by the 64-bit FNV prime. */
+static uint64_t mix_u64_for_binding_test(uint64_t hash, uint64_t value) {
+    const uint64_t prime = 1099511628211ull;
+    const uint64_t mixed = hash ^ value;
+    return mixed * prime;
+}
+/* Fold an env's state, target, target distance and stored solution into one 64-bit checksum. */
+static uint64_t binding_reset_checksum(const Env* env) {
+    uint64_t acc = 1469598103934665603ull;
+    acc = mix_u64_for_binding_test(acc, env->state);
+    acc = mix_u64_for_binding_test(acc, env->target);
+    acc = mix_u64_for_binding_test(acc, (uint64_t)(env->target_distance + 1));
+    acc = mix_u64_for_binding_test(acc, (uint64_t)env->solution_length);
+    /* Every solution slot is mixed in, not just the first solution_length entries. */
+    for (int slot = 0; slot < AFFINE_LOCK_MAX_SOLUTION_DEPTH; ++slot) {
+        acc = mix_u64_for_binding_test(acc, (uint64_t)(env->solution_actions[slot] + 1));
+    }
+    return acc;
+}
+/* Zero the caller's buffers, then point env i at row i of observations and element i of the other three. */
+static void assign_binding_env_buffers(
+        Env* envs,
+        int total_agents,
+        float observations[][AFFINE_LOCK_OBS_SIZE],
+        float actions[],
+        float rewards[],
+        float terminals[]) {
+    memset(observations, 0, (size_t)total_agents * sizeof(observations[0]));
+    memset(actions, 0, (size_t)total_agents * sizeof(actions[0]));
+    memset(rewards, 0, (size_t)total_agents * sizeof(rewards[0]));
+    memset(terminals, 0, (size_t)total_agents * sizeof(terminals[0]));
+    for (int agent = 0; agent < total_agents; ++agent) {
+        Env* slot = envs + agent;
+        slot->observations = observations[agent];
+        slot->actions = actions + agent;
+        slot->rewards = rewards + agent;
+        slot->terminals = terminals + agent;
+    }
+}
+/* With one base seed, each env's first puzzle must repeat across two my_vec_init runs and not all envs may share a puzzle. */
+static void test_vec_init_visible_targets_repeat_across_runs_and_vary_by_env_id(void) {
+    enum { total_agents = 64 };  /* integer constant expression, so it can size the arrays below */
+    Env* run_a = make_binding_env_batch(42, total_agents);
+    Env* run_b = make_binding_env_batch(42, total_agents);
+    /* Caller-owned per-env buffers, sized from total_agents so they cannot drift from the env count. */
+    float obs_a[total_agents][AFFINE_LOCK_OBS_SIZE];
+    float obs_b[total_agents][AFFINE_LOCK_OBS_SIZE];
+    float actions_a[total_agents], actions_b[total_agents];
+    float rewards_a[total_agents], rewards_b[total_agents];
+    float terminals_a[total_agents], terminals_b[total_agents];
+    assign_binding_env_buffers(
+        run_a, total_agents, obs_a, actions_a, rewards_a, terminals_a);
+    assign_binding_env_buffers(
+        run_b, total_agents, obs_b, actions_b, rewards_b, terminals_b);
+    /* Reset env i in both runs and compare them field by field and by checksum. */
+    int saw_different_puzzle = 0;
+    uint64_t first_checksum = 0;
+    for (int i = 0; i < total_agents; i++) {
+        c_reset(&run_a[i]);
+        c_reset(&run_b[i]);
+
+        uint64_t checksum_a = binding_reset_checksum(&run_a[i]);
+        uint64_t checksum_b = binding_reset_checksum(&run_b[i]);
+        EXPECT_EQ_U32(run_a[i].rng, run_b[i].rng);
+        EXPECT_EQ_U32(run_a[i].state, run_b[i].state);
+        EXPECT_EQ_U32(run_a[i].target, run_b[i].target);
+        EXPECT_EQ_INT(run_a[i].target_distance, run_b[i].target_distance);
+        EXPECT_EQ_INT(run_a[i].solution_length, run_b[i].solution_length);
+        EXPECT_EQ_INT(run_a[i].target_distance, 2);
+        EXPECT_EQ_INT(run_a[i].solution_length, 2);
+        EXPECT_TRUE(checksum_a == checksum_b);
+        EXPECT_TRUE(memcmp(obs_a[i], obs_b[i], sizeof(obs_a[i])) == 0);
+        /* Env 0 is the reference; any later env with another checksum proves the puzzles vary by env id. */
+        if (i == 0) {
+            first_checksum = checksum_a;
+        } else if (checksum_a != first_checksum) {
+            saw_different_puzzle = 1;
+        }
+    }
+    EXPECT_TRUE(saw_different_puzzle);
+
+    free_binding_envs(run_a);
+    free_binding_envs(run_b);
+}
+/* my_log must export each depth's solve rate as solves divided by attempts at that depth, and drop the raw attempt rates. */
+static void test_depth_solve_rates_are_conditional_on_depth_attempts(void) {
+    Log log = {0};
+    log.depth_2_rate = 0.25f;
+    log.depth_2_solve_rate = 0.125f;
+    log.depth_4_rate = 0.5f;
+    log.depth_4_solve_rate = 0.375f;
+    log.depth_5_rate = 0.25f;
+    log.depth_5_solve_rate = 0.125f;
+    log.depth_6_rate = 0.25f;
+    log.depth_6_solve_rate = 0.125f;
+    log.depth_8_rate = 0.0f;
+    log.depth_8_solve_rate = 0.0f;
+    log.depth_16_rate = 0.125f;
+    log.depth_16_solve_rate = 0.0f;
+    log.score = 0.75f;
+    log.target_distance = 4.0f;
+    log.solved_target_distance = 2.0f;
+    log.solve_rate = 0.5f;
+    /* Export into a dict with spare capacity (32 slots for the 18 expected keys). */
+    Dict* out = create_dict(32);
+    my_log(&log, out);
+    /* Expected values follow from the inputs above, e.g. depth_2: 0.125 / 0.25 = 0.5; solved moves: 2.0 / 0.5 = 4.0. */
+    EXPECT_EQ_INT(out->size, 18);
+    EXPECT_NEAR(dict_value(out, "score"), 0.75, 0.0);
+    EXPECT_TRUE(!dict_has_key(out, "solve_steps"));
+    EXPECT_TRUE(!dict_has_key(out, "solve_efficiency"));
+    EXPECT_TRUE(!dict_has_key(out, "scramble_unique_states"));
+    EXPECT_NEAR(dict_value(out, "min_win_moves"), 4.0, 0.0);
+    EXPECT_NEAR(dict_value(out, "solved_min_win_moves"), 4.0, 0.0);
+    EXPECT_TRUE(!dict_has_key(out, "depth_2_rate"));
+    EXPECT_NEAR(dict_value(out, "depth_2_solve_rate"), 0.5, 0.0);
+    EXPECT_TRUE(!dict_has_key(out, "depth_4_rate"));
+    EXPECT_NEAR(dict_value(out, "depth_4_solve_rate"), 0.75, 0.0);
+    EXPECT_TRUE(!dict_has_key(out, "depth_5_rate"));
+    EXPECT_NEAR(dict_value(out, "depth_5_solve_rate"), 0.5, 0.0);
+    EXPECT_TRUE(!dict_has_key(out, "depth_6_rate"));
+    EXPECT_NEAR(dict_value(out, "depth_6_solve_rate"), 0.5, 0.0);
+    EXPECT_TRUE(!dict_has_key(out, "depth_8_rate"));
+    EXPECT_NEAR(dict_value(out, "depth_8_solve_rate"), 0.0, 0.0);
+    EXPECT_TRUE(!dict_has_key(out, "depth_16_rate"));
+    EXPECT_NEAR(dict_value(out, "depth_16_solve_rate"), 0.0, 0.0);
+    /* Zero solves over zero attempts must export as a real 0.0; a NaN can slip through a tolerance comparison. */
+    EXPECT_TRUE(!isnan(dict_value(out, "depth_8_solve_rate")));
+    free_dict(out);
+}
+/* Test driver: runs the three binding-level tests; a failing EXPECT_* exits with status 1 before the return. */
+int main(void) {
+    test_vec_init_mixes_base_seed_and_env_id();
+    test_vec_init_visible_targets_repeat_across_runs_and_vary_by_env_id();
+    test_depth_solve_rates_are_conditional_on_depth_attempts();
+    return 0;
+}
diff --git a/ocean/affine_lock/tests/test_metadata_smoke.py b/ocean/affine_lock/tests/test_metadata_smoke.py
new file mode 100644
index 0000000000..df3c6513de
--- /dev/null
+++ b/ocean/affine_lock/tests/test_metadata_smoke.py
@@ -0,0 +1,249 @@
+#!/usr/bin/env python3
+import argparse
+import configparser
+import ctypes
+import re
+import sys
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[3]
+
+EXPECTED_MY_LOG_KEYS = [  # Exact, ordered dict_set(out, "...") keys that check_binding_text() expects in binding.c.
+    "perf",
+    "score",
+    "solve_rate",
+    "max_depth_solve",
+    "episode_return",
+    "episode_length",
+    "timeout_rate",
+    "invalid_rate",
+    "min_win_moves",
+    "solved_min_win_moves",
+    "conditional_solve_steps",
+    "conditional_solve_efficiency",
+    "depth_2_solve_rate",
+    "depth_4_solve_rate",
+    "depth_5_solve_rate",
+    "depth_6_solve_rate",
+    "depth_8_solve_rate",
+    "depth_16_solve_rate",
+]
+
+
+def parse_int(value):  # Accepts "_" digit separators, e.g. "200_000_000".
+    return int("".join(value.split("_")))
+
+
+def parse_float(value):  # Same "_" handling as parse_int, but yields a float.
+    return float("".join(value.split("_")))
+
+
+def assert_sweep_mean(config, section, expected):  # Exact match on the section's "mean".
+    assert expected == parse_float(config[section]["mean"])
+
+
+def check_config():
+    """Assert the merged default.ini + affine_lock.ini values this env relies on."""
+    config = configparser.ConfigParser()
+    for name in ("default.ini", "affine_lock.ini"):
+        # ConfigParser.read() silently skips unreadable files; fail loudly instead.
+        assert config.read(ROOT / "config" / name), f"cannot read config/{name}"
+
+    assert config["base"]["env_name"] == "affine_lock"
+    assert parse_int(config["vec"]["total_agents"]) == 4096
+    assert parse_int(config["vec"]["num_buffers"]) == 2
+    assert parse_int(config["vec"]["num_threads"]) == 16
+    assert parse_int(config["policy"]["hidden_size"]) == 256
+    assert parse_int(config["policy"]["num_layers"]) == 3
+    assert parse_int(config["env"]["seed"]) == 42
+    assert parse_int(config["env"]["start_depth"]) == 2
+    assert parse_int(config["env"]["max_depth"]) == 16
+    assert parse_int(config["train"]["total_timesteps"]) == 200_000_000
+    assert parse_int(config["train"]["horizon"]) == 64
+    assert parse_int(config["train"]["minibatch_size"]) == 8192
+    assert parse_float(config["train"]["learning_rate"]) == 0.012
+    assert parse_float(config["train"]["ent_coef"]) == 0.2
+    assert parse_float(config["train"]["gamma"]) == 0.8
+    assert parse_float(config["train"]["gae_lambda"]) == 0.995
+    assert parse_float(config["train"]["replay_ratio"]) == 3.0
+    assert parse_float(config["train"]["clip_coef"]) == 0.83
+    assert parse_float(config["train"]["vf_coef"]) == 4.75
+    assert parse_float(config["train"]["vf_clip_coef"]) == 0.8
+    assert parse_float(config["train"]["max_grad_norm"]) == 3.0
+    assert parse_float(config["train"]["beta1"]) == 0.5
+    assert parse_float(config["train"]["beta2"]) == 0.9915
+    assert parse_float(config["train"]["eps"]) == 0.0001
+    assert parse_float(config["train"]["vtrace_rho_clip"]) == 1.4
+    assert parse_float(config["train"]["vtrace_c_clip"]) == 3.75
+    assert parse_float(config["train"]["prio_alpha"]) == 0.055
+    assert parse_float(config["train"]["prio_beta0"]) == 0.161
+    assert_sweep_mean(config, "sweep.train.total_timesteps", 200_000_000.0)
+    assert_sweep_mean(config, "sweep.vec.total_agents", 4096.0)
+    assert_sweep_mean(config, "sweep.vec.num_buffers", 2.0)
+    assert_sweep_mean(config, "sweep.policy.hidden_size", 256.0)
+    assert_sweep_mean(config, "sweep.policy.num_layers", 3.0)
+    assert_sweep_mean(config, "sweep.train.horizon", 64.0)
+    assert_sweep_mean(config, "sweep.train.minibatch_size", 8192.0)
+    assert_sweep_mean(config, "sweep.train.learning_rate", 0.012)
+    assert_sweep_mean(config, "sweep.train.ent_coef", 0.2)
+    assert_sweep_mean(config, "sweep.train.gamma", 0.8)
+    assert_sweep_mean(config, "sweep.train.gae_lambda", 0.995)
+    assert_sweep_mean(config, "sweep.train.replay_ratio", 3.0)
+    assert_sweep_mean(config, "sweep.train.clip_coef", 0.83)
+    assert_sweep_mean(config, "sweep.train.vf_coef", 4.75)
+    assert_sweep_mean(config, "sweep.train.vf_clip_coef", 0.8)
+    assert_sweep_mean(config, "sweep.train.max_grad_norm", 3.0)
+    assert_sweep_mean(config, "sweep.train.beta1", 0.5)
+    assert_sweep_mean(config, "sweep.train.beta2", 0.9915)
+    assert_sweep_mean(config, "sweep.train.eps", 0.0001)
+    assert_sweep_mean(config, "sweep.train.vtrace_rho_clip", 1.4)
+    assert_sweep_mean(config, "sweep.train.vtrace_c_clip", 3.75)
+    assert_sweep_mean(config, "sweep.train.prio_alpha", 0.055)
+    assert_sweep_mean(config, "sweep.train.prio_beta0", 0.161)
+    assert config["sweep"]["metric"] == "perf"
+    assert config["sweep"]["goal"] == "maximize"
+
+    sweep_ts = config["sweep.train.total_timesteps"]
+    assert parse_int(sweep_ts["min"]) == 100_000_000
+    assert parse_int(sweep_ts["max"]) == 200_000_000
+
+    assert parse_int(config["sweep.train.horizon"]["min"]) == 32
+    assert parse_int(config["sweep.train.horizon"]["max"]) == 128
+    assert parse_int(config["sweep.policy.hidden_size"]["min"]) == 64
+    assert parse_int(config["sweep.policy.hidden_size"]["max"]) == 512
+    assert parse_float(config["sweep.policy.num_layers"]["min"]) == 1.0
+    assert parse_float(config["sweep.policy.num_layers"]["max"]) == 4.0
+    assert parse_int(config["sweep.vec.total_agents"]["min"]) == 4096
+    assert parse_int(config["sweep.vec.total_agents"]["max"]) == 16_384
+    assert parse_float(config["sweep.vec.num_buffers"]["min"]) == 1.0
+    assert parse_float(config["sweep.vec.num_buffers"]["max"]) == 4.0
+    assert parse_int(config["sweep.train.minibatch_size"]["min"]) == 8192
+    assert parse_int(config["sweep.train.minibatch_size"]["max"]) == 131_072
+    assert parse_float(config["sweep.train.replay_ratio"]["min"]) == 1.0
+    assert parse_float(config["sweep.train.replay_ratio"]["max"]) == 4.0
+    assert parse_float(config["sweep.train.vf_clip_coef"]["min"]) == 0.001
+    assert parse_float(config["sweep.train.vf_clip_coef"]["max"]) == 5.0
+    assert parse_float(config["sweep.train.vf_coef"]["min"]) == 0.1
+    assert parse_float(config["sweep.train.vf_coef"]["max"]) == 8.0
+
+    min_batch_size = (
+        parse_int(config["sweep.vec.total_agents"]["min"])
+        * parse_int(config["sweep.train.horizon"]["min"])
+    )
+    max_minibatch_size = parse_int(config["sweep.train.minibatch_size"]["max"])
+    min_replay_ratio = parse_float(config["sweep.train.replay_ratio"]["min"])
+    assert min_replay_ratio * min_batch_size >= max_minibatch_size  # smallest batch x replay covers the largest minibatch
+
+
+def check_binding_text():  # Text-only checks on binding.c; nothing is compiled or run here.
+    binding = (ROOT / "ocean" / "affine_lock" / "binding.c").read_text()
+    assert "#define OBS_SIZE AFFINE_LOCK_OBS_SIZE" in binding
+    assert "#define ACT_SIZES {AFFINE_LOCK_NUM_ACTIONS}" in binding
+    assert "#define OBS_TENSOR_T FloatTensor" in binding
+
+    log_keys = re.findall(r'dict_set\(out,\s*"([^"]+)"', binding)  # key literals, in source order
+    assert log_keys == EXPECTED_MY_LOG_KEYS
+    assert len(log_keys) + 1 <= 32  # static_vec_log appends "n".
+
+
+def float_buffer(ptr, count):  # ctypes array of `count` C floats over the memory at address `ptr`.
+    return (ctypes.c_float * count).from_address(ptr)
+
+
+def check_backend_metadata():
+    """Check the compiled `_C` backend: loaded config values, then one CPU step."""
+    from pufferlib import _C
+    from pufferlib.pufferl import load_config
+
+    assert _C.env_name == "affine_lock"
+    assert _C.gpu == 1
+
+    def load_affine_args(extra_argv):
+        # Call load_config under a controlled sys.argv and always restore the original.
+        old_argv = sys.argv
+        try:
+            sys.argv = [old_argv[0], *extra_argv]
+            return load_config("affine_lock")
+        finally:
+            sys.argv = old_argv
+
+    base_args = load_affine_args([])
+    assert base_args["env_name"] == "affine_lock"
+    assert base_args["vec"]["total_agents"] == 4096
+    assert base_args["vec"]["num_buffers"] == 2
+    assert base_args["policy"]["hidden_size"] == 256
+    assert base_args["policy"]["num_layers"] == 3
+    assert base_args["train"]["horizon"] == 64
+    assert base_args["train"]["minibatch_size"] == 8192
+    assert base_args["train"]["learning_rate"] == 0.012
+    assert base_args["train"]["ent_coef"] == 0.2
+    assert base_args["train"]["gamma"] == 0.8
+    assert base_args["train"]["gae_lambda"] == 0.995
+    assert base_args["train"]["replay_ratio"] == 3.0
+    assert base_args["train"]["clip_coef"] == 0.83
+    assert base_args["train"]["vf_coef"] == 4.75
+    assert base_args["train"]["vf_clip_coef"] == 0.8
+    assert base_args["train"]["max_grad_norm"] == 3.0
+    assert base_args["train"]["beta1"] == 0.5
+    assert base_args["train"]["beta2"] == 0.9915
+    assert base_args["train"]["eps"] == 0.0001
+    assert base_args["train"]["vtrace_rho_clip"] == 1.4
+    assert base_args["train"]["vtrace_c_clip"] == 3.75
+    assert base_args["train"]["prio_alpha"] == 0.055
+    assert base_args["train"]["prio_beta0"] == 0.161
+
+    # Second load through the same helper (same argv as before); this result is
+    # edited below to build a two-agent, single-buffer vec.
+    args = load_affine_args([])
+    args["vec"]["total_agents"] = 2
+    args["vec"]["num_buffers"] = 1
+    vec = _C.create_vec(args, 0)
+    try:
+        assert vec.obs_size == 33
+        assert vec.obs_dtype == "FloatTensor"
+        assert list(vec.act_sizes) == [8]
+
+        obs = float_buffer(vec.obs_ptr, vec.total_agents * vec.obs_size)
+        rewards = float_buffer(vec.rewards_ptr, vec.total_agents)
+        terminals = float_buffer(vec.terminals_ptr, vec.total_agents)
+
+        vec.reset()
+        assert list(rewards) == [0.0, 0.0]
+        assert list(terminals) == [0.0, 0.0]
+        for env_id in range(vec.total_agents):
+            timer = obs[env_id * vec.obs_size + 32]
+            assert timer == 0.0
+
+        # Action 8 is outside act_sizes == [8]: expect an invalid, terminal step.
+        actions = (ctypes.c_float * vec.total_agents)(8.0, 8.0)
+        vec.cpu_step(ctypes.addressof(actions))
+        assert list(rewards) == [-1.0, -1.0]
+        assert list(terminals) == [1.0, 1.0]
+
+        logs = vec.log()
+        assert logs["n"] == 2.0
+        assert logs["invalid_rate"] == 1.0
+        assert logs["timeout_rate"] == 0.0
+        assert logs["solve_rate"] == 0.0
+        assert logs["episode_length"] == 1.0
+        assert logs["episode_return"] == -1.0
+        assert "perf" in logs
+        assert "min_win_moves" in logs
+    finally:
+        vec.close()
+
+
+def main():  # CLI entry: static checks always run; backend checks only with --require-backend.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--require-backend", action="store_true")
+    args = parser.parse_args()
+
+    check_config()
+    check_binding_text()
+    if args.require_backend:  # needs the pufferlib extension imported inside check_backend_metadata
+        check_backend_metadata()
+
+
+if __name__ == "__main__":  # Run the checks only when executed as a script.
+    main()
diff --git a/ocean/affine_lock/tests/test_visible_targets_loader.c b/ocean/affine_lock/tests/test_visible_targets_loader.c
new file mode 100644
index 0000000000..9fed276f83
--- /dev/null
+++ b/ocean/affine_lock/tests/test_visible_targets_loader.c
@@ -0,0 +1,116 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "affine_lock_visible_targets.h"
+
+#define EXPECT_TRUE(expr) do { /* If expr is false: print file:line and the expression text, then exit(1). */ \
+    if (!(expr)) { \
+        fprintf(stderr, "EXPECT_TRUE failed at %s:%d: %s\n", \
+            __FILE__, __LINE__, #expr); \
+        exit(1); \
+    } \
+} while (0)
+
+#define EXPECT_EQ_U32(actual, expected) do { /* Both operands are cast to uint32_t before comparing. */ \
+    uint32_t actual_value = (uint32_t)(actual); \
+    uint32_t expected_value = (uint32_t)(expected); \
+    if (actual_value != expected_value) { \
+        fprintf(stderr, \
+            "EXPECT_EQ_U32 failed at %s:%d: %s=%u expected %u\n", \
+            __FILE__, __LINE__, #actual, actual_value, expected_value); \
+        exit(1); /* first mismatch ends the process */ \
+    } \
+} while (0)
+
+#define EXPECT_EQ_U64(actual, expected) do { /* Both operands are cast to uint64_t before comparing. */ \
+    uint64_t actual_value = (uint64_t)(actual); \
+    uint64_t expected_value = (uint64_t)(expected); \
+    if (actual_value != expected_value) { \
+        fprintf(stderr, \
+            "EXPECT_EQ_U64 failed at %s:%d: %s=%llu expected %llu\n", \
+            __FILE__, __LINE__, #actual, \
+            (unsigned long long)actual_value, /* casts match the %llu conversions */ \
+            (unsigned long long)expected_value); \
+        exit(1); \
+    } \
+} while (0)
+
+int main(int argc, char** argv) {  // Loads TARGET_BIN and checks its header, depth sections and records.
+    if (argc != 5) {
+        fprintf(stderr,
+            "usage: %s TARGET_BIN EXPECTED_RECORD_COUNT "
+            "EXPECTED_SAMPLE_COUNT EXPECTED_D16_COUNT\n",
+            argv[0]);
+        return 1;
+    }
+
+    char* end = NULL;
+    unsigned long expected_record_count = strtoul(argv[2], &end, 10);
+    EXPECT_TRUE(end != argv[2] && *end == '\0');  // Non-empty and fully consumed as a base-10 number.
+    unsigned long expected_sample_count = strtoul(argv[3], &end, 10);
+    EXPECT_TRUE(end != argv[3] && *end == '\0');
+    unsigned long expected_d16_count = strtoul(argv[4], &end, 10);
+    EXPECT_TRUE(end != argv[4] && *end == '\0');
+
+    AffineLockVisibleTargetTable table;
+    char error[256];  // Passed to the loader with its size; printed below when the load fails.
+    int rc = affine_lock_visible_targets_load(
+        argv[1],
+        AFFINE_LOCK_VISIBLE_TARGET_8ACTION_V1_HASH,
+        &table,
+        error,
+        sizeof(error));
+    if (rc != 0) {
+        fprintf(stderr, "failed to load visible target table: %s\n", error);
+        return 1;
+    }
+
+    EXPECT_EQ_U32(table.bits, 16);
+    EXPECT_EQ_U32(table.num_actions, 8);
+    EXPECT_EQ_U32(table.depth_count, 6);  // Also bounds the expected_* array indexing below.
+    EXPECT_EQ_U32(table.record_size, 16);
+    EXPECT_EQ_U32(table.record_count, expected_record_count);  // Compared after truncation to 32 bits.
+    EXPECT_EQ_U64(
+        table.action_set_hash,
+        AFFINE_LOCK_VISIBLE_TARGET_8ACTION_V1_HASH);
+
+    const uint32_t expected_depths[6] = {2, 4, 5, 6, 8, 16};
+    const uint64_t expected_exact_counts[6] = {  // Expected exact_pair_count per depth, same order as expected_depths.
+        2216496ull,
+        34379722ull,
+        115388932ull,
+        331789220ull,
+        1125374770ull,
+        100548ull,
+    };
+    uint32_t first_record = 0;  // Running sum of stored_count: sections must be contiguous from record 0.
+    for (uint32_t i = 0; i < table.depth_count; i++) {
+        EXPECT_EQ_U32(table.depths[i].depth, expected_depths[i]);
+        EXPECT_EQ_U32(table.depths[i].first_record, first_record);
+        uint32_t expected_stored_count = i == 5 ?  // Index 5 is the depth-16 section.
+            (uint32_t)expected_d16_count : (uint32_t)expected_sample_count;
+        EXPECT_EQ_U32(table.depths[i].stored_count, expected_stored_count);
+        EXPECT_EQ_U64(table.depths[i].exact_pair_count, expected_exact_counts[i]);
+        first_record += table.depths[i].stored_count;
+    }
+
+    for (uint32_t i = 0; i < table.record_count; i++) {
+        const AffineLockVisibleTargetRecord* record = &table.records[i];
+        EXPECT_TRUE(record->solution_length == record->depth);
+        EXPECT_TRUE(
+            record->depth == 2 ||
+            record->depth == 4 ||
+            record->depth == 5 ||
+            record->depth == 6 ||
+            record->depth == 8 ||
+            record->depth == 16);
+        for (uint8_t step = 0; step < record->solution_length; step++) {
+            uint8_t action = (record->packed_actions >> (3u * step)) & 7u;  // 3 bits per step, step 0 lowest.
+            EXPECT_TRUE(action < table.num_actions);
+        }
+    }
+
+    affine_lock_visible_targets_free(&table);
+    return 0;
+}
diff --git a/ocean/affine_lock/tools/generate_8action_visible_targets.c b/ocean/affine_lock/tools/generate_8action_visible_targets.c
new file mode 100644
index 0000000000..74ac1d17b8
--- /dev/null
+++ b/ocean/affine_lock/tools/generate_8action_visible_targets.c
@@ -0,0 +1,954 @@
+#define _POSIX_C_SOURCE 200809L
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#define BITS 16  /* Width of one state in bits (states are uint16_t). */
+#define STATE_COUNT (1u << BITS)  /* Number of distinct states. */
+#define MAX_ACTIONS 8  /* Capacity of ActionSet.ops/names and of each NEXT_STATE row. */
+#define TARGET_DEPTH_COUNT 6  /* Length of TARGET_DEPTHS. */
+#define MAX_DISTANCE 64  /* Largest BFS distance the histogram can hold. */
+#define RECORD_SIZE 16  /* NOTE(review): presumably bytes per serialized record -- confirm in the writer. */
+#define FORMAT_VERSION 1
+
+static const int TARGET_DEPTHS[TARGET_DEPTH_COUNT] = {2, 4, 5, 6, 8, 16};  // BFS depths whose pairs are recorded.
+typedef enum ActionOp {  // Primitive state transforms dispatched by apply_action_op().
+    ACTION_OP_SHIFT_LEFT = 0,
+    ACTION_OP_SHIFT_RIGHT = 1,
+    ACTION_OP_MIRROR = 2,
+    ACTION_OP_INVERT_RIGHT_7 = 3,  // XOR with 0xfe00 in apply_action_op().
+    ACTION_OP_SWAP_ADJACENT_BITS = 4,
+    ACTION_OP_SWAP_ADJACENT_PAIRS = 5,
+    ACTION_OP_SWAP_NIBBLES_EACH_BYTE = 6,
+    ACTION_OP_REVERSE_EACH_NIBBLE = 7,
+    ACTION_OP_REVERSE_EACH_BYTE = 8,
+} ActionOp;
+
+typedef struct ActionSet {  // One named, ordered selection of ActionOps.
+    const char* name;  // Key for action_set_by_name(); also mixed into action_set_hash().
+    int num_actions;  // Number of leading ops[]/names[] entries in use.
+    int store_all_d16_by_default;  // NOTE(review): presumably the default store-all flag for depth 16 -- confirm in option parsing.
+    // Stable salt for deterministic sampled-record selection.
+    uint64_t candidate_score_seed;
+    const char* default_bin;  // NOTE(review): presumably the default output paths -- confirm in option parsing.
+    const char* default_json;
+    ActionOp ops[MAX_ACTIONS];  // ops[i] is the transform applied for action index i.
+    const char* names[MAX_ACTIONS];  // names[i] is mixed into action_set_hash().
+} ActionSet;
+
+static const ActionSet ACTION_SETS[] = {  // Entry 0 is the initial ACTIVE_ACTION_SET.
+    {
+        "affine_lock_8action_v1",
+        8,  // num_actions
+        1,  // store_all_d16_by_default
+        0x7b7ba09982ec5a9dull,  // candidate_score_seed
+        "ocean/affine_lock/generated/affine_lock_8action_visible_targets.bin",
+        "ocean/affine_lock/generated/affine_lock_8action_visible_targets.json",
+        {
+            ACTION_OP_SHIFT_LEFT,
+            ACTION_OP_SHIFT_RIGHT,
+            ACTION_OP_INVERT_RIGHT_7,
+            ACTION_OP_SWAP_ADJACENT_BITS,
+            ACTION_OP_SWAP_ADJACENT_PAIRS,
+            ACTION_OP_SWAP_NIBBLES_EACH_BYTE,
+            ACTION_OP_REVERSE_EACH_NIBBLE,
+            ACTION_OP_REVERSE_EACH_BYTE,
+        },
+        {
+            "shift_left",
+            "shift_right",
+            "invert_right_7",
+            "swap_adjacent_bits",
+            "swap_adjacent_pairs",
+            "swap_nibbles_each_byte",
+            "reverse_each_nibble",
+            "reverse_each_byte",
+        },
+    },
+    {
+        // Generator-only alternate for future runtime experiments. Fewer
+        // actions can make policy search easier while producing many more
+        // exact depth-16 pairs than the committed 8-action training set.
+        "affine_lock_4action_v1",
+        4,  // num_actions
+        0,  // store_all_d16_by_default
+        0x8c4d9362024c02b8ull,  // candidate_score_seed
+        "ocean/affine_lock/generated/affine_lock_4action_visible_targets.bin",
+        "ocean/affine_lock/generated/affine_lock_4action_visible_targets.json",
+        {
+            ACTION_OP_SHIFT_RIGHT,
+            ACTION_OP_MIRROR,
+            ACTION_OP_INVERT_RIGHT_7,
+            ACTION_OP_SWAP_ADJACENT_BITS,
+        },
+        {
+            "shift_right",
+            "mirror",
+            "invert_right_7",
+            "swap_adjacent_bits",
+        },
+    },
+};
+
+static const int ACTION_SET_COUNT =
+    (int)(sizeof(ACTION_SETS) / sizeof(ACTION_SETS[0]));  // Element count of ACTION_SETS.
+static const ActionSet* ACTIVE_ACTION_SET = &ACTION_SETS[0];  // Starts as the 8-action set.
+
+typedef struct TargetRecord {  // One (start, target) pair with the BFS path found between them.
+    uint16_t start;
+    uint16_t target;
+    uint64_t packed_actions;  // 3 bits per step, step 0 in the lowest bits (see pack_solution).
+    uint8_t solution_length;  // Number of packed steps.
+    uint8_t depth;  // BFS distance from start to target.
+    uint64_t score;  // candidate_score() value; sampling keeps the lowest scores.
+} TargetRecord;
+
+typedef struct DepthSample {  // Record pool for one target depth.
+    int depth;
+    int store_all;  // Nonzero: keep every record; zero: keep a bounded sample.
+    uint64_t exact_count;  // Every pair seen at this depth, kept or not.
+    uint32_t capacity;  // Allocated slots in records.
+    uint32_t count;  // Slots of records currently in use.
+    TargetRecord* records;
+} DepthSample;
+
+typedef struct WorkerResult {  // Accumulators filled by compute_worker_records(), combined by merge_results().
+    DepthSample depths[TARGET_DEPTH_COUNT];  // Indexed like TARGET_DEPTHS.
+    uint64_t histogram[MAX_DISTANCE + 1];  // histogram[d]: (start, state) pairs at BFS distance d.
+    uint64_t disconnected_starts;  // Starts whose BFS reached fewer than STATE_COUNT states.
+    int max_distance;  // Largest BFS distance observed.
+} WorkerResult;
+
+typedef struct Options {  // Generator run settings.
+    const char* output_bin;
+    const char* output_json;
+    const ActionSet* action_set;
+    uint32_t sample_per_depth;  // Heap capacity of each sampled (non-store-all) depth pool.
+    uint64_t sample_seed;
+    int store_all_depths[TARGET_DEPTH_COUNT];  // Per-depth store_all flag, indexed like TARGET_DEPTHS.
+    int output_bin_explicit;
+    int output_json_explicit;
+} Options;
+
+static uint16_t NEXT_STATE[STATE_COUNT][MAX_ACTIONS];  // Transition table filled by build_next_state().
+static uint64_t ACTIVE_SAMPLE_SEED = 0u;  // 0 leaves sampled-pool scores unsalted (see candidate_score).
+
+static uint64_t mix_u64(uint64_t hash, uint64_t value) {
+    /* One mixing round: xor the value in, then multiply (mod 2^64) by
+     * 1099511628211, the 64-bit FNV prime. */
+    return (hash ^ value) * 1099511628211ull;
+}
+
+static uint64_t mix_bytes(uint64_t hash, const char* text) {
+    /* Fold every byte of the NUL-terminated string into the hash, in order,
+     * reading bytes as unsigned char. The terminator itself is not mixed. */
+    for (const unsigned char* p = (const unsigned char*)text; *p != '\0'; p++) {
+        hash = mix_u64(hash, (uint64_t)*p);
+    }
+    return hash;
+}
+
+static uint16_t shift_left(uint16_t state) {
+    /* Rotate by one position: integer bit 0 wraps around to bit BITS-1. */
+    return (uint16_t)((state >> 1) | ((state & 1u) << (BITS - 1)));
+}
+
+static uint16_t shift_right(uint16_t state) {
+    /* Rotate by one position: integer bit BITS-1 wraps around to bit 0. */
+    return (uint16_t)(((state << 1) | (state >> (BITS - 1))) & 0xffffu);
+}
+
+static uint16_t mirror_bits(uint16_t state) {
+    /* Reverse the order of all BITS bits: bit i moves to bit BITS-1-i. */
+    uint16_t mirrored = 0u;
+    for (int src = 0, dst = BITS - 1; src < BITS; src++, dst--) {
+        uint16_t bit = (uint16_t)((state >> src) & 1u);
+        mirrored |= (uint16_t)(bit << dst);
+    }
+    return mirrored;
+}
+
+static uint16_t swap_adjacent_bits(uint16_t state) {
+    /* Exchange each even-indexed bit with its odd-indexed neighbour. */
+    return (uint16_t)(((state >> 1) & 0x5555u) | ((state << 1) & 0xaaaau));
+}
+
+static uint16_t swap_adjacent_pairs(uint16_t state) {
+    /* Exchange the two 2-bit halves inside every nibble. */
+    return (uint16_t)(((state >> 2) & 0x3333u) | ((state << 2) & 0xccccu));
+}
+
+static uint16_t swap_nibbles_each_byte(uint16_t state) {
+    /* Exchange the high and low nibble inside each of the two bytes. */
+    return (uint16_t)(((state >> 4) & 0x0f0fu) | ((state << 4) & 0xf0f0u));
+}
+
+static uint16_t reverse_each_nibble(uint16_t state) {  // Composition: swap adjacent bits, then adjacent pairs.
+    return swap_adjacent_pairs(swap_adjacent_bits(state));
+}
+
+static uint16_t reverse_each_byte(uint16_t state) {  // Composition: reverse each nibble, then swap the nibbles of each byte.
+    return swap_nibbles_each_byte(reverse_each_nibble(state));
+}
+
+static uint16_t apply_action_op(uint16_t state, ActionOp op) {
+    /*
+     * Apply one primitive operation to a state and return the new state.
+     *
+     * ACTION_OP_INVERT_RIGHT_7 is a plain XOR with 0xfe00, which flips the
+     * seven highest integer bits. Every other listed op delegates to the
+     * helper of the same name. An `op` value that matches no case leaves
+     * the state unchanged.
+     */
+    switch (op) {
+        case ACTION_OP_SHIFT_LEFT: return shift_left(state);
+        case ACTION_OP_SHIFT_RIGHT: return shift_right(state);
+        case ACTION_OP_MIRROR: return mirror_bits(state);
+        case ACTION_OP_INVERT_RIGHT_7: return (uint16_t)(state ^ 0xfe00u);
+        case ACTION_OP_SWAP_ADJACENT_BITS: return swap_adjacent_bits(state);
+        case ACTION_OP_SWAP_ADJACENT_PAIRS: return swap_adjacent_pairs(state);
+        case ACTION_OP_SWAP_NIBBLES_EACH_BYTE: return swap_nibbles_each_byte(state);
+        case ACTION_OP_REVERSE_EACH_NIBBLE: return reverse_each_nibble(state);
+        case ACTION_OP_REVERSE_EACH_BYTE: return reverse_each_byte(state);
+        default: break;
+    }
+    return state;
+}
+
+static void build_next_state(void) {
+    /* Tabulate every (state, action) transition of the active action set. */
+    for (uint32_t s = 0; s < STATE_COUNT; s++) {
+        for (int a = 0; a < ACTIVE_ACTION_SET->num_actions; a++) {
+            NEXT_STATE[s][a] = apply_action_op((uint16_t)s, ACTIVE_ACTION_SET->ops[a]);
+        }
+    }
+}
+
+static const ActionSet* action_set_by_name(const char* name) {
+    /* Linear search of ACTION_SETS by exact name; NULL when nothing matches. */
+    const ActionSet* const end = ACTION_SETS + ACTION_SET_COUNT;
+    for (const ActionSet* set = ACTION_SETS; set != end; set++) {
+        if (strcmp(set->name, name) == 0) { return set; }
+    }
+    return NULL;
+}
+
+static int target_depth_index(int depth) {
+    /* Slot of `depth` within TARGET_DEPTHS, or -1 when it is not a target. */
+    int found = -1;
+    for (int i = 0; found < 0 && i < TARGET_DEPTH_COUNT; i++) {
+        if (TARGET_DEPTHS[i] == depth) { found = i; }
+    }
+    return found;
+}
+
+static int record_worse(const TargetRecord* a, const TargetRecord* b) {
+    /*
+     * Heap ordering used while sampling: `a` is worse than `b` when its
+     * score is larger. Ties fall through start, target, packed_actions and
+     * finally depth, in that order, the larger value always being the worse
+     * one. Returns 1 when `a` is worse, 0 otherwise (including equality).
+     */
+    return
+        a->score != b->score ? a->score > b->score :
+        a->start != b->start ? a->start > b->start :
+        a->target != b->target ? a->target > b->target :
+        a->packed_actions != b->packed_actions
+            ? a->packed_actions > b->packed_actions
+            : a->depth > b->depth;
+}
+
+static int record_better(const TargetRecord* a, const TargetRecord* b) {  // True when b is worse than a.
+    return record_worse(b, a);
+}
+
+static void heap_swap(TargetRecord* a, TargetRecord* b) {  // Exchange two records by value.
+    TargetRecord tmp = *a;
+    *a = *b;
+    *b = tmp;
+}
+
+static void heap_sift_up(TargetRecord* records, uint32_t index) {
+    /* Bubble records[index] toward the root for as long as it is worse than
+     * its parent (the heap keeps the worst record at the root). */
+    for (uint32_t child = index; child > 0; ) {
+        uint32_t up = (child - 1u) / 2u;
+        if (!record_worse(&records[child], &records[up])) { break; }
+        heap_swap(&records[child], &records[up]);
+        child = up;
+    }
+}
+
+static void heap_sift_down(TargetRecord* records, uint32_t count, uint32_t index) {
+    /* Sink records[index] until neither child is worse than it, restoring
+     * the worst-at-root heap over the first `count` records. */
+    for (;;) {
+        uint32_t pick = index;
+        uint32_t child = 2u * index + 1u;
+        for (int side = 0; side < 2; side++, child++) {
+            if (child < count && record_worse(&records[child], &records[pick])) {
+                pick = child;
+            }
+        }
+        if (pick == index) {
+            return;
+        }
+        heap_swap(&records[index], &records[pick]);
+        index = pick;
+    }
+}
+
+static int ensure_capacity(DepthSample* sample, uint32_t required) {  // Grow records to >= required slots; 0 on success, -1 on failure.
+    if (required <= sample->capacity) {
+        return 0;
+    }
+    uint32_t next_capacity = sample->capacity == 0 ? 1024u : sample->capacity;  // First allocation starts at 1024 slots.
+    while (next_capacity < required) {
+        if (next_capacity > UINT32_MAX / 2u) {  // Doubling would overflow uint32_t.
+            return -1;
+        }
+        next_capacity *= 2u;
+    }
+    TargetRecord* next = (TargetRecord*)realloc(
+        sample->records, (size_t)next_capacity * sizeof(TargetRecord));
+    if (next == NULL) {  // On failure the old array and capacity are left untouched.
+        return -1;
+    }
+    sample->records = next;
+    sample->capacity = next_capacity;
+    return 0;
+}
+
+static int add_record(DepthSample* sample, const TargetRecord* record) {
+    /*
+     * Offer one record to a depth pool. Returns 0 on success and -1 only
+     * when a store-all pool cannot grow.
+     *   - store_all pools append every record.
+     *   - sampled pools keep at most `capacity` records in a heap whose root
+     *     is the worst kept record; once full, a new record replaces the root
+     *     only if it is strictly better. A zero-capacity pool keeps nothing.
+     */
+    if (sample->store_all) {
+        if (ensure_capacity(sample, sample->count + 1u) != 0) { return -1; }
+        sample->records[sample->count] = *record;
+        sample->count += 1u;
+    } else if (sample->count < sample->capacity) {
+        uint32_t slot = sample->count++;
+        sample->records[slot] = *record;
+        heap_sift_up(sample->records, slot);
+    } else if (sample->capacity != 0 &&
+            record_better(record, &sample->records[0])) {
+        sample->records[0] = *record;
+        heap_sift_down(sample->records, sample->count, 0);
+    }
+    return 0;
+}
+
+static uint64_t candidate_score(  // Deterministic hash of one candidate; used as its sampling/sort score.
+        uint16_t start,
+        uint16_t target,
+        int depth,
+        uint64_t packed_actions,
+        int store_all) {
+    uint64_t hash = ACTIVE_ACTION_SET->candidate_score_seed;  // Per-action-set salt.
+    // Store-all depths are complete sets, so keep their ordering stable across
+    // sample seeds and only reseed the sampled pools.
+    if (!store_all && ACTIVE_SAMPLE_SEED != 0u) {  // A seed of 0 leaves the score unsalted.
+        hash = mix_u64(hash, ACTIVE_SAMPLE_SEED);
+    }
+    hash = mix_u64(hash, start);
+    hash = mix_u64(hash, target);
+    hash = mix_u64(hash, (uint64_t)depth);
+    hash = mix_u64(hash, packed_actions);
+    return hash;
+}
+
+static uint64_t pack_solution(
+        uint16_t start, uint16_t target, uint8_t solution_length,
+        const uint16_t* parent, const uint8_t* parent_action) {
+    /* Walk parent links back from `target`, packing the action of step i into
+     * bits [3*i, 3*i+3). Exits with status 2 if the path is too long for a
+     * 64-bit word or does not lead back to `start`. */
+    if (solution_length > 64u / 3u) {
+        fprintf(stderr, "solution length %u exceeds 3-bit packing limit\n",
+            (unsigned int)solution_length);
+        exit(2);
+    }
+    uint64_t packed = 0u;
+    uint16_t state = target;
+    for (int i = (int)solution_length - 1; i >= 0; i--) {
+        packed |= (uint64_t)(parent_action[state] & 7u) << (3 * i);
+        state = parent[state];
+    }
+    if (state != start) {
+        fprintf(stderr, "failed to reconstruct path from %u to %u\n",
+            (unsigned int)start, (unsigned int)target);
+        exit(2);
+    }
+    return packed;
+}
+
+static void init_worker_result(  // Zero *result and set up one pool per target depth.
+        WorkerResult* result,
+        const Options* options) {
+    memset(result, 0, sizeof(*result));
+    for (int i = 0; i < TARGET_DEPTH_COUNT; i++) {
+        result->depths[i].depth = TARGET_DEPTHS[i];
+        result->depths[i].store_all = options->store_all_depths[i];
+        if (!result->depths[i].store_all && options->sample_per_depth > 0) {  // Store-all pools start empty and grow in add_record().
+            result->depths[i].capacity = options->sample_per_depth;
+            result->depths[i].records = (TargetRecord*)calloc(
+                options->sample_per_depth, sizeof(TargetRecord));
+            if (result->depths[i].records == NULL) {
+                fprintf(stderr, "failed to allocate target sampler\n");
+                exit(2);
+            }
+        }
+    }
+}
+
+static void free_worker_result(WorkerResult* result) {  // Free every pool and zero its bookkeeping.
+    DepthSample* const end = result->depths + TARGET_DEPTH_COUNT;
+    for (DepthSample* pool = result->depths; pool != end; pool++) {
+        free(pool->records);
+        pool->records = NULL;
+        pool->capacity = pool->count = 0;
+    }
+}
+
+static void compute_worker_records(WorkerResult* result) {  // One BFS per start state; accumulates histogram and target-depth records into *result.
+    uint32_t* seen = (uint32_t*)calloc(STATE_COUNT, sizeof(uint32_t));  // seen[s] == stamp marks s as visited in the current BFS.
+    uint16_t* queue = (uint16_t*)malloc(STATE_COUNT * sizeof(uint16_t));
+    uint16_t* parent = (uint16_t*)malloc(STATE_COUNT * sizeof(uint16_t));
+    uint8_t* parent_action = (uint8_t*)malloc(STATE_COUNT * sizeof(uint8_t));
+    uint8_t* depth = (uint8_t*)malloc(STATE_COUNT * sizeof(uint8_t));
+    if (seen == NULL || queue == NULL || parent == NULL ||
+            parent_action == NULL || depth == NULL) {
+        fprintf(stderr, "failed to allocate BFS buffers\n");
+        exit(2);
+    }
+
+#ifdef _OPENMP
+    #pragma omp for schedule(dynamic, 64)
+#endif
+    for (uint32_t start = 0; start < STATE_COUNT; start++) {  // NOTE(review): presumably called inside an omp parallel region so starts are split across threads -- confirm at the call site.
+        uint32_t stamp = start + 1u;  // Nonzero and unique per start, so seen[] never needs clearing.
+        uint32_t head = 0;
+        uint32_t tail = 0;
+        seen[start] = stamp;
+        parent[start] = (uint16_t)start;
+        parent_action[start] = 0;
+        depth[start] = 0;
+        queue[tail++] = (uint16_t)start;
+        result->histogram[0] += 1u;
+
+        while (head < tail) {
+            uint16_t state = queue[head++];
+            uint8_t state_depth = depth[state];
+            const uint16_t* row = NEXT_STATE[state];
+            for (int action = 0; action < ACTIVE_ACTION_SET->num_actions; action++) {
+                uint16_t next = row[action];
+                if (seen[next] == stamp) {
+                    continue;
+                }
+                uint8_t next_depth = (uint8_t)(state_depth + 1u);
+                seen[next] = stamp;
+                parent[next] = state;
+                parent_action[next] = (uint8_t)action;
+                depth[next] = next_depth;
+                queue[tail++] = next;
+                if (next_depth > MAX_DISTANCE) {  // Must be checked before histogram[next_depth] is indexed.
+                    fprintf(stderr, "distance exceeded internal limit\n");
+                    exit(2);
+                }
+                result->histogram[next_depth] += 1u;
+                if ((int)next_depth > result->max_distance) {
+                    result->max_distance = (int)next_depth;
+                }
+
+                int depth_index = target_depth_index((int)next_depth);
+                if (depth_index < 0) {  // Not one of TARGET_DEPTHS: counted in the histogram only.
+                    continue;
+                }
+                DepthSample* sample = &result->depths[depth_index];
+                sample->exact_count += 1u;  // Counts every pair, including ones the sampler later drops.
+                uint64_t packed_actions = pack_solution(
+                    (uint16_t)start, next, next_depth, parent, parent_action);
+                TargetRecord record;
+                memset(&record, 0, sizeof(record));
+                record.start = (uint16_t)start;
+                record.target = next;
+                record.packed_actions = packed_actions;
+                record.solution_length = next_depth;
+                record.depth = next_depth;
+                record.score = candidate_score(
+                    (uint16_t)start, next, (int)next_depth, packed_actions,
+                    sample->store_all);
+                if (add_record(sample, &record) != 0) {
+                    fprintf(stderr, "failed to store sampled target record\n");
+                    exit(2);
+                }
+            }
+        }
+
+        if (tail != STATE_COUNT) {  // The BFS enqueued fewer than STATE_COUNT states from this start.
+            result->disconnected_starts += 1u;
+        }
+    }
+
+    free(seen);
+    free(queue);
+    free(parent);
+    free(parent_action);
+    free(depth);
+}
+
+static int compare_records(const void* lhs, const void* rhs) {
+    /*
+     * qsort comparator for the final record order: ascending depth, then
+     * ascending score, start, target and packed_actions. Returns a
+     * negative, zero or positive value in the usual qsort convention; the
+     * 64-bit fields are compared with < so no subtraction can overflow.
+     */
+    const TargetRecord* a = (const TargetRecord*)lhs;
+    const TargetRecord* b = (const TargetRecord*)rhs;
+    if (a->depth != b->depth) {
+        return (int)a->depth - (int)b->depth;
+    }
+    if (a->score != b->score) {
+        return a->score < b->score ? -1 : 1;
+    }
+    if (a->start != b->start) {
+        return (int)a->start - (int)b->start;
+    }
+    if (a->target != b->target) {
+        return (int)a->target - (int)b->target;
+    }
+    if (a->packed_actions != b->packed_actions) {
+        return a->packed_actions < b->packed_actions ? -1 : 1;
+    }
+    return 0;
+}
+
+static void merge_results(
+        WorkerResult* merged, WorkerResult* workers, int worker_count,
+        const Options* options) {
+    /* Fold every worker's counters and record pools into `merged` (which is
+     * initialised here), then sort each merged pool with compare_records. */
+    init_worker_result(merged, options);
+    for (int worker_index = 0; worker_index < worker_count; worker_index++) {
+        WorkerResult* worker = &workers[worker_index];
+        merged->disconnected_starts += worker->disconnected_starts;
+        if (worker->max_distance > merged->max_distance) {
+            merged->max_distance = worker->max_distance;
+        }
+        for (int distance = 0; distance <= MAX_DISTANCE; distance++) {
+            merged->histogram[distance] += worker->histogram[distance];
+        }
+        for (int depth_index = 0; depth_index < TARGET_DEPTH_COUNT; depth_index++) {
+            DepthSample* dst = &merged->depths[depth_index];
+            DepthSample* src = &worker->depths[depth_index];
+            dst->exact_count += src->exact_count;
+            for (uint32_t i = 0; i < src->count; i++) {
+                if (add_record(dst, &src->records[i]) != 0) {
+                    fprintf(stderr, "failed to merge sampled target records\n");
+                    exit(2);
+                }
+            }
+        }
+    }
+    for (int depth_index = 0; depth_index < TARGET_DEPTH_COUNT; depth_index++) {
+        DepthSample* sample = &merged->depths[depth_index];
+        // qsort needs a valid base even for 0 elements; empty pools may be NULL.
+        if (sample->count < 2u) continue;
+        qsort(sample->records, sample->count, sizeof(TargetRecord), compare_records);
+    }
+}
+
+/* Fingerprint of the active action set: mixes its name, BITS, the action
+ * count and the constant 0xfe00, then every (index, action name) pair. */
+static uint64_t action_set_hash(void) {
+    uint64_t digest = mix_bytes(1469598103934665603ull, ACTIVE_ACTION_SET->name);
+    digest = mix_u64(mix_u64(digest, BITS), ACTIVE_ACTION_SET->num_actions);
+    digest = mix_u64(digest, 0xfe00u);
+    for (int a = 0; a < ACTIVE_ACTION_SET->num_actions; a++) {
+        digest = mix_u64(digest, (uint64_t)a);
+        digest = mix_bytes(digest, ACTIVE_ACTION_SET->names[a]);
+    }
+    return digest;
+}
+
+/* Checksum over the sampled records: starts from action_set_hash(), then for
+ * each depth mixes depth, exact_count and count, followed by every record's
+ * start, target, packed_actions, solution_length and depth (not its score). */
+static uint64_t checksum_records(const WorkerResult* result) {
+    uint64_t digest = mix_u64(1469598103934665603ull, action_set_hash());
+    for (int d = 0; d < TARGET_DEPTH_COUNT; d++) {
+        const DepthSample* bucket = &result->depths[d];
+        digest = mix_u64(digest, (uint64_t)bucket->depth);
+        digest = mix_u64(digest, bucket->exact_count);
+        digest = mix_u64(digest, bucket->count);
+        for (uint32_t r = 0; r < bucket->count; r++) {
+            const TargetRecord* rec = &bucket->records[r];
+            digest = mix_u64(mix_u64(digest, rec->start), rec->target);
+            digest = mix_u64(digest, rec->packed_actions);
+            digest = mix_u64(mix_u64(digest, rec->solution_length), rec->depth);
+        }
+    }
+    return digest;
+}
+
+static int write_bytes(FILE* file, const void* data, size_t size) {
+    return fwrite(data, 1, size, file) != size ? -1 : 0;  /* -1 on short write */
+}
+
+/* Write value as 2 bytes, least-significant first; returns write_bytes(). */
+static int write_u16(FILE* file, uint16_t value) {
+    unsigned char le[2];
+    le[0] = (unsigned char)(value & 0xffu);
+    le[1] = (unsigned char)((value >> 8) & 0xffu);
+    return write_bytes(file, le, sizeof(le));
+}
+
+/* Write value as 4 bytes, least-significant first, so the file layout does
+ * not depend on host byte order; returns the write_bytes() result. */
+static int write_u32(FILE* file, uint32_t value) {
+    unsigned char le[4];
+    for (int i = 0; i < 4; i++) {
+        le[i] = (unsigned char)((value >> (8 * i)) & 0xffu);
+    }
+    return write_bytes(file, le, sizeof(le));
+}
+
+static int write_u64(FILE* file, uint64_t value) {
+    unsigned char le[8];  /* least-significant byte first */
+    for (size_t at = 0; at < sizeof(le); at++, value >>= 8) {
+        le[at] = (unsigned char)(value & 0xffu);
+    }
+    return write_bytes(file, le, sizeof(le));
+}
+
+static uint32_t total_record_count(const WorkerResult* result) {
+    uint64_t total = 0;  /* 64-bit so a sum past UINT32_MAX is detectable */
+    for (int d = 0; d < TARGET_DEPTH_COUNT; d++) {
+        total += result->depths[d].count;
+    }
+    if (UINT32_MAX < total) {
+        fprintf(stderr, "too many target records for binary format\n");
+        exit(2);  /* the binary header stores this count in a u32 field */
+    }
+    return (uint32_t)total;
+}
+
+static uint32_t header_size(void) {
+    return (uint32_t)TARGET_DEPTH_COUNT * 24u + 52u;  /* 24 per depth + 52 fixed */
+}
+
+static int write_binary(const char* path, const WorkerResult* result) {
+    FILE* file = fopen(path, "wb");
+    if (file == NULL) {
+        fprintf(stderr, "failed to open %s: %s\n", path, strerror(errno));
+        return -1;
+    }
+    /* Layout: 52-byte header, 24-byte depth entries, then 16-byte records. */
+    const unsigned char magic[8] = {'A', 'L', '7', 'T', 'G', 'T', '1', '\0'};
+    uint32_t record_count = total_record_count(result);
+    uint64_t checksum = checksum_records(result);
+    uint64_t set_hash = action_set_hash();
+    int rc = 0;  /* ORed with every write result; nonzero means a write failed */
+    rc |= write_bytes(file, magic, sizeof(magic));
+    rc |= write_u32(file, FORMAT_VERSION);
+    rc |= write_u32(file, header_size());
+    rc |= write_u32(file, RECORD_SIZE);
+    rc |= write_u32(file, BITS);
+    rc |= write_u32(file, (uint32_t)ACTIVE_ACTION_SET->num_actions);
+    rc |= write_u32(file, TARGET_DEPTH_COUNT);
+    rc |= write_u32(file, record_count);
+    rc |= write_u64(file, checksum);
+    rc |= write_u64(file, set_hash);
+    /* Depth table; first_record is the running total of the earlier counts. */
+    uint32_t first_record = 0;
+    for (int depth_index = 0; depth_index < TARGET_DEPTH_COUNT; depth_index++) {
+        const DepthSample* sample = &result->depths[depth_index];
+        rc |= write_u32(file, (uint32_t)sample->depth);
+        rc |= write_u32(file, first_record);
+        rc |= write_u32(file, sample->count);
+        rc |= write_u32(file, 0u);  /* always zero */
+        rc |= write_u64(file, sample->exact_count);
+        first_record += sample->count;
+    }
+    /* Records follow, in the same depth order as the table entries. */
+    for (int depth_index = 0; depth_index < TARGET_DEPTH_COUNT; depth_index++) {
+        const DepthSample* sample = &result->depths[depth_index];
+        for (uint32_t i = 0; i < sample->count; i++) {
+            const TargetRecord* record = &sample->records[i];
+            rc |= write_u16(file, record->start);
+            rc |= write_u16(file, record->target);
+            rc |= write_u64(file, record->packed_actions);
+            rc |= fputc(record->solution_length, file) == EOF ? -1 : 0;
+            rc |= fputc(record->depth, file) == EOF ? -1 : 0;
+            rc |= write_u16(file, 0u);  /* always zero */
+        }
+    }
+
+    if (fclose(file) != 0) {
+        fprintf(stderr, "failed to close %s: %s\n", path, strerror(errno));
+        return -1;
+    }
+    if (rc != 0) {  /* write failures are reported only after the close */
+        fprintf(stderr, "failed to write %s\n", path);
+        return -1;
+    }
+    return 0;  /* every write and the close succeeded */
+}
+
+/* Write a JSON summary of `result` and the run options to `path`.
+ * Returns 0 on success, -1 if the file cannot be opened, written or closed.
+ * NOTE(review): names and options->output_bin are emitted without JSON
+ * escaping, so a '"' or '\' inside them produces an invalid document. */
+static int write_json(const char* path, const WorkerResult* result,
+        const Options* options) {
+    FILE* file = fopen(path, "w");
+    if (file == NULL) {
+        fprintf(stderr, "failed to open %s: %s\n", path, strerror(errno));
+        return -1;
+    }
+
+    uint32_t record_count = total_record_count(result);
+    uint64_t checksum = checksum_records(result);
+    uint64_t set_hash = action_set_hash();
+
+    fprintf(file, "{\n  \"action_id_to_name\": [\n");
+    for (int i = 0; i < ACTIVE_ACTION_SET->num_actions; i++) {
+        fprintf(file, "    \"%s\"%s\n", ACTIVE_ACTION_SET->names[i],
+            i == ACTIVE_ACTION_SET->num_actions - 1 ? "" : ",");
+    }
+    fprintf(file, "  ],\n  \"action_set\": \"%s\",\n", ACTIVE_ACTION_SET->name);
+    fprintf(file, "  \"action_set_hash\": \"0x%016llx\",\n",
+        (unsigned long long)set_hash);
+    fprintf(file, "  \"binary_path\": \"%s\",\n", options->output_bin);
+    fprintf(file, "  \"bits\": %d,\n", BITS);
+    fprintf(file, "  \"checksum\": \"0x%016llx\",\n",
+        (unsigned long long)checksum);
+    fprintf(file, "  \"depth_records\": [\n");
+    uint32_t first_record = 0;
+    for (int depth_index = 0; depth_index < TARGET_DEPTH_COUNT; depth_index++) {
+        const DepthSample* sample = &result->depths[depth_index];
+        fprintf(file,
+            "    {\"depth\": %d, \"exact_pair_count\": %llu, "
+            "\"first_record\": %u, \"stored_count\": %u}%s\n",
+            sample->depth,
+            (unsigned long long)sample->exact_count,
+            first_record,
+            sample->count,
+            depth_index == TARGET_DEPTH_COUNT - 1 ? "" : ",");
+        first_record += sample->count;
+    }
+    fprintf(file, "  ],\n  \"depths\": [");
+    for (int i = 0; i < TARGET_DEPTH_COUNT; i++) {
+        fprintf(file, "%s%d", i == 0 ? "" : ", ", TARGET_DEPTHS[i]);
+    }
+    fprintf(file, "],\n");
+    fprintf(file, "  \"disconnected_starts\": %llu,\n",
+        (unsigned long long)result->disconnected_starts);
+    fprintf(file, "  \"format\": \"affine_lock_visible_targets_bin\",\n");
+    fprintf(file, "  \"header_size\": %u,\n", header_size());
+    fprintf(file, "  \"max_distance\": %d,\n", result->max_distance);
+    fprintf(file, "  \"num_actions\": %d,\n", ACTIVE_ACTION_SET->num_actions);
+    fprintf(file, "  \"record_count\": %u,\n", record_count);
+    fprintf(file, "  \"record_size\": %d,\n", RECORD_SIZE);
+    fprintf(file, "  \"sample_per_depth\": %u,\n",
+        options->sample_per_depth);
+    fprintf(file, "  \"sample_seed\": %llu,\n",
+        (unsigned long long)options->sample_seed);
+    fprintf(file, "  \"stored_all_depths\": [");
+    int wrote_depth = 0;
+    for (int i = 0; i < TARGET_DEPTH_COUNT; i++) {
+        if (!options->store_all_depths[i]) {
+            continue;
+        }
+        fprintf(file, "%s%d", wrote_depth ? ", " : "", TARGET_DEPTHS[i]);
+        wrote_depth = 1;
+    }
+    fprintf(file, "],\n  \"version\": %d,\n", FORMAT_VERSION);
+    fprintf(file, "  \"visible_distance_histogram\": {\n");
+    for (int distance = 0; distance <= result->max_distance; distance++) {
+        fprintf(file, "%s    \"%d\": %llu", distance == 0 ? "" : ",\n", distance,
+            (unsigned long long)result->histogram[distance]);
+    }
+    fprintf(file, "\n  }\n}\n");
+
+    /* The stream error flag remembers any failed fprintf(); read it pre-close. */
+    int write_failed = ferror(file);
+    if (fclose(file) != 0) {
+        fprintf(stderr, "failed to close %s: %s\n", path, strerror(errno));
+        return -1;
+    }
+    if (write_failed) {
+        fprintf(stderr, "failed to write %s\n", path);
+        return -1;
+    }
+    return 0;
+}
+
+/* Strict unsigned parsers: 0 on success (writing *out), -1 otherwise. Any
+ * '-' is rejected because strtoul/strtoull negate "-N" instead of failing. */
+static int parse_uint32(const char* text, uint32_t* out) {
+    char* end = NULL;
+    errno = 0;
+    unsigned long value = strtoul(text, &end, 10);
+    int neg = strchr(text, '-') != NULL;  /* e.g. "-18446744073709551615" -> 1 */
+    if (neg || errno != 0 || end == text || *end != '\0' || value > UINT32_MAX) {
+        return -1;
+    }
+    *out = (uint32_t)value;
+    return 0;
+}
+
+static int parse_uint64(const char* text, uint64_t* out) {
+    char* end = NULL;
+    errno = 0;
+    unsigned long long value = strtoull(text, &end, 0);
+    if (strchr(text, '-') != NULL || errno != 0 || end == text || *end != '\0') {
+        return -1;
+    }
+    *out = (uint64_t)value;
+    return 0;
+}
+
+/* Print the command-line synopsis and the known action-set names to stderr. */
+static void print_usage(const char* program) {
+    fprintf(stderr,
+        "usage: %s [--action-set NAME] [--sample-per-depth N] "
+        "[--sample-seed N] [--store-all-depth D] "
+        "[--output-bin PATH] [--output-json PATH]\n", program);
+    fputs("available action sets:", stderr);
+    for (int s = 0; s < ACTION_SET_COUNT; s++) {
+        fprintf(stderr, " %s", ACTION_SETS[s].name);
+    }
+    fputc('\n', stderr);
+}
+
+/* Fill *options from argv (defaults first); 0 on success, -1 on bad usage. */
+static int parse_args(int argc, char** argv, Options* options) {
+    options->action_set = &ACTION_SETS[0];
+    options->output_bin = options->output_json = NULL;
+    options->sample_per_depth = 65536u;
+    options->sample_seed = 0u;
+    memset(options->store_all_depths, 0, sizeof(options->store_all_depths));
+    options->output_bin_explicit = options->output_json_explicit = 0;
+
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "--action-set") == 0 && i + 1 < argc) {
+            const ActionSet* action_set = action_set_by_name(argv[++i]);
+            if (action_set == NULL) {
+                fprintf(stderr, "unknown --action-set %s\n", argv[i]);
+                return -1;
+            }
+            options->action_set = action_set;
+        } else if (strcmp(argv[i], "--sample-per-depth") == 0 && i + 1 < argc) {
+            if (parse_uint32(argv[++i], &options->sample_per_depth) != 0) {
+                fprintf(stderr, "invalid --sample-per-depth value\n");
+                return -1;
+            }
+        } else if (strcmp(argv[i], "--sample-seed") == 0 && i + 1 < argc) {
+            if (parse_uint64(argv[++i], &options->sample_seed) != 0) {
+                fprintf(stderr, "invalid --sample-seed value\n");
+                return -1;
+            }
+        } else if (strcmp(argv[i], "--store-all-depth") == 0 && i + 1 < argc) {
+            uint32_t depth = 0;
+            if (parse_uint32(argv[++i], &depth) != 0) {
+                fprintf(stderr, "invalid --store-all-depth value\n");
+                return -1;
+            }
+            int depth_index = target_depth_index((int)depth);
+            if (depth_index < 0) {
+                fprintf(stderr, "unsupported --store-all-depth %u\n", depth);
+                return -1;
+            }
+            options->store_all_depths[depth_index] = 1;
+        } else if (strcmp(argv[i], "--output-bin") == 0 && i + 1 < argc) {
+            options->output_bin = argv[++i];
+            options->output_bin_explicit = 1;
+        } else if (strcmp(argv[i], "--output-json") == 0 && i + 1 < argc) {
+            options->output_json = argv[++i];
+            options->output_json_explicit = 1;
+        } else if (strcmp(argv[i], "--help") == 0) {
+            print_usage(argv[0]);
+            exit(0);
+        } else {  /* unknown flag, or a known flag missing its value */
+            print_usage(argv[0]);
+            return -1;
+        }
+    }
+
+    if (options->output_bin == NULL || !options->output_bin_explicit) {
+        options->output_bin = options->action_set->default_bin;
+    }
+    if (options->output_json == NULL || !options->output_json_explicit) {
+        options->output_json = options->action_set->default_json;
+    }
+    int d16_index = target_depth_index(16);  /* < 0: 16 is not a target depth */
+    if (options->action_set->store_all_d16_by_default && d16_index >= 0) {
+        options->store_all_depths[d16_index] = 1;
+    }
+    return 0;
+}
+
+int main(int argc, char** argv) {  /* returns 0 on success, 1 on failure */
+    Options options;
+    if (parse_args(argc, argv, &options) != 0) {
+        return 1;
+    }
+    /* Publish the chosen action set and seed through the file-scope state. */
+    ACTIVE_ACTION_SET = options.action_set;
+    ACTIVE_SAMPLE_SEED = options.sample_seed;
+    build_next_state();
+    int worker_count = 1;  /* stays 1 when built without OpenMP */
+#ifdef _OPENMP
+    worker_count = omp_get_max_threads();
+#endif
+    WorkerResult* workers =
+        (WorkerResult*)calloc((size_t)worker_count, sizeof(WorkerResult));
+    if (workers == NULL) {
+        fprintf(stderr, "failed to allocate worker results\n");
+        return 1;
+    }
+    /* Each thread initialises and fills the slot at its own thread number. */
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+    {
+        int worker_index = 0;
+#ifdef _OPENMP
+        worker_index = omp_get_thread_num();
+#endif
+        init_worker_result(&workers[worker_index], &options);
+        compute_worker_records(&workers[worker_index]);
+    }
+    /* Merge all slots, then write both outputs; JSON is tried even if bin fails. */
+    WorkerResult merged;
+    merge_results(&merged, workers, worker_count, &options);
+    int rc = 0;
+    if (write_binary(options.output_bin, &merged) != 0) {
+        rc = 1;
+    }
+    if (write_json(options.output_json, &merged, &options) != 0) {
+        rc = 1;
+    }
+    for (int i = 0; i < worker_count; i++) {
+        free_worker_result(&workers[i]);
+    }
+    free(workers);
+    free_worker_result(&merged);
+    return rc == 0 ? 0 : 1;
+}
diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py
index 36e27bf42a..fc753e5395 100644
--- a/pufferlib/sweep.py
+++ b/pufferlib/sweep.py
@@ -145,8 +145,11 @@ def _params_from_puffer_sweep(sweep_config, only_include=None):
         only_include = [p.strip() for p in sweep_config['sweep_only'].split(',')]
 
     for name, param in sweep_config.items():
-        if name in ('method', 'metric', 'metric_distribution', 'goal', 'downsample', 'use_gpu', 'prune_pareto',
-                    'sweep_only', 'max_suggestion_cost', 'early_stop_quantile', 'gpus', 'max_runs'):
+        if name in ('method', 'metric', 'metric_distribution', 'goal',
+                'downsample', 'use_gpu', 'prune_pareto', 'sweep_only',
+                'max_suggestion_cost', 'early_stop_quantile', 'gpus',
+                'max_runs', 'match_enemy_model_path', 'match_num_games',
+                'match_enemy_hidden_size', 'match_enemy_num_layers'):
             continue
 
         assert isinstance(param, dict), f'Param {name} is not a dict'