From e0d9496f93da3186575c5a6847d57dcb892b557a Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 31 May 2026 13:47:25 -0400 Subject: [PATCH 1/7] drive: add GIGAFLOW W_lane (coarse map view) observation channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a per-agent top-K coarse-view observation built from samples spaced along all drivable lanes, augmented with Dijkstra distance to the agent's current goal (absolute + min-anchored relative). Replaces close-range lane centerline obs slots in the paper-aligned configuration. For each agent each step, emit obs_slots_coarse_n slots × 6 floats: ego-frame position (2), ego-relative heading (2), abs and min-anchored relative Dijkstra distance to the current goal (2). The lane graph is directed, so unreachable sample→goal pairs fall back to the closest spatially-reachable coarse sample (euclidean leg + graph leg) — this encodes a U-turn / lane-change cost without requiring undirected graph preprocessing. Off by default (obs_slots_coarse_n = 0). Map-load: build per-lane cumulative arclength on RoadMapElement, the road_to_lane_graph reverse map, and the coarse_samples array. All live on SharedMapData so use_map_cache shares them across envs. Goal projection (goal_lane_graph_idx + goal_along_s) refreshes at every goal-set / goal-advance site (compute_goals success path, set_start_position replay branch, c_step goal-advance block). Removed from reset_agent_state since goal projection is owned by the goal pipeline and outlives the generic per-episode state reset. Co-Authored-By: Claude Opus 4.7 --- pufferlib/config/ocean/drive.ini | 10 +- pufferlib/ocean/drive/binding.c | 4 + pufferlib/ocean/drive/datatypes.h | 23 ++ pufferlib/ocean/drive/drive.h | 359 ++++++++++++++++++++++++++++++ pufferlib/ocean/drive/drive.py | 13 ++ pufferlib/ocean/env_binding.h | 1 + 6 files changed, 409 insertions(+), 1 deletion(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 813d1970e..a4605d345 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -111,10 +111,18 @@ map_dir = "pufferlib/resources/drive/binaries/carla" num_maps = 8 ; --- Observation slot counts --- -obs_slots_lane_n = 80 +obs_slots_lane_n = 0 obs_slots_boundary_n = 80 obs_slots_partners_n = 16 obs_slots_traffic_controls_n = 4 +; GIGAFLOW W_lane (coarse map view). 0 disables; >0 adds top-K nearest coarse +; lane samples per agent within obs_range_coarse_m. Each slot is 6 floats: +; ego-frame pos (2), ego-relative heading (2), absolute and min-anchored +; Dijkstra distance to current goal (2). +obs_slots_coarse_n = 80 +coarse_sample_spacing_m = 40.0 +obs_range_coarse_m = 200.0 +obs_norm_coarse_dist_m = 200.0 ; Fraction of segment observation slots to drop (reduces obs size) obs_dropout_lane = 0.0 obs_dropout_boundary = 0.0 diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 13f37465b..f8658156f 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -1957,6 +1957,10 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { env->obs_slots_lane_n = (int) unpack(kwargs, "obs_slots_lane_n"); env->obs_slots_partners_n = (int) unpack(kwargs, "obs_slots_partners_n"); env->obs_slots_traffic_controls_n = (int) unpack(kwargs, "obs_slots_traffic_controls_n"); + env->obs_slots_coarse_n = (int) unpack(kwargs, "obs_slots_coarse_n"); + env->coarse_sample_spacing_m = (float) unpack(kwargs, "coarse_sample_spacing_m"); + env->obs_range_coarse_m = (float) unpack(kwargs, "obs_range_coarse_m"); + env->obs_norm_coarse_dist_m = (float) unpack(kwargs, "obs_norm_coarse_dist_m"); env->traffic_control_scope = (int) unpack(kwargs, "traffic_control_scope"); env->dt = (float) unpack(kwargs, "dt"); env->spawn_initial_speed = (float) unpack(kwargs, "spawn_initial_speed"); diff --git a/pufferlib/ocean/drive/datatypes.h b/pufferlib/ocean/drive/datatypes.h index 579c45c42..3affd3be3 100644 --- a/pufferlib/ocean/drive/datatypes.h +++ b/pufferlib/ocean/drive/datatypes.h @@ -249,6 +249,12 @@ struct Agent { float goal_position_z; // alias = goal_positions_z[current_goal_idx] int current_goal_idx; // index of next goal to reach (0..N-1) + // GIGAFLOW W_lane: goal projection (lane in the graph + along-lane arclength). + // Refreshed at every goal-set/advance site so per-step coarse-view emission + // can look up Dijkstra distance from each sample to the current goal in O(1). + int goal_lane_graph_idx; // -1 if no current goal lane found + float goal_along_s; + int stopped; // 0/1 -> freeze if set int removed; // 0/1 -> remove from sim if set @@ -291,6 +297,22 @@ struct RoadMapElement { int num_exits; int *exit_lanes; float speed_limit; + + // For drivable lanes only: per-vertex arclength from lane start (length = segment_length). + // NULL for non-drivable elements. Built post-load in build_lane_arclengths. + float *cumulative_s; +}; + +// Sampled point along a drivable lane centerline; used by the coarse map view +// (GIGAFLOW W_lane). Sampled every coarse_sample_spacing_m meters at map load. +struct CoarseSample { + float x; + float y; + float z; + float heading; // tangent at the sample + int road_idx; // index into Drive.road_elements (which lane this sits on) + int lane_graph_idx; // index into lane_graph (Dijkstra row); == road_to_lane_graph[road_idx] + float along_s; // arclength from lane start to this sample }; struct TrafficControlElement { @@ -339,6 +361,7 @@ void free_road_element(struct RoadMapElement *element) { free(element->headings); free(element->entry_lanes); free(element->exit_lanes); + free(element->cumulative_s); } void free_traffic_element(struct TrafficControlElement *element) { diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 1dc87e6fd..10676d757 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -124,6 +125,9 @@ #define PADDED_OBSERVATION_VALUE -0.001f #define STATIC_TARGET_FEATURES 3 #define DYNAMIC_TARGET_FEATURES 5 +// GIGAFLOW W_lane: per coarse-sample feature count. +// rel_x, rel_y, cos(dh), sin(dh), dist_abs_norm, dist_rel_norm (min-anchored). +#define COARSE_FEATURES 6 // GIGAFLOW specific #define MAX_ROUTE_LENGTH 64 @@ -321,6 +325,11 @@ struct SharedMapData { GridMap *grid_map; int *neighbor_offsets; struct LaneGraph lane_graph; + // GIGAFLOW W_lane: coarse samples spaced along all drivable lanes, plus the + // reverse map road_idx → lane_graph_idx so per-step distance lookups are O(1). + struct CoarseSample *coarse_samples; + int n_coarse_samples; + int *road_to_lane_graph; // size = num_road_elements; -1 for non-drivable elements int ref_count; pid_t owner_pid; }; @@ -360,6 +369,10 @@ struct Drive { int num_traffic_elements; int num_objects; struct LaneGraph lane_graph; + // GIGAFLOW W_lane (shared with SharedMapData when use_map_cache=1) + struct CoarseSample *coarse_samples; + int n_coarse_samples; + int *road_to_lane_graph; int static_agent_count; int *static_agent_indices; int expert_static_agent_count; @@ -428,6 +441,10 @@ struct Drive { int obs_slots_lane_n; int obs_slots_partners_n; int obs_slots_traffic_controls_n; + int obs_slots_coarse_n; // GIGAFLOW W_lane: top-K coarse-view lane samples per agent + float coarse_sample_spacing_m; + float obs_range_coarse_m; // Euclidean radius for top-K filter + float obs_norm_coarse_dist_m; int traffic_control_scope; int obs_slots_lane_kept; int obs_slots_boundary_kept; @@ -1550,6 +1567,186 @@ static void update_agent_z(Drive *env, Agent *agent) { agent->sim_z = sum_z / check_count; } +// ---------------------------------------- +// GIGAFLOW W_lane (coarse map view) helpers +// ---------------------------------------- + +// Per-vertex arclength from each drivable lane's start. Built once at map load, +// freed via free_road_element. Used to convert (lane, segment, segment-t) +// triples to lane-local arclength for both ego projection and goal projection. +static void build_lane_arclengths(Drive *env) { + for (int i = 0; i < env->num_road_elements; i++) { + RoadMapElement *r = &env->road_elements[i]; + r->cumulative_s = NULL; + if (!is_drivable_road_lane(r->type) || r->segment_length < 2) { + continue; + } + r->cumulative_s = (float *) malloc(r->segment_length * sizeof(float)); + r->cumulative_s[0] = 0.0f; + for (int k = 1; k < r->segment_length; k++) { + float dx = r->x[k] - r->x[k - 1]; + float dy = r->y[k] - r->y[k - 1]; + r->cumulative_s[k] = r->cumulative_s[k - 1] + sqrtf(dx * dx + dy * dy); + } + } +} + +// road_to_lane_graph[road_idx] = j s.t. lane_graph.lane_ids[j] == road_idx, else -1. +// Lets per-step distance lookups index the lane_graph distances matrix in O(1). +static void build_road_to_lane_graph(Drive *env) { + env->road_to_lane_graph = (int *) malloc(env->num_road_elements * sizeof(int)); + for (int i = 0; i < env->num_road_elements; i++) { + env->road_to_lane_graph[i] = -1; + } + for (int j = 0; j < env->lane_graph.n_lanes; j++) { + int road_id = env->lane_graph.lane_ids[j]; + if (road_id >= 0 && road_id < env->num_road_elements) { + env->road_to_lane_graph[road_id] = j; + } + } +} + +// Walk each drivable lane that participates in the lane graph, dropping a +// CoarseSample every coarse_sample_spacing_m meters along the centerline. +// Lanes shorter than the spacing still get one sample at s=0 (lane start). +static void build_coarse_samples(Drive *env) { + float spacing = env->coarse_sample_spacing_m; + if (spacing <= 0.0f) { + spacing = 40.0f; + } + + int total = 0; + for (int i = 0; i < env->num_road_elements; i++) { + RoadMapElement *r = &env->road_elements[i]; + if (r->cumulative_s == NULL || env->road_to_lane_graph[i] < 0) { + continue; + } + float lane_len = r->cumulative_s[r->segment_length - 1]; + if (lane_len <= 0.0f) { + continue; + } + total += (int) floorf(lane_len / spacing) + 1; + } + + env->n_coarse_samples = total; + env->coarse_samples + = (total > 0) ? (struct CoarseSample *) malloc(total * sizeof(struct CoarseSample)) : NULL; + + int out = 0; + for (int i = 0; i < env->num_road_elements; i++) { + RoadMapElement *r = &env->road_elements[i]; + if (r->cumulative_s == NULL || env->road_to_lane_graph[i] < 0) { + continue; + } + float lane_len = r->cumulative_s[r->segment_length - 1]; + if (lane_len <= 0.0f) { + continue; + } + int n_samples = (int) floorf(lane_len / spacing) + 1; + int lg = env->road_to_lane_graph[i]; + for (int k = 0; k < n_samples; k++) { + float s = (float) k * spacing; + if (s > lane_len) { + s = lane_len; + } + int seg = 0; + while (seg < r->segment_length - 2 && r->cumulative_s[seg + 1] < s) { + seg++; + } + float seg_s0 = r->cumulative_s[seg]; + float seg_s1 = r->cumulative_s[seg + 1]; + float t = (seg_s1 > seg_s0) ? (s - seg_s0) / (seg_s1 - seg_s0) : 0.0f; + struct CoarseSample *cs = &env->coarse_samples[out++]; + cs->x = r->x[seg] + t * (r->x[seg + 1] - r->x[seg]); + cs->y = r->y[seg] + t * (r->y[seg + 1] - r->y[seg]); + cs->z = r->z[seg] + t * (r->z[seg + 1] - r->z[seg]); + cs->heading = r->headings[seg]; + cs->road_idx = i; + cs->lane_graph_idx = lg; + cs->along_s = s; + } + } +} + +// Scan all drivable lanes for the one whose centerline projection is closest +// to (x, y); return its road_idx and along-lane arclength. Used to project a +// goal point onto the lane graph. Brute force over road_elements; called only +// at goal-set/advance time. +static void find_nearest_drivable_lane(Drive *env, float x, float y, int *out_road_idx, float *out_along_s) { + int best_road = -1; + float best_along_s = 0.0f; + float best_d2 = FLT_MAX; + for (int i = 0; i < env->num_road_elements; i++) { + RoadMapElement *r = &env->road_elements[i]; + if (r->cumulative_s == NULL || env->road_to_lane_graph[i] < 0) { + continue; + } + for (int seg = 0; seg < r->segment_length - 1; seg++) { + float dx = r->x[seg + 1] - r->x[seg]; + float dy = r->y[seg + 1] - r->y[seg]; + float len2 = dx * dx + dy * dy; + float t = (len2 > 0.0f) ? ((x - r->x[seg]) * dx + (y - r->y[seg]) * dy) / len2 : 0.0f; + if (t < 0.0f) t = 0.0f; + if (t > 1.0f) t = 1.0f; + float px = r->x[seg] + t * dx; + float py = r->y[seg] + t * dy; + float d2 = (px - x) * (px - x) + (py - y) * (py - y); + if (d2 < best_d2) { + best_d2 = d2; + best_road = i; + float seg_s0 = r->cumulative_s[seg]; + float seg_s1 = r->cumulative_s[seg + 1]; + best_along_s = seg_s0 + t * (seg_s1 - seg_s0); + } + } + } + *out_road_idx = best_road; + *out_along_s = best_along_s; +} + +// Refresh the agent's goal-lane projection from the current goal alias +// (goal_position_x/y/z). Call wherever the alias is reassigned — at episode +// start, when compute_goals resets, and when c_step advances the goal index. +static void set_agent_goal_projection(Drive *env, Agent *agent) { + agent->goal_lane_graph_idx = -1; + agent->goal_along_s = 0.0f; + if (env->road_to_lane_graph == NULL) { + return; + } + int road_idx = -1; + float along_s = 0.0f; + find_nearest_drivable_lane(env, agent->goal_position_x, agent->goal_position_y, &road_idx, &along_s); + if (road_idx >= 0) { + agent->goal_lane_graph_idx = env->road_to_lane_graph[road_idx]; + agent->goal_along_s = along_s; + } +} + +// Geodesic distance between two on-lane points using lane_graph's all-pairs +// Dijkstra. Convention: distances[i*n + j] = shortest path from END of lane i +// to START of lane j. Same-lane returns signed forward distance (clipped to 0). +// Unreachable pairs (distances[] sentinel = FLT_MAX or similar) are clipped to +// COARSE_DIST_MAX so the obs stays finite — the policy treats large values as +// "no path forward through this sample". +#define COARSE_DIST_MAX 2000.0f +static float coarse_lane_dist(Drive *env, int lg_a, float s_a, int lg_b, float s_b) { + if (lg_a < 0 || lg_b < 0 || env->lane_graph.distances == NULL) { + return COARSE_DIST_MAX; + } + if (lg_a == lg_b) { + float d = s_b - s_a; + return (d > 0.0f) ? d : 0.0f; + } + int n = env->lane_graph.n_lanes; + float lane_a_len = env->lane_graph.lane_lengths[lg_a]; + float graph_dist = env->lane_graph.distances[lg_a * n + lg_b]; + if (!isfinite(graph_dist) || graph_dist >= COARSE_DIST_MAX) { + return COARSE_DIST_MAX; + } + float total = (lane_a_len - s_a) + graph_dist + s_b; + return (total < COARSE_DIST_MAX) ? total : COARSE_DIST_MAX; +} + // ======================================== // Route/Path/Goal Functions // ======================================== @@ -2002,6 +2199,7 @@ static void compute_goals(Drive *env, int agent_idx) { agent->goal_position_x = agent->goal_positions_x[0]; agent->goal_position_y = agent->goal_positions_y[0]; agent->goal_position_z = agent->goal_positions_z[0]; + set_agent_goal_projection(env, agent); return; } @@ -3515,6 +3713,8 @@ static void free_shared_map_data(struct SharedMapData *shared) { free(shared->grid_map); free(shared->neighbor_offsets); free_lane_graph(&shared->lane_graph); + free(shared->coarse_samples); + free(shared->road_to_lane_graph); free(shared->map_name); for (int i = 0; i < g_map_cache_count; i++) { if (g_map_cache[i] == shared) { @@ -3545,6 +3745,9 @@ void init(Drive *env) { env->grid_map = shared->grid_map; env->neighbor_offsets = shared->neighbor_offsets; env->lane_graph = shared->lane_graph; + env->coarse_samples = shared->coarse_samples; + env->n_coarse_samples = shared->n_coarse_samples; + env->road_to_lane_graph = shared->road_to_lane_graph; env->shared_map = shared; shared->ref_count++; } else { @@ -3557,6 +3760,10 @@ void init(Drive *env) { env->grid_map->vision_range = 2 * vision_half_range + 1; init_neighbor_offsets(env); cache_neighbor_offsets(env); + // GIGAFLOW W_lane derived data (depends on road_elements + lane_graph being loaded). + build_lane_arclengths(env); + build_road_to_lane_graph(env); + build_coarse_samples(env); if (env->use_map_cache) { // Transfer the just-built geometry into a shared, ref-counted entry that // this env borrows (ref_count starts at 1). @@ -3567,6 +3774,9 @@ void init(Drive *env) { entry->grid_map = env->grid_map; entry->neighbor_offsets = env->neighbor_offsets; entry->lane_graph = env->lane_graph; + entry->coarse_samples = env->coarse_samples; + entry->n_coarse_samples = env->n_coarse_samples; + entry->road_to_lane_graph = env->road_to_lane_graph; entry->ref_count = 1; entry->owner_pid = getpid(); map_cache_insert(entry); @@ -3638,6 +3848,7 @@ void init(Drive *env) { agent->goal_position_x = agent->goal_positions_x[0]; agent->goal_position_y = agent->goal_positions_y[0]; agent->goal_position_z = agent->goal_positions_z[0]; + set_agent_goal_projection(env, agent); } } } @@ -3684,6 +3895,8 @@ void c_close(Drive *env) { free(env->grid_map->neighbor_cache_count); free(env->grid_map); free_lane_graph(&env->lane_graph); + free(env->coarse_samples); + free(env->road_to_lane_graph); } free(env->static_agent_indices); @@ -3700,6 +3913,7 @@ static int compute_observation_size(Drive *env) { : 0; return EGO_FEATURES + PARTNER_FEATURES * env->obs_slots_partners_n + ROAD_FEATURES * (env->obs_slots_lane_kept + env->obs_slots_boundary_kept) + + COARSE_FEATURES * env->obs_slots_coarse_n + TRAFFIC_CONTROL_FEATURES * env->obs_slots_traffic_controls_n + env->reward_conditioning * NUM_REWARD_COEFS + env->num_target_waypoints * target_features; } @@ -4671,6 +4885,147 @@ static int write_road_obs(Drive *env, Agent *ego, float *obs, int obs_idx, int * return obs_idx; } +// GIGAFLOW W_lane: per-agent top-K nearest coarse lane samples within +// obs_range_coarse_m. Each slot carries 6 floats: ego-frame position (2), +// ego-relative heading (2), absolute and min-anchored relative Dijkstra +// distance to the agent's current goal (2). Empty slots pad with the standard +// observation sentinel. Read ego->goal_lane_graph_idx / goal_along_s set at +// the goal-update sites. +static int write_coarse_obs(Drive *env, Agent *ego, float *obs, int obs_idx, int *coarse_count) { + int K = env->obs_slots_coarse_n; + if (K <= 0) { + *coarse_count = 0; + return obs_idx; + } + + float range_m = (env->obs_range_coarse_m > 0.0f) ? env->obs_range_coarse_m : 200.0f; + float range2 = range_m * range_m; + float dist_norm = (env->obs_norm_coarse_dist_m > 0.0f) ? env->obs_norm_coarse_dist_m : range_m; + float xy_norm = (env->obs_norm_xy_offset_m > 0.0f) ? env->obs_norm_xy_offset_m : 120.0f; + + int sel_idx[K]; + float sel_d2[K]; + int n_sel = 0; + for (int s = 0; s < env->n_coarse_samples; s++) { + struct CoarseSample *cs = &env->coarse_samples[s]; + float dx = cs->x - ego->sim_x; + float dy = cs->y - ego->sim_y; + float d2 = dx * dx + dy * dy; + if (d2 > range2) { + continue; + } + if (n_sel < K) { + int pos = n_sel; + while (pos > 0 && sel_d2[pos - 1] > d2) { + sel_d2[pos] = sel_d2[pos - 1]; + sel_idx[pos] = sel_idx[pos - 1]; + pos--; + } + sel_d2[pos] = d2; + sel_idx[pos] = s; + n_sel++; + } else if (d2 < sel_d2[K - 1]) { + int pos = K - 1; + while (pos > 0 && sel_d2[pos - 1] > d2) { + sel_d2[pos] = sel_d2[pos - 1]; + sel_idx[pos] = sel_idx[pos - 1]; + pos--; + } + sel_d2[pos] = d2; + sel_idx[pos] = s; + } + } + + *coarse_count = n_sel; + + // The lane graph is directed: opposite-direction lanes have inf distance + // to the goal lane through the graph alone. When a selected sample is + // unreachable, fall back to the nearest spatially-reachable coarse sample + // and use (euclidean(S, S') + graph_dist(S' -> goal)) — the spatial leg + // represents the implicit U-turn / lane-change cost. + int n_lanes = env->lane_graph.n_lanes; + int goal_lg = ego->goal_lane_graph_idx; + char reachable[env->n_coarse_samples]; + for (int s = 0; s < env->n_coarse_samples; s++) { + int slg = env->coarse_samples[s].lane_graph_idx; + if (slg < 0 || goal_lg < 0) { + reachable[s] = 0; + continue; + } + if (slg == goal_lg) { + reachable[s] = 1; + continue; + } + float gd = env->lane_graph.distances[slg * n_lanes + goal_lg]; + reachable[s] = (isfinite(gd) && gd < COARSE_DIST_MAX) ? 1 : 0; + } + + float abs_dist[K]; + float min_abs = FLT_MAX; + for (int k = 0; k < n_sel; k++) { + struct CoarseSample *cs = &env->coarse_samples[sel_idx[k]]; + if (reachable[sel_idx[k]]) { + abs_dist[k] + = coarse_lane_dist(env, cs->lane_graph_idx, cs->along_s, ego->goal_lane_graph_idx, ego->goal_along_s); + } else { + float best_d2 = FLT_MAX; + int best_idx = -1; + for (int s = 0; s < env->n_coarse_samples; s++) { + if (!reachable[s]) { + continue; + } + float dx = env->coarse_samples[s].x - cs->x; + float dy = env->coarse_samples[s].y - cs->y; + float d2 = dx * dx + dy * dy; + if (d2 < best_d2) { + best_d2 = d2; + best_idx = s; + } + } + if (best_idx >= 0) { + struct CoarseSample *cr = &env->coarse_samples[best_idx]; + float spatial_leg = sqrtf(best_d2); + float graph_leg = coarse_lane_dist( + env, cr->lane_graph_idx, cr->along_s, ego->goal_lane_graph_idx, ego->goal_along_s); + abs_dist[k] = spatial_leg + graph_leg; + if (abs_dist[k] > COARSE_DIST_MAX) { + abs_dist[k] = COARSE_DIST_MAX; + } + } else { + abs_dist[k] = COARSE_DIST_MAX; + } + } + if (abs_dist[k] < min_abs) { + min_abs = abs_dist[k]; + } + } + if (n_sel == 0) { + min_abs = 0.0f; + } + + for (int k = 0; k < K; k++) { + int base = obs_idx + k * COARSE_FEATURES; + if (k >= n_sel) { + for (int c = 0; c < COARSE_FEATURES; c++) { + obs[base + c] = PADDED_OBSERVATION_VALUE; + } + continue; + } + struct CoarseSample *cs = &env->coarse_samples[sel_idx[k]]; + float rel_x, rel_y; + project_point_to_ego_frame(ego, cs->x, cs->y, &rel_x, &rel_y); + float dh = compute_heading_diff(cs->heading, ego->sim_heading); + obs[base + 0] = rel_x / xy_norm; + obs[base + 1] = rel_y / xy_norm; + obs[base + 2] = cosf(dh); + obs[base + 3] = sinf(dh); + obs[base + 4] = abs_dist[k] / dist_norm; + obs[base + 5] = (abs_dist[k] - min_abs) / dist_norm; + } + + return obs_idx + K * COARSE_FEATURES; +} + static int write_traffic_control_obs(Drive *env, Agent *ego, float *obs, int obs_idx, int *traffic_control_count) { typedef struct { int idx; @@ -4755,6 +5110,7 @@ static void compute_observations(Drive *env) { int partner_count = 0; int lane_count = 0; int boundary_count = 0; + int coarse_count = 0; int traffic_control_count = 0; int obs_idx = 0; @@ -4762,6 +5118,7 @@ static void compute_observations(Drive *env) { obs_idx = write_reward_target_obs(env, ego, obs, obs_idx); obs_idx = write_partner_obs(env, ego, i, obs, obs_idx, &partner_count); obs_idx = write_road_obs(env, ego, obs, obs_idx, &lane_count, &boundary_count); + obs_idx = write_coarse_obs(env, ego, obs, obs_idx, &coarse_count); obs_idx = write_traffic_control_obs(env, ego, obs, obs_idx, &traffic_control_count); assert(obs_idx == obs_per_agent); } @@ -5128,6 +5485,7 @@ void c_reset(Drive *env) { agent->goal_position_x = agent->goal_positions_x[0]; agent->goal_position_y = agent->goal_positions_y[0]; agent->goal_position_z = agent->goal_positions_z[0]; + set_agent_goal_projection(env, agent); } else { build_path(env, agent_idx); compute_goals(env, agent_idx); @@ -5242,6 +5600,7 @@ void c_step(Drive *env) { agent->goal_position_x = agent->goal_positions_x[agent->current_goal_idx]; agent->goal_position_y = agent->goal_positions_y[agent->current_goal_idx]; agent->goal_position_z = agent->goal_positions_z[agent->current_goal_idx]; + set_agent_goal_projection(env, agent); } } } diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 577d42252..37540d3e4 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -86,6 +86,10 @@ def __init__( obs_slots_boundary_n=32, obs_slots_partners_n=16, obs_slots_traffic_controls_n=4, + obs_slots_coarse_n=0, + coarse_sample_spacing_m=40.0, + obs_range_coarse_m=200.0, + obs_norm_coarse_dist_m=200.0, traffic_control_scope=0, starting_map=0, obs_norm_goal_offset_m=100.0, @@ -181,6 +185,10 @@ def __init__( self.obs_slots_partners_n = obs_slots_partners_n self.traffic_control_scope = traffic_control_scope self.obs_slots_traffic_controls_n = obs_slots_traffic_controls_n + self.obs_slots_coarse_n = int(obs_slots_coarse_n) + self.coarse_sample_spacing_m = float(coarse_sample_spacing_m) + self.obs_range_coarse_m = float(obs_range_coarse_m) + self.obs_norm_coarse_dist_m = float(obs_norm_coarse_dist_m) self.obs_norm_goal_offset_m = float(obs_norm_goal_offset_m) self.obs_norm_xy_offset_m = float(obs_norm_xy_offset_m) self.obs_norm_veh_length_m = float(obs_norm_veh_length_m) @@ -226,6 +234,7 @@ def __init__( + self.obs_slots_partners_n * self.partner_features + self.obs_slots_lane_kept * self.road_features + self.obs_slots_boundary_kept * self.road_features + + self.obs_slots_coarse_n * binding.COARSE_FEATURES + self.obs_slots_traffic_controls_n * self.traffic_control_features ) @@ -397,6 +406,10 @@ def _env_init_kwargs(self, map_file, max_agents): "obs_slots_boundary_n": self.obs_slots_boundary_n, "obs_slots_partners_n": self.obs_slots_partners_n, "obs_slots_traffic_controls_n": self.obs_slots_traffic_controls_n, + "obs_slots_coarse_n": self.obs_slots_coarse_n, + "coarse_sample_spacing_m": self.coarse_sample_spacing_m, + "obs_range_coarse_m": self.obs_range_coarse_m, + "obs_norm_coarse_dist_m": self.obs_norm_coarse_dist_m, "traffic_control_scope": self.traffic_control_scope, "dt": self.dt, "spawn_initial_speed": self.spawn_initial_speed, diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 9c6cb56f6..9129c3ce0 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -1379,6 +1379,7 @@ PyMODINIT_FUNC PyInit_binding(void) { PyModule_AddIntConstant(m, "MAX_ENTITIES_PER_CELL", MAX_ENTITIES_PER_CELL); PyModule_AddIntConstant(m, "ROAD_FEATURES", ROAD_FEATURES); PyModule_AddIntConstant(m, "PARTNER_FEATURES", PARTNER_FEATURES); + PyModule_AddIntConstant(m, "COARSE_FEATURES", COARSE_FEATURES); PyModule_AddIntConstant(m, "TRAFFIC_CONTROL_FEATURES", TRAFFIC_CONTROL_FEATURES); PyModule_AddIntConstant(m, "NUM_TRAFFIC_CONTROL_TYPES", NUM_TRAFFIC_CONTROL_TYPES); PyModule_AddIntConstant(m, "NUM_TRAFFIC_CONTROL_STATES", NUM_TRAFFIC_CONTROL_STATES); From e480fa00dbd3c7082f5f21e0374dcfbe6a941bed Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 31 May 2026 14:22:00 -0400 Subject: [PATCH 2/7] drive: collapse W_lane coarse view into the lane channel + viz it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lane slots now carry coarse-view samples directly: the close-range lane-segment branch of write_road_obs is replaced with a top-K nearest coarse-sample scan (40m-spaced points along all drivable lanes within obs_range_coarse_m), and the existing 7-wide ROAD_FEATURES layout is reinterpreted in-place as [rel_x, rel_y, rel_z, dist_abs, dist_rel_min, cos_dh, sin_dh]. Boundary slots are untouched and still carry close-range ROAD_EDGE polyline segments via the grid map. Side effects of folding coarse into lane: obs_slots_coarse_n and COARSE_FEATURES disappear (lane slot count + ROAD_FEATURES already do that job), and the policy's existing lane_encoder consumes the new content with zero changes — same slot width, same plumbing. viz.py plot_observation: lane drawing reinterprets the 7 floats as coarse content, scatters dots colored by min-anchored dist_rel via RdYlGn_r (routing-best slot gets a lime ring), draws a short heading tick per dot. Padding check switched to the PADDED_OBSERVATION_VALUE sentinel. Fixes a pre-existing _img_from_fig DPI-mismatch crash on HiDPI displays by switching to buffer_rgba. scripts/render_coarse_obs.py: steps a Drive env and dumps the plot_observation render at configurable intervals so we can inspect how the coarse view evolves with the agent's position. Co-Authored-By: Claude Opus 4.7 --- pufferlib/config/ocean/drive.ini | 11 +- pufferlib/ocean/drive/binding.c | 1 - pufferlib/ocean/drive/drive.h | 350 ++++++++++++++----------------- pufferlib/ocean/drive/drive.py | 4 - pufferlib/ocean/env_binding.h | 1 - pufferlib/viz.py | 61 ++++-- scripts/render_coarse_obs.py | 87 ++++++++ 7 files changed, 292 insertions(+), 223 deletions(-) create mode 100644 scripts/render_coarse_obs.py diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index a4605d345..a6f398748 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -111,15 +111,14 @@ map_dir = "pufferlib/resources/drive/binaries/carla" num_maps = 8 ; --- Observation slot counts --- -obs_slots_lane_n = 0 +; Lane slots carry GIGAFLOW W_lane coarse-view samples (40m-spaced points +; along all drivable lanes, top-K nearest within obs_range_coarse_m). Each +; slot reuses the 7-float ROAD_FEATURES layout but reinterprets indices 3-4 +; as Dijkstra distance to goal (abs + min-anchored relative). +obs_slots_lane_n = 80 obs_slots_boundary_n = 80 obs_slots_partners_n = 16 obs_slots_traffic_controls_n = 4 -; GIGAFLOW W_lane (coarse map view). 0 disables; >0 adds top-K nearest coarse -; lane samples per agent within obs_range_coarse_m. Each slot is 6 floats: -; ego-frame pos (2), ego-relative heading (2), absolute and min-anchored -; Dijkstra distance to current goal (2). -obs_slots_coarse_n = 80 coarse_sample_spacing_m = 40.0 obs_range_coarse_m = 200.0 obs_norm_coarse_dist_m = 200.0 diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index f8658156f..1cfe40988 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -1957,7 +1957,6 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { env->obs_slots_lane_n = (int) unpack(kwargs, "obs_slots_lane_n"); env->obs_slots_partners_n = (int) unpack(kwargs, "obs_slots_partners_n"); env->obs_slots_traffic_controls_n = (int) unpack(kwargs, "obs_slots_traffic_controls_n"); - env->obs_slots_coarse_n = (int) unpack(kwargs, "obs_slots_coarse_n"); env->coarse_sample_spacing_m = (float) unpack(kwargs, "coarse_sample_spacing_m"); env->obs_range_coarse_m = (float) unpack(kwargs, "obs_range_coarse_m"); env->obs_norm_coarse_dist_m = (float) unpack(kwargs, "obs_norm_coarse_dist_m"); diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 10676d757..256e2d06c 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -125,9 +125,6 @@ #define PADDED_OBSERVATION_VALUE -0.001f #define STATIC_TARGET_FEATURES 3 #define DYNAMIC_TARGET_FEATURES 5 -// GIGAFLOW W_lane: per coarse-sample feature count. -// rel_x, rel_y, cos(dh), sin(dh), dist_abs_norm, dist_rel_norm (min-anchored). -#define COARSE_FEATURES 6 // GIGAFLOW specific #define MAX_ROUTE_LENGTH 64 @@ -438,12 +435,11 @@ struct Drive { int reward_randomization; int compute_eval_metrics; int obs_slots_boundary_n; - int obs_slots_lane_n; + int obs_slots_lane_n; // Coarse-view sample count (lane channel carries GIGAFLOW W_lane samples) int obs_slots_partners_n; int obs_slots_traffic_controls_n; - int obs_slots_coarse_n; // GIGAFLOW W_lane: top-K coarse-view lane samples per agent float coarse_sample_spacing_m; - float obs_range_coarse_m; // Euclidean radius for top-K filter + float obs_range_coarse_m; // Euclidean radius for top-K coarse-sample selection float obs_norm_coarse_dist_m; int traffic_control_scope; int obs_slots_lane_kept; @@ -3913,7 +3909,6 @@ static int compute_observation_size(Drive *env) { : 0; return EGO_FEATURES + PARTNER_FEATURES * env->obs_slots_partners_n + ROAD_FEATURES * (env->obs_slots_lane_kept + env->obs_slots_boundary_kept) - + COARSE_FEATURES * env->obs_slots_coarse_n + TRAFFIC_CONTROL_FEATURES * env->obs_slots_traffic_controls_n + env->reward_conditioning * NUM_REWARD_COEFS + env->num_target_waypoints * target_features; } @@ -4765,7 +4760,154 @@ static int write_partner_obs(Drive *env, Agent *ego, int agent_idx, float *obs, return obs_idx + (env->obs_slots_partners_n - partners_written) * PARTNER_FEATURES; } +// Lane slots now carry GIGAFLOW W_lane coarse-view samples (top-K nearest +// global lane samples within obs_range_coarse_m). Boundary slots still carry +// close-range ROAD_EDGE polylines from the grid map. The coarse slot reuses +// the original 7-wide ROAD_FEATURES layout but reinterprets indices 3-4 as +// goal distances (was seg_half_len, lane_width): +// [0] rel_x in ego frame / obs_norm_xy_offset_m +// [1] rel_y in ego frame / obs_norm_xy_offset_m +// [2] rel_z / Z_BUFFER +// [3] dist(sample -> goal) / obs_norm_coarse_dist_m (absolute) +// [4] (dist(sample -> goal) - min_k dist) / obs_norm_coarse_dist_m (min-anchored relative) +// [5] cos(sample_heading - ego_heading) +// [6] sin(sample_heading - ego_heading) +// Unreachable sample->goal pairs in the directed lane graph are replaced by +// (euclidean to nearest reachable sample) + (its graph distance to goal). static int write_road_obs(Drive *env, Agent *ego, float *obs, int obs_idx, int *lane_count, int *boundary_count) { + int lane_obs_idx = obs_idx; + int boundary_obs_idx = lane_obs_idx + env->obs_slots_lane_kept * ROAD_FEATURES; + obs_idx = boundary_obs_idx + env->obs_slots_boundary_kept * ROAD_FEATURES; + + // === Lane slots: coarse-view samples === + int K = env->obs_slots_lane_kept; + if (K > 0) { + float range_m = (env->obs_range_coarse_m > 0.0f) ? env->obs_range_coarse_m : 200.0f; + float range2 = range_m * range_m; + float dist_norm = (env->obs_norm_coarse_dist_m > 0.0f) ? env->obs_norm_coarse_dist_m : range_m; + float xy_norm = (env->obs_norm_xy_offset_m > 0.0f) ? env->obs_norm_xy_offset_m : 120.0f; + + int sel_idx[K]; + float sel_d2[K]; + int n_sel = 0; + for (int s = 0; s < env->n_coarse_samples; s++) { + struct CoarseSample *cs = &env->coarse_samples[s]; + float dx = cs->x - ego->sim_x; + float dy = cs->y - ego->sim_y; + float d2 = dx * dx + dy * dy; + if (d2 > range2) { + continue; + } + if (n_sel < K) { + int pos = n_sel; + while (pos > 0 && sel_d2[pos - 1] > d2) { + sel_d2[pos] = sel_d2[pos - 1]; + sel_idx[pos] = sel_idx[pos - 1]; + pos--; + } + sel_d2[pos] = d2; + sel_idx[pos] = s; + n_sel++; + } else if (d2 < sel_d2[K - 1]) { + int pos = K - 1; + while (pos > 0 && sel_d2[pos - 1] > d2) { + sel_d2[pos] = sel_d2[pos - 1]; + sel_idx[pos] = sel_idx[pos - 1]; + pos--; + } + sel_d2[pos] = d2; + sel_idx[pos] = s; + } + } + + // Per-sample reachability to goal lane via the directed lane graph. + int n_lanes = env->lane_graph.n_lanes; + int goal_lg = ego->goal_lane_graph_idx; + char reachable[env->n_coarse_samples]; + for (int s = 0; s < env->n_coarse_samples; s++) { + int slg = env->coarse_samples[s].lane_graph_idx; + if (slg < 0 || goal_lg < 0) { + reachable[s] = 0; + continue; + } + if (slg == goal_lg) { + reachable[s] = 1; + continue; + } + float gd = env->lane_graph.distances[slg * n_lanes + goal_lg]; + reachable[s] = (isfinite(gd) && gd < COARSE_DIST_MAX) ? 1 : 0; + } + + float abs_dist[K]; + float min_abs = FLT_MAX; + for (int k = 0; k < n_sel; k++) { + struct CoarseSample *cs = &env->coarse_samples[sel_idx[k]]; + if (reachable[sel_idx[k]]) { + abs_dist[k] + = coarse_lane_dist(env, cs->lane_graph_idx, cs->along_s, goal_lg, ego->goal_along_s); + } else { + float best_d2 = FLT_MAX; + int best_idx = -1; + for (int s = 0; s < env->n_coarse_samples; s++) { + if (!reachable[s]) { + continue; + } + float dx = env->coarse_samples[s].x - cs->x; + float dy = env->coarse_samples[s].y - cs->y; + float d2 = dx * dx + dy * dy; + if (d2 < best_d2) { + best_d2 = d2; + best_idx = s; + } + } + if (best_idx >= 0) { + struct CoarseSample *cr = &env->coarse_samples[best_idx]; + float spatial_leg = sqrtf(best_d2); + float graph_leg + = coarse_lane_dist(env, cr->lane_graph_idx, cr->along_s, goal_lg, ego->goal_along_s); + abs_dist[k] = spatial_leg + graph_leg; + if (abs_dist[k] > COARSE_DIST_MAX) { + abs_dist[k] = COARSE_DIST_MAX; + } + } else { + abs_dist[k] = COARSE_DIST_MAX; + } + } + if (abs_dist[k] < min_abs) { + min_abs = abs_dist[k]; + } + } + if (n_sel == 0) { + min_abs = 0.0f; + } + + for (int k = 0; k < K; k++) { + int base = lane_obs_idx + k * ROAD_FEATURES; + if (k >= n_sel) { + for (int c = 0; c < ROAD_FEATURES; c++) { + obs[base + c] = PADDED_OBSERVATION_VALUE; + } + continue; + } + struct CoarseSample *cs = &env->coarse_samples[sel_idx[k]]; + float rel_x, rel_y; + project_point_to_ego_frame(ego, cs->x, cs->y, &rel_x, &rel_y); + float rel_z = cs->z - ego->sim_z; + float dh = compute_heading_diff(cs->heading, ego->sim_heading); + obs[base + 0] = rel_x / xy_norm; + obs[base + 1] = rel_y / xy_norm; + obs[base + 2] = rel_z / Z_BUFFER; + obs[base + 3] = abs_dist[k] / dist_norm; + obs[base + 4] = (abs_dist[k] - min_abs) / dist_norm; + obs[base + 5] = cosf(dh); + obs[base + 6] = sinf(dh); + } + *lane_count = n_sel; + } else { + *lane_count = 0; + } + + // === Boundary slots: close-range ROAD_EDGE polylines via grid map === int grid_idx = get_grid_index(env, ego->sim_x, ego->sim_y); int neighbor_count = 0; const GridMapEntity *neighbor_entities = NULL; @@ -4774,27 +4916,18 @@ static int write_road_obs(Drive *env, Agent *ego, float *obs, int obs_idx, int * neighbor_entities = env->grid_map->neighbor_cache_entities[grid_idx]; } - int lane_obs_idx = obs_idx; - int boundary_obs_idx = lane_obs_idx + env->obs_slots_lane_kept * ROAD_FEATURES; - obs_idx = boundary_obs_idx + env->obs_slots_boundary_kept * ROAD_FEATURES; - - float lanes_buffer[env->obs_slots_lane_n * ROAD_FEATURES]; float boundaries_buffer[env->obs_slots_boundary_n * ROAD_FEATURES]; - float *lane_obs_dest = env->road_dropout_enabled ? lanes_buffer : &obs[lane_obs_idx]; float *boundary_obs_dest = env->road_dropout_enabled ? boundaries_buffer : &obs[boundary_obs_idx]; - int lanes_found = 0; int boundaries_found = 0; for (int k = 0; k < neighbor_count; k++) { - if (lanes_found >= env->obs_slots_lane_n && boundaries_found >= env->obs_slots_boundary_n) { + if (boundaries_found >= env->obs_slots_boundary_n) { break; } int entity_idx = neighbor_entities[k].entity_idx; int geometry_idx = neighbor_entities[k].geometry_idx; RoadMapElement *road_element = &env->road_elements[entity_idx]; - int is_lane = is_road_lane(road_element->type); - int is_edge = is_road_edge(road_element->type); - if (!is_lane && !is_edge) { + if (!is_road_edge(road_element->type)) { continue; } @@ -4824,7 +4957,7 @@ static int write_road_obs(Drive *env, Agent *ego, float *obs, int obs_idx, int * float seg_dir_y = (seg_half_len > 0) ? seg_dy / seg_half_len : seg_dy; float rel_seg_dir_x, rel_seg_dir_y; project_vector_to_ego_frame(ego, seg_dir_x, seg_dir_y, &rel_seg_dir_x, &rel_seg_dir_y); - if (is_edge && seg_half_len > 0) { + if (seg_half_len > 0) { float angle = atan2f(rel_seg_dir_y, rel_seg_dir_x); if (angle > (float) M_PI / 2.0f) { angle -= (float) M_PI; @@ -4835,35 +4968,22 @@ static int write_road_obs(Drive *env, Agent *ego, float *obs, int obs_idx, int * rel_seg_dir_y = sinf(angle); } - float *segment_dest = is_lane ? lane_obs_dest : boundary_obs_dest; - int *segment_count = is_lane ? &lanes_found : &boundaries_found; - int segment_cap = is_lane ? env->obs_slots_lane_n : env->obs_slots_boundary_n; - if (*segment_count >= segment_cap) { - continue; - } - int feature_base = (*segment_count)++ * ROAD_FEATURES; - segment_dest[feature_base] = rel_x / env->obs_norm_xy_offset_m; - segment_dest[feature_base + 1] = rel_y / env->obs_norm_xy_offset_m; - segment_dest[feature_base + 2] = rel_z / Z_BUFFER; - segment_dest[feature_base + 3] = seg_half_len / env->obs_norm_road_seg_length_m; - segment_dest[feature_base + 4] = LANE_WIDTH / env->obs_norm_road_seg_width_m; - segment_dest[feature_base + 5] = rel_seg_dir_x; - segment_dest[feature_base + 6] = rel_seg_dir_y; + int feature_base = boundaries_found * ROAD_FEATURES; + boundaries_found++; + boundary_obs_dest[feature_base] = rel_x / env->obs_norm_xy_offset_m; + boundary_obs_dest[feature_base + 1] = rel_y / env->obs_norm_xy_offset_m; + boundary_obs_dest[feature_base + 2] = rel_z / Z_BUFFER; + boundary_obs_dest[feature_base + 3] = seg_half_len / env->obs_norm_road_seg_length_m; + boundary_obs_dest[feature_base + 4] = LANE_WIDTH / env->obs_norm_road_seg_width_m; + boundary_obs_dest[feature_base + 5] = rel_seg_dir_x; + boundary_obs_dest[feature_base + 6] = rel_seg_dir_y; } if (env->road_dropout_enabled) { - int lanes_to_copy = (lanes_found < env->obs_slots_lane_kept) ? lanes_found : env->obs_slots_lane_kept; int boundaries_to_copy = (boundaries_found < env->obs_slots_boundary_kept) ? boundaries_found : env->obs_slots_boundary_kept; - *lane_count = lanes_to_copy; *boundary_count = boundaries_to_copy; - subsample_road_observation_rows(lanes_buffer, lanes_found, lanes_to_copy); subsample_road_observation_rows(boundaries_buffer, boundaries_found, boundaries_to_copy); - memcpy(&obs[lane_obs_idx], lanes_buffer, lanes_to_copy * ROAD_FEATURES * sizeof(float)); - fill_padded_observation_rows( - &obs[lane_obs_idx + lanes_to_copy * ROAD_FEATURES], - env->obs_slots_lane_kept - lanes_to_copy, - ROAD_FEATURES); memcpy(&obs[boundary_obs_idx], boundaries_buffer, boundaries_to_copy * ROAD_FEATURES * sizeof(float)); fill_padded_observation_rows( &obs[boundary_obs_idx + boundaries_to_copy * ROAD_FEATURES], @@ -4872,12 +4992,7 @@ static int write_road_obs(Drive *env, Agent *ego, float *obs, int obs_idx, int * return obs_idx; } - *lane_count = lanes_found; *boundary_count = boundaries_found; - fill_padded_observation_rows( - &obs[lane_obs_idx + lanes_found * ROAD_FEATURES], - env->obs_slots_lane_kept - lanes_found, - ROAD_FEATURES); fill_padded_observation_rows( &obs[boundary_obs_idx + boundaries_found * ROAD_FEATURES], env->obs_slots_boundary_kept - boundaries_found, @@ -4885,147 +5000,6 @@ static int write_road_obs(Drive *env, Agent *ego, float *obs, int obs_idx, int * return obs_idx; } -// GIGAFLOW W_lane: per-agent top-K nearest coarse lane samples within -// obs_range_coarse_m. Each slot carries 6 floats: ego-frame position (2), -// ego-relative heading (2), absolute and min-anchored relative Dijkstra -// distance to the agent's current goal (2). Empty slots pad with the standard -// observation sentinel. Read ego->goal_lane_graph_idx / goal_along_s set at -// the goal-update sites. -static int write_coarse_obs(Drive *env, Agent *ego, float *obs, int obs_idx, int *coarse_count) { - int K = env->obs_slots_coarse_n; - if (K <= 0) { - *coarse_count = 0; - return obs_idx; - } - - float range_m = (env->obs_range_coarse_m > 0.0f) ? env->obs_range_coarse_m : 200.0f; - float range2 = range_m * range_m; - float dist_norm = (env->obs_norm_coarse_dist_m > 0.0f) ? env->obs_norm_coarse_dist_m : range_m; - float xy_norm = (env->obs_norm_xy_offset_m > 0.0f) ? env->obs_norm_xy_offset_m : 120.0f; - - int sel_idx[K]; - float sel_d2[K]; - int n_sel = 0; - for (int s = 0; s < env->n_coarse_samples; s++) { - struct CoarseSample *cs = &env->coarse_samples[s]; - float dx = cs->x - ego->sim_x; - float dy = cs->y - ego->sim_y; - float d2 = dx * dx + dy * dy; - if (d2 > range2) { - continue; - } - if (n_sel < K) { - int pos = n_sel; - while (pos > 0 && sel_d2[pos - 1] > d2) { - sel_d2[pos] = sel_d2[pos - 1]; - sel_idx[pos] = sel_idx[pos - 1]; - pos--; - } - sel_d2[pos] = d2; - sel_idx[pos] = s; - n_sel++; - } else if (d2 < sel_d2[K - 1]) { - int pos = K - 1; - while (pos > 0 && sel_d2[pos - 1] > d2) { - sel_d2[pos] = sel_d2[pos - 1]; - sel_idx[pos] = sel_idx[pos - 1]; - pos--; - } - sel_d2[pos] = d2; - sel_idx[pos] = s; - } - } - - *coarse_count = n_sel; - - // The lane graph is directed: opposite-direction lanes have inf distance - // to the goal lane through the graph alone. When a selected sample is - // unreachable, fall back to the nearest spatially-reachable coarse sample - // and use (euclidean(S, S') + graph_dist(S' -> goal)) — the spatial leg - // represents the implicit U-turn / lane-change cost. - int n_lanes = env->lane_graph.n_lanes; - int goal_lg = ego->goal_lane_graph_idx; - char reachable[env->n_coarse_samples]; - for (int s = 0; s < env->n_coarse_samples; s++) { - int slg = env->coarse_samples[s].lane_graph_idx; - if (slg < 0 || goal_lg < 0) { - reachable[s] = 0; - continue; - } - if (slg == goal_lg) { - reachable[s] = 1; - continue; - } - float gd = env->lane_graph.distances[slg * n_lanes + goal_lg]; - reachable[s] = (isfinite(gd) && gd < COARSE_DIST_MAX) ? 1 : 0; - } - - float abs_dist[K]; - float min_abs = FLT_MAX; - for (int k = 0; k < n_sel; k++) { - struct CoarseSample *cs = &env->coarse_samples[sel_idx[k]]; - if (reachable[sel_idx[k]]) { - abs_dist[k] - = coarse_lane_dist(env, cs->lane_graph_idx, cs->along_s, ego->goal_lane_graph_idx, ego->goal_along_s); - } else { - float best_d2 = FLT_MAX; - int best_idx = -1; - for (int s = 0; s < env->n_coarse_samples; s++) { - if (!reachable[s]) { - continue; - } - float dx = env->coarse_samples[s].x - cs->x; - float dy = env->coarse_samples[s].y - cs->y; - float d2 = dx * dx + dy * dy; - if (d2 < best_d2) { - best_d2 = d2; - best_idx = s; - } - } - if (best_idx >= 0) { - struct CoarseSample *cr = &env->coarse_samples[best_idx]; - float spatial_leg = sqrtf(best_d2); - float graph_leg = coarse_lane_dist( - env, cr->lane_graph_idx, cr->along_s, ego->goal_lane_graph_idx, ego->goal_along_s); - abs_dist[k] = spatial_leg + graph_leg; - if (abs_dist[k] > COARSE_DIST_MAX) { - abs_dist[k] = COARSE_DIST_MAX; - } - } else { - abs_dist[k] = COARSE_DIST_MAX; - } - } - if (abs_dist[k] < min_abs) { - min_abs = abs_dist[k]; - } - } - if (n_sel == 0) { - min_abs = 0.0f; - } - - for (int k = 0; k < K; k++) { - int base = obs_idx + k * COARSE_FEATURES; - if (k >= n_sel) { - for (int c = 0; c < COARSE_FEATURES; c++) { - obs[base + c] = PADDED_OBSERVATION_VALUE; - } - continue; - } - struct CoarseSample *cs = &env->coarse_samples[sel_idx[k]]; - float rel_x, rel_y; - project_point_to_ego_frame(ego, cs->x, cs->y, &rel_x, &rel_y); - float dh = compute_heading_diff(cs->heading, ego->sim_heading); - obs[base + 0] = rel_x / xy_norm; - obs[base + 1] = rel_y / xy_norm; - obs[base + 2] = cosf(dh); - obs[base + 3] = sinf(dh); - obs[base + 4] = abs_dist[k] / dist_norm; - obs[base + 5] = (abs_dist[k] - min_abs) / dist_norm; - } - - return obs_idx + K * COARSE_FEATURES; -} - static int write_traffic_control_obs(Drive *env, Agent *ego, float *obs, int obs_idx, int *traffic_control_count) { typedef struct { int idx; @@ -5110,7 +5084,6 @@ static void compute_observations(Drive *env) { int partner_count = 0; int lane_count = 0; int boundary_count = 0; - int coarse_count = 0; int traffic_control_count = 0; int obs_idx = 0; @@ -5118,7 +5091,6 @@ static void compute_observations(Drive *env) { obs_idx = write_reward_target_obs(env, ego, obs, obs_idx); obs_idx = write_partner_obs(env, ego, i, obs, obs_idx, &partner_count); obs_idx = write_road_obs(env, ego, obs, obs_idx, &lane_count, &boundary_count); - obs_idx = write_coarse_obs(env, ego, obs, obs_idx, &coarse_count); obs_idx = write_traffic_control_obs(env, ego, obs, obs_idx, &traffic_control_count); assert(obs_idx == obs_per_agent); } diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 37540d3e4..15a762795 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -86,7 +86,6 @@ def __init__( obs_slots_boundary_n=32, obs_slots_partners_n=16, obs_slots_traffic_controls_n=4, - obs_slots_coarse_n=0, coarse_sample_spacing_m=40.0, obs_range_coarse_m=200.0, obs_norm_coarse_dist_m=200.0, @@ -185,7 +184,6 @@ def __init__( self.obs_slots_partners_n = obs_slots_partners_n self.traffic_control_scope = traffic_control_scope self.obs_slots_traffic_controls_n = obs_slots_traffic_controls_n - self.obs_slots_coarse_n = int(obs_slots_coarse_n) self.coarse_sample_spacing_m = float(coarse_sample_spacing_m) self.obs_range_coarse_m = float(obs_range_coarse_m) self.obs_norm_coarse_dist_m = float(obs_norm_coarse_dist_m) @@ -234,7 +232,6 @@ def __init__( + self.obs_slots_partners_n * self.partner_features + self.obs_slots_lane_kept * self.road_features + self.obs_slots_boundary_kept * self.road_features - + self.obs_slots_coarse_n * binding.COARSE_FEATURES + self.obs_slots_traffic_controls_n * self.traffic_control_features ) @@ -406,7 +403,6 @@ def _env_init_kwargs(self, map_file, max_agents): "obs_slots_boundary_n": self.obs_slots_boundary_n, "obs_slots_partners_n": self.obs_slots_partners_n, "obs_slots_traffic_controls_n": self.obs_slots_traffic_controls_n, - "obs_slots_coarse_n": self.obs_slots_coarse_n, "coarse_sample_spacing_m": self.coarse_sample_spacing_m, "obs_range_coarse_m": self.obs_range_coarse_m, "obs_norm_coarse_dist_m": self.obs_norm_coarse_dist_m, diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 9129c3ce0..9c6cb56f6 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -1379,7 +1379,6 @@ PyMODINIT_FUNC PyInit_binding(void) { PyModule_AddIntConstant(m, "MAX_ENTITIES_PER_CELL", MAX_ENTITIES_PER_CELL); PyModule_AddIntConstant(m, "ROAD_FEATURES", ROAD_FEATURES); PyModule_AddIntConstant(m, "PARTNER_FEATURES", PARTNER_FEATURES); - PyModule_AddIntConstant(m, "COARSE_FEATURES", COARSE_FEATURES); PyModule_AddIntConstant(m, "TRAFFIC_CONTROL_FEATURES", TRAFFIC_CONTROL_FEATURES); PyModule_AddIntConstant(m, "NUM_TRAFFIC_CONTROL_TYPES", NUM_TRAFFIC_CONTROL_TYPES); PyModule_AddIntConstant(m, "NUM_TRAFFIC_CONTROL_STATES", NUM_TRAFFIC_CONTROL_STATES); diff --git a/pufferlib/viz.py b/pufferlib/viz.py index c72d0a2f8..3d2b24cbf 100644 --- a/pufferlib/viz.py +++ b/pufferlib/viz.py @@ -505,8 +505,8 @@ def plot_simulator_state(scenario, timestep: int = 0) -> np.ndarray: def _img_from_fig(fig: matplotlib.figure.Figure, close: bool = True) -> np.ndarray: fig.subplots_adjust(left=0.01, bottom=0.02, right=1.00, top=0.96) fig.canvas.draw() - data = np.frombuffer(fig.canvas.tostring_argb(), dtype=np.uint8) - img = data.reshape(fig.canvas.get_width_height()[::-1] + (4,))[:, :, 1:] + rgba = np.asarray(fig.canvas.buffer_rgba()) + img = rgba[:, :, :3].copy() if close: plt.close(fig) return img @@ -745,32 +745,48 @@ def plot_observation( # Road elements rl2p = scales["road_length_to_position"] - rw2p = scales["road_width_to_position"] + + # Lane slots now carry GIGAFLOW W_lane coarse-view samples (40m-spaced + # global lane samples). Layout: rel_x, rel_y, rel_z, dist_abs, dist_rel, + # cos_dh, sin_dh. Padding slots are filled with PADDED_OBSERVATION_VALUE + # (-0.001) at every index. count_lane = 0 - for i in range(lane_obs.shape[0]): - if np.all(lane_obs[i] == 0): - continue - count_lane += 1 - rel_x, rel_y = lane_obs[i][0], lane_obs[i][1] - length, width = lane_obs[i][3] * rl2p, lane_obs[i][4] * rw2p - dir_cos, dir_sin = lane_obs[i][5], lane_obs[i][6] - color = "lightgrey" - ax.scatter(rel_x, rel_y, color=color, s=10, zorder=1) - ax.plot( - [rel_x + dir_cos * length / 2, rel_x - dir_cos * length / 2], - [rel_y + dir_sin * length / 2, rel_y - dir_sin * length / 2], - color=color, - linewidth=1, - zorder=1, - ) + rel_dists = lane_obs[:, 4] + valid_lane = ~np.all(np.isclose(lane_obs, -0.001), axis=1) + if valid_lane.any(): + vmax = float(rel_dists[valid_lane].max()) if rel_dists[valid_lane].size else 1.0 + if vmax <= 0: + vmax = 1.0 + cmap = plt.get_cmap("RdYlGn_r") + tick = 0.025 # heading tick half-length in normalized obs coords + for i in range(lane_obs.shape[0]): + if not valid_lane[i]: + continue + count_lane += 1 + rel_x, rel_y = lane_obs[i][0], lane_obs[i][1] + dist_abs = lane_obs[i][3] + dist_rel = lane_obs[i][4] + cos_dh, sin_dh = lane_obs[i][5], lane_obs[i][6] + color = cmap(min(1.0, dist_rel / vmax)) + ax.scatter(rel_x, rel_y, color=color, s=28, zorder=2, edgecolors="black", linewidths=0.4) + ax.plot( + [rel_x, rel_x + cos_dh * tick], + [rel_y, rel_y + sin_dh * tick], + color=color, + linewidth=1.2, + zorder=2, + ) + if np.isclose(dist_rel, 0.0, atol=1e-4): + # Min-anchored zero — the routing-best sample in this K-set. + ax.scatter(rel_x, rel_y, facecolors="none", edgecolors="lime", s=120, linewidths=2.0, zorder=3) count_boundary = 0 for i in range(boundary_obs.shape[0]): - if np.all(boundary_obs[i] == 0): + if np.all(np.isclose(boundary_obs[i], -0.001)) or np.all(boundary_obs[i] == 0): continue count_boundary += 1 rel_x, rel_y = boundary_obs[i][0], boundary_obs[i][1] - length, width = boundary_obs[i][3] * rl2p, boundary_obs[i][4] * rw2p + length = boundary_obs[i][3] * rl2p dir_cos, dir_sin = boundary_obs[i][5], boundary_obs[i][6] color = "black" ax.scatter(rel_x, rel_y, color=color, s=10, zorder=1) @@ -785,7 +801,8 @@ def plot_observation( ax.text( 0.12, 0.95, - f"Lanes: {count_lane}\nBoundaries: {count_boundary}", + f"Coarse samples: {count_lane}\nBoundaries: {count_boundary}\n" + "(green = routing-best in K)", transform=ax.transAxes, fontsize=10, verticalalignment="top", diff --git a/scripts/render_coarse_obs.py b/scripts/render_coarse_obs.py new file mode 100644 index 000000000..51885e908 --- /dev/null +++ b/scripts/render_coarse_obs.py @@ -0,0 +1,87 @@ +"""Render an agent's observation at multiple points along an episode so we can +see how the GIGAFLOW W_lane coarse view (lane slots) changes with position. + +Saves per-frame PNGs and a single side-by-side grid PNG under +benchmark/coarse_view_renders/. +""" + +import argparse +import os + +import matplotlib.pyplot as plt +import numpy as np + +from pufferlib.ocean.drive.drive import Drive +from pufferlib.viz import plot_observation + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--map", default="pufferlib/resources/drive/binaries/carla/opendrive__Town10HD.bin") + parser.add_argument("--frames", type=int, default=8, help="number of obs renders along the episode") + parser.add_argument("--steps-between", type=int, default=20, help="steps between renders") + parser.add_argument("--agent-idx", type=int, default=0) + parser.add_argument("--obs-slots-lane-n", type=int, default=80) + parser.add_argument("--obs-slots-boundary-n", type=int, default=80) + parser.add_argument("--obs-range-coarse-m", type=float, default=200.0) + parser.add_argument("--obs-norm-coarse-dist-m", type=float, default=200.0) + parser.add_argument("--num-agents", type=int, default=4) + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--out-dir", default="benchmark/coarse_view_renders") + parser.add_argument("--dpi", type=int, default=200, help="figure DPI for the captured raster") + args = parser.parse_args() + + os.makedirs(args.out_dir, exist_ok=True) + plt.rcParams["figure.dpi"] = args.dpi + + env = Drive( + num_agents=args.num_agents, + max_agents_per_env=args.num_agents, + min_agents_per_env=args.num_agents, + map_dir=args.map, + num_maps=1, + obs_slots_lane_n=args.obs_slots_lane_n, + obs_slots_boundary_n=args.obs_slots_boundary_n, + obs_range_coarse_m=args.obs_range_coarse_m, + obs_norm_coarse_dist_m=args.obs_norm_coarse_dist_m, + num_target_waypoints=1, + simulation_mode="gigaflow", + control_mode="control_vehicles", + use_map_cache=1, + scenario_length=max(400, args.frames * args.steps_between + 20), + seed=args.seed, + ) + env.reset() + + plot_kwargs = dict( + target_type=env.target_type, + reward_conditioning=env.reward_conditioning, + num_target_waypoints=env.num_target_waypoints, + obs_slots_partners_n=env.obs_slots_partners_n, + obs_slots_lane_n=env.obs_slots_lane_n, + obs_slots_boundary_n=env.obs_slots_boundary_n, + obs_slots_traffic_controls_n=env.obs_slots_traffic_controls_n, + obs_dropout_lane=env.obs_dropout_lane, + obs_dropout_boundary=env.obs_dropout_boundary, + agent_idx=args.agent_idx, + obs_norm_goal_offset_m=env.obs_norm_goal_offset_m, + obs_norm_xy_offset_m=env.obs_norm_xy_offset_m, + obs_norm_veh_width_m=env.obs_norm_veh_width_m, + obs_norm_veh_length_m=env.obs_norm_veh_length_m, + obs_norm_road_seg_length_m=env.obs_norm_road_seg_length_m, + obs_norm_road_seg_width_m=env.obs_norm_road_seg_width_m, + ) + + for f in range(args.frames): + img = plot_observation(env.observations.copy(), **plot_kwargs) + path = os.path.join(args.out_dir, f"obs_t{f * args.steps_between:04d}.png") + plt.imsave(path, img) + print(f"saved {path} ({img.shape[1]}x{img.shape[0]})") + for _ in range(args.steps_between): + env.step(np.zeros(env.actions.shape, dtype=env.actions.dtype)) + + env.close() + + +if __name__ == "__main__": + main() From 5d0531dd7595bcc656b575da712293b3501d727f Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 31 May 2026 16:06:42 -0400 Subject: [PATCH 3/7] cluster: switch single-agent eval to egl + bev/sim_state every 20 epochs Replaces the obs_html eval at interval=250 (~ once per 1B-step run) with the egl mp4 backend at interval=20 (~ every 50M env steps), capturing both bev (top-down following the agent) and sim_state (chase) views per scenario. render_num_scenarios drops to 4 so each eval cycle stays cheap. Co-Authored-By: Claude Opus 4.7 --- scripts/cluster_configs/single_agent_speed_run.yaml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/cluster_configs/single_agent_speed_run.yaml b/scripts/cluster_configs/single_agent_speed_run.yaml index 300a8281e..23c182f03 100644 --- a/scripts/cluster_configs/single_agent_speed_run.yaml +++ b/scripts/cluster_configs/single_agent_speed_run.yaml @@ -35,13 +35,14 @@ env.obs_slots_traffic_controls_n: 0 # Training train.total_timesteps: 1_000_000_000 -# Disable nuPlan-based evaluators. validation_gigaflow stays on but is -# reconfigured to triage_html (CPU-only scene playback + per-episode metrics -# including dnf_rate), pinned to the same Town10HD single-agent setup the -# training uses, so each eval cycle drops a gallery of rollouts you can browse -# for DNFs. The eight-way ffmpeg/EGL spike of the default egl backend is gone. eval.validation_replay.enabled: 0 -eval.validation_gigaflow.render_backend: obs_html +# validation_gigaflow is the live single-agent eval. The egl backend writes +# agent-following mp4s (bev + chase) every ~50M env steps so we can scrub +# behavior changes during training. +eval.validation_gigaflow.render_backend: egl +eval.validation_gigaflow.render_views: ["bev", "sim_state"] +eval.validation_gigaflow.interval: 20 +eval.validation_gigaflow.eval.render_num_scenarios: 4 eval.validation_gigaflow.env.map_dir: pufferlib/resources/drive/binaries/carla/opendrive__Town10HD.bin eval.validation_gigaflow.env.num_maps: 1 eval.validation_gigaflow.env.num_agents: 64 From 12490a964d9d3b9c5fa822d1fd9d73d2738b59d8 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 31 May 2026 16:22:18 -0400 Subject: [PATCH 4/7] =?UTF-8?q?cluster:=20drop=20render=5Fviews=20from=20y?= =?UTF-8?q?aml=20=E2=80=94=20submit=5Fcluster.py=20mangles=20list=20args?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit submit_cluster.py serializes the YAML list ["bev","sim_state"] as a Python list repr ('[\"bev\",\"sim_state\"]') and passes it to argparse as a single shell token, which trips the inner shell. The two views we want are already the drive.ini default for validation_gigaflow.render_views, so the override was redundant anyway. Co-Authored-By: Claude Opus 4.7 --- scripts/cluster_configs/single_agent_speed_run.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/cluster_configs/single_agent_speed_run.yaml b/scripts/cluster_configs/single_agent_speed_run.yaml index 23c182f03..3a3c55d50 100644 --- a/scripts/cluster_configs/single_agent_speed_run.yaml +++ b/scripts/cluster_configs/single_agent_speed_run.yaml @@ -37,10 +37,9 @@ train.total_timesteps: 1_000_000_000 eval.validation_replay.enabled: 0 # validation_gigaflow is the live single-agent eval. The egl backend writes -# agent-following mp4s (bev + chase) every ~50M env steps so we can scrub -# behavior changes during training. +# agent-following mp4s (bev + sim_state inherited from drive.ini) every +# ~50M env steps so we can scrub behavior changes during training. eval.validation_gigaflow.render_backend: egl -eval.validation_gigaflow.render_views: ["bev", "sim_state"] eval.validation_gigaflow.interval: 20 eval.validation_gigaflow.eval.render_num_scenarios: 4 eval.validation_gigaflow.env.map_dir: pufferlib/resources/drive/binaries/carla/opendrive__Town10HD.bin From b27dbd795a8f0adaee88e3ec22d3a344d7ee2e38 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 31 May 2026 16:24:42 -0400 Subject: [PATCH 5/7] cluster: move interval override to eval.validation_defaults pufferl's argparser only generates --eval.. CLI flags for keys already present in the [eval.] ini section. interval is set in [eval.validation_defaults] but NOT in [eval.validation_gigaflow], so --eval.validation-gigaflow.interval was an unrecognized arg. Override on the parent defaults section instead; validation_gigaflow inherits and the other inheriting evals are all disabled in this yaml. Co-Authored-By: Claude Opus 4.7 --- scripts/cluster_configs/single_agent_speed_run.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/cluster_configs/single_agent_speed_run.yaml b/scripts/cluster_configs/single_agent_speed_run.yaml index 3a3c55d50..aed0a4a8e 100644 --- a/scripts/cluster_configs/single_agent_speed_run.yaml +++ b/scripts/cluster_configs/single_agent_speed_run.yaml @@ -38,9 +38,12 @@ train.total_timesteps: 1_000_000_000 eval.validation_replay.enabled: 0 # validation_gigaflow is the live single-agent eval. The egl backend writes # agent-following mp4s (bev + sim_state inherited from drive.ini) every -# ~50M env steps so we can scrub behavior changes during training. +# ~50M env steps so we can scrub behavior changes during training. The +# interval override is placed on validation_defaults (validation_gigaflow +# inherits it) because pufferl's argparser only generates --eval.. +# flags for keys already present in the [eval.] ini section. +eval.validation_defaults.interval: 20 eval.validation_gigaflow.render_backend: egl -eval.validation_gigaflow.interval: 20 eval.validation_gigaflow.eval.render_num_scenarios: 4 eval.validation_gigaflow.env.map_dir: pufferlib/resources/drive/binaries/carla/opendrive__Town10HD.bin eval.validation_gigaflow.env.num_maps: 1 From a700313282db88645fce73d37a77da89c05de346 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 31 May 2026 16:27:07 -0400 Subject: [PATCH 6/7] =?UTF-8?q?pufferl:=20argparse=20=E2=80=94=20also=20em?= =?UTF-8?q?it=20override=20flags=20for=20inherited=20eval=20keys?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this, --eval.. CLI flags only existed for keys physically present in the [eval.] ini section. Overriding a key that only lived in the parent (inherits = "validation_defaults") meant either adding placeholder lines to the child section or routing the override through the parent (which then affects every sibling that inherits the same parent). Either way pollutes config. Add a second pass that walks the `inherits` chain of each [eval.] section and registers --eval.. flags backed by argparse.SUPPRESS, so the value is only carried into the nested config dict when the user actually passes the flag. Inheritance merge in EvalManager continues to use the parent value otherwise. Co-Authored-By: Claude Opus 4.7 --- pufferlib/pufferl.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 809225b7d..2b36210ac 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -2161,6 +2161,42 @@ def puffer_type(value): fmt = f"--{key}" if section == "base" else f"--{section}.{key}" parser.add_argument(fmt.replace("_", "-"), default=puffer_type(p[section][key]), type=puffer_type) + # Also emit override flags for keys inherited via `inherits = "..."` in + # [eval.] sections. Without this, an eval section can only override + # keys that physically appear inside its own section, which forces hacky + # placeholder lines or pushes the override onto the parent (affecting any + # sibling that also inherits from the same parent). Suppressed-by-default + # so the existing inherits-merge keeps using the parent value unless the + # CLI flag is actually passed. + def _resolve_inherits_chain(section_name): + chain, cur, seen = [], section_name, set() + while cur not in seen: + seen.add(cur) + parent = p[cur].get("inherits") if cur in p else None + if not parent: + break + parent = parent.strip().strip('"').strip("'") + parent_section = f"eval.{parent}" + if parent_section not in p: + break + chain.append(parent_section) + cur = parent_section + return chain + + existing_flags = {a.option_strings[0] for a in parser._actions if a.option_strings} + for section in p.sections(): + if not section.startswith("eval.") or section == "eval.validation_defaults": + continue + for parent_section in _resolve_inherits_chain(section): + for key in p[parent_section]: + if key == "inherits" or key in p[section]: + continue + fmt = f"--{section}.{key}".replace("_", "-") + if fmt in existing_flags: + continue + parser.add_argument(fmt, default=argparse.SUPPRESS, type=puffer_type) + existing_flags.add(fmt) + parser.add_argument( "-h", "--help", default=argparse.SUPPRESS, action="help", help="Show this help message and exit" ) From 406ebac95089cb3ce2302f4d5255bf7e464afe9a Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 31 May 2026 16:27:42 -0400 Subject: [PATCH 7/7] pufferl: drop redundant validation_defaults skip in inherits-chain pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _resolve_inherits_chain returns [] for validation_defaults (no `inherits` key), so the inner loop already does nothing for it. The explicit skip was misleading — it suggested some special semantic for that section when there isn't one. Co-Authored-By: Claude Opus 4.7 --- pufferlib/pufferl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 2b36210ac..ad68aabd8 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -2185,7 +2185,7 @@ def _resolve_inherits_chain(section_name): existing_flags = {a.option_strings[0] for a in parser._actions if a.option_strings} for section in p.sections(): - if not section.startswith("eval.") or section == "eval.validation_defaults": + if not section.startswith("eval."): continue for parent_section in _resolve_inherits_chain(section): for key in p[parent_section]: