NateBJones-Projects · alanshurafa · Jun 13, 2026 · Jun 14, 2026 · Jun 14, 2026 · chatgpt-codex-connector
diff --git a/integrations/open-brain-rest/index.ts b/integrations/open-brain-rest/index.ts
@@ -424,8 +424,17 @@ async function createThought(body: z.infer<typeof captureSchema>) {
       ? "new"
       : null;
 
-  const update = {
-    embedding,
+  // When upsert_thought resolved the row via the original-fingerprint fallback,
+  // `content`/`embedding` here were computed from the OLD pre-correction text
+  // (a reimport of the original source). The matched row is the CORRECTED one,
+  // so overwriting its embedding with the stale-text vector would silently
+  // desync it from its corrected content. Skip the embedding overwrite on that
+  // path — the RPC already merged metadata and left content untouched, so this
+  // becomes a pure dedup hit.
+  const matchedViaOriginalFingerprint =
+    upsert.data?.matched_via === "original_fingerprint";
+
+  const update: Record<string, unknown> = {
     metadata,
     type,
     source_type: sourceType,
@@ -435,6 +444,9 @@ async function createThought(body: z.infer<typeof captureSchema>) {
     status,
     status_updated_at: status ? new Date().toISOString() : null,
   };
+  if (!matchedViaOriginalFingerprint) {
+    update.embedding = embedding;
+  }
 
   const { error } = await supabase.from("thoughts").update(update).eq("id", thoughtId);
   if (error) throw new Error(error.message);

diff --git a/schemas/enhanced-thoughts/README.md b/schemas/enhanced-thoughts/README.md
@@ -19,10 +19,16 @@ This schema extension adds six new columns to the `thoughts` table (`type`, `sen
 - **`get_thought_connections`** -- Finds thoughts that share metadata topics or people with a given thought.
 - **`backfill_thought_types(p_allowed_types TEXT[])`** -- Populates the new top-level `type` column from `metadata->>'type'`. The default allowlist covers the canonical eight values (`idea`, `task`, `person_note`, `reference`, `decision`, `lesson`, `meeting`, `journal`). Pass a custom array to accept additional values, or pass `NULL` to backfill whatever `metadata->>'type'` contains.
 
+It also overrides the base `upsert_thought` so structured columns (`type`, `source_type`, `importance`, `quality_score`, `sensitivity_tier`, `status`) stay in sync on capture, and (when `schemas/typed-reasoning-edges` is installed) installs one optional opt-in RPC:
+
+- **`match_thoughts_superseded_aware`** -- The same nearest-neighbor search as the core `match_thoughts`, plus a `superseded_by` column. Thoughts that have been replaced (the target of a `supersedes` edge in `thought_edges`) get a 0.8x ranking penalty so fresh thoughts surface above their stale predecessors. The core `match_thoughts` is left untouched; callers opt in by name.
+
 ## Prerequisites
 
 - Working Open Brain setup (see the getting-started guide in `docs/01-getting-started.md`)
 - Supabase project with the `thoughts` table, `match_thoughts` function, and `upsert_thought` function already created
+- Apply `schemas/workflow-status/` first if it is not already applied. The `upsert_thought` override here writes to the `status` and `status_updated_at` columns that `workflow-status` creates. Both files use `ADD COLUMN IF NOT EXISTS`, so applying them in either order is safe, but `workflow-status` must be present before the first `upsert_thought` call runs.
+- Optional: `schemas/typed-reasoning-edges/` (creates `public.thought_edges`). Required only for `match_thoughts_superseded_aware`. If it is absent, the rest of this migration still applies and that one function is skipped with a `NOTICE`; re-run this `schema.sql` after installing typed-reasoning-edges to add it.
 
 ## Credential Tracker
 
@@ -45,7 +51,7 @@ SUPABASE (from your Open Brain setup)
 2. Create a new query and paste the full contents of `schema.sql`
 3. Click **Run** to execute the migration
 4. Open **Table Editor** and select the `thoughts` table to confirm the new columns appear: `type`, `sensitivity_tier`, `importance`, `quality_score`, `source_type`, `enriched`
-5. Navigate to **Database > Functions** and verify the new functions exist: `search_thoughts_text`, `brain_stats_aggregate`, `get_thought_connections`, `backfill_thought_types`
+5. Navigate to **Database > Functions** and verify the functions exist: `search_thoughts_text`, `brain_stats_aggregate`, `get_thought_connections`, `backfill_thought_types`, and `upsert_thought`. If you have applied `schemas/typed-reasoning-edges/`, `match_thoughts_superseded_aware` is present too; if not, that one function is skipped (see Prerequisites)
 6. If you have existing thoughts with `type` or `source` values stored in the metadata JSONB, the script automatically calls `backfill_thought_types()` with the default canonical allowlist. If your brain uses non-canonical `type` values, re-run `SELECT backfill_thought_types(ARRAY['your','custom','types']);` or `SELECT backfill_thought_types(NULL);` to accept any value
 
 ## Expected Outcome
@@ -54,7 +60,7 @@ After running the migration:
 
 - The `thoughts` table has six new columns with sensible defaults:
   - `sensitivity_tier TEXT DEFAULT 'standard'` (canonical values: `'standard'`, `'personal'`, `'restricted'`)
-  - `importance SMALLINT DEFAULT 3` (scale: 1-5, where 3 is the default)
+  - `importance SMALLINT DEFAULT 3` (the column default is 3; the `upsert_thought` override accepts and clamps payload values to 0-100. See "Changes from v1" for why this is NOT the ExoCortex 0-6 scale.)
   - `quality_score NUMERIC(5,2) DEFAULT 50` (scale: 0-100, where 50 is the default)
   - `enriched BOOLEAN DEFAULT false`
   - `type TEXT` (nullable; populated by backfill or writers)
@@ -63,13 +69,33 @@ After running the migration:
 - Four new RPC functions callable via the Supabase client or REST API (`search_thoughts_text`, `brain_stats_aggregate`, `get_thought_connections`, `backfill_thought_types`).
 - Any existing thoughts with `type` or `source` in their metadata JSONB will have those values copied into the new top-level columns (via `backfill_thought_types()` for `type` with the canonical allowlist, plus an inline `UPDATE` for `source_type`).
 
+## Changes from v1
+
+v1.1 brings the RPCs in line with how the reference Open Brain install runs them today. Everything is additive and idempotent — re-running `schema.sql` on a v1 install is safe. The `upsert_thought` return payload keeps its `{id, fingerprint}` keys (one additive `matched_via` field is new; see below) and the `status` / `status_updated_at` handling is unchanged, so existing callers and `schemas/workflow-status/` are not affected.
+
+What changed for an existing install:
+
+- **`search_thoughts_text` now reads three control keys out of `p_filter`.** `start_date` and `end_date` (ISO 8601 timestamps) filter `created_at` to that range; `exclude_restricted` (boolean) drops restricted rows. A row counts as restricted when **either** the promoted `sensitivity_tier` column **or** `metadata->>'sensitivity_tier'` is `'restricted'`, so rows captured before this schema — or by canonical flows that keep the tier only in `metadata` (the same place `schemas/provenance-chains` reads it) — do not leak through on the column's `'standard'` default. These keys are stripped from the metadata-containment predicate, so they no longer require a literal metadata key of the same name. Any other `p_filter` key keeps its original `metadata @> filter` behavior. If you were (accidentally) relying on a metadata key literally named `start_date`/`end_date`/`exclude_restricted`, it is now interpreted as a control key instead.
+
+- **`upsert_thought` gained three dedup/merge guards.** The final bullet below describes the implementation change that makes them possible.
+  - *Original-fingerprint fallback*: when a thought's content is corrected, its fingerprint changes. If your update path appends the pre-edit fingerprint to an append-only array `metadata.original_fingerprints[]`, a later reimport of the original source text now lands on the corrected row as a dedup hit instead of inserting a stale sibling that "outvotes" the correction. Exact-fingerprint match still wins first. If you never write `original_fingerprints`, behavior is identical to v1 (an extra indexed lookup on miss, no semantic change). On this fallback path the incoming `p_content` is the **old, pre-correction** text, so the return payload now carries a `matched_via` field (`'inserted'`, `'fingerprint'`, or `'original_fingerprint'`) alongside the unchanged `{id, fingerprint}`. A caller that recomputes an embedding from `p_content` (such as `integrations/open-brain-rest`) checks for `matched_via = 'original_fingerprint'` and skips writing the stale-text embedding over the corrected row. The RPC itself never writes `content` or `embedding` on the merge path, so older callers that ignore `matched_via` keep their previous behavior.
+  - *User-edit guard*: keys listed in `metadata.user_edits` are treated as human-owned. On the merge path they (and the system-managed `user_edits` / `original_fingerprints` keys themselves) are stripped from the incoming patch so an automated reimport cannot resurrect stale values over a human correction. The guard now also covers the **promoted scalar columns**: if a field (`type`, `importance`, `quality_score`, `source_type`, `sensitivity_tier`) is marked human-owned, the scalar column is preserved too, so the column and `metadata.<key>` stay in agreement instead of the column being silently overwritten. If you never write `user_edits`, behavior is identical to v1.
+  - *Merge preserves omitted fields*: on a re-upsert that **omits** a structured field, the existing column is now kept rather than reset to a hardcoded default. A metadata-only re-upsert (new tags, a note) no longer rewrites `importance`/`quality_score`/`type`/`source_type`/`sensitivity_tier` — in particular it no longer silently downgrades `sensitivity_tier` from `restricted` to `standard`. An explicitly provided value still updates the column, and brand-new rows still get the documented insert defaults.
+  - To make the fallback possible the function now does an explicit fingerprint lookup and branches INSERT vs UPDATE instead of using `ON CONFLICT`. The visible result is the same `{id, fingerprint}` payload, plus the additive `matched_via` field described above.
+
+- **Importance stays on the 0-100 scale (deliberate deviation).** ExoCortex widened its own importance to a 0-6 scale. Open Brain's `upsert_thought` already accepts a wider 0-100 range, so it does not clip 0-6 values — adopting 0-6 here would retroactively rescale every existing row's importance, which is a breaking data change, not an additive one. The column default remains 3; payload values are clamped to 0-100. Treat 0-6 as a subset if you want cross-system parity.
+
+- **New opt-in RPC `match_thoughts_superseded_aware`** (installed only when `schemas/typed-reasoning-edges/` is present). It returns the same columns as the core `match_thoughts` plus `superseded_by UUID`, and applies a 0.8x penalty to thoughts that are the target of a `supersedes` edge so stale predecessors rank below their replacements without being excluded. The core `match_thoughts` is not modified. If `public.thought_edges` is missing, this function is skipped with a `NOTICE` and the rest of the migration still applies.
+
 ## Security
 
 This schema follows stock Open Brain's "service_role only" posture:
 
 - `brain_stats_aggregate` and `get_thought_connections` are `SECURITY DEFINER` with `SET search_path = public` (defense in depth against search-path hijacks). They can read the full `thoughts` table regardless of RLS.
 - `search_thoughts_text` is `SECURITY INVOKER` and respects RLS.
-- **None of the three RPCs are granted to `anon`.** Execute privilege is limited to `authenticated` and `service_role`. The publishable anon key cannot call them.
+- `match_thoughts_superseded_aware` is `SECURITY INVOKER` and granted to `service_role` only, matching the access posture of `public.thought_edges` (service-role only).
+- `upsert_thought` is granted to `service_role` only. The `exclude_restricted` control key on `search_thoughts_text` lets a caller drop restricted rows, but the default is `false` (restricted rows are returned), so set it explicitly when building any lower-trust surface.
+- **None of the read RPCs are granted to `anon`.** Execute privilege is limited to `authenticated` and `service_role` (or `service_role` only, per function above). The publishable anon key cannot call them.
 
 If you want to expose any of these to `anon` (for example, a public-read dashboard), add your own `GRANT EXECUTE ... TO anon;` in a follow-up migration and confirm that `p_exclude_restricted := true` (the default) plus your sensitivity-tier hygiene gives you the exposure surface you actually want. This is an explicit opt-in: the default stance is private.
 
@@ -83,3 +109,7 @@ Solution: Confirm your thoughts have content populated. Try a simple query first
 
 **Issue: brain_stats_aggregate returns empty types or topics**
 Solution: The function filters by `created_at`. Pass `p_since_days := 0` for all-time stats. Also confirm that your thoughts have the `type` column populated. If you use non-canonical type values in `metadata->>'type'` (anything outside `idea`, `task`, `person_note`, `reference`, `decision`, `lesson`, `meeting`, `journal`), call the backfill RPC with your own allowlist, e.g. `SELECT backfill_thought_types(ARRAY['idea','task','article','quote']);`, or `SELECT backfill_thought_types(NULL);` to accept whatever is present.
+
+## More from Nate
+
+Open Brain is built in the open by Nate B. Jones — more practical systems like this on his [Substack](https://substack.com/@natesnewsletter) and at [natebjones.com](https://natebjones.com).
diff --git a/schemas/enhanced-thoughts/metadata.json b/schemas/enhanced-thoughts/metadata.json
@@ -1,18 +1,18 @@
 {
   "name": "Enhanced Thoughts Columns and Utility RPCs",
-  "description": "Adds structured columns (type, importance, quality_score, sensitivity_tier, source_type, enriched) to the thoughts table and installs utility RPCs for full-text search, aggregate statistics, and thought connections.",
+  "description": "Adds structured columns (type, importance, quality_score, sensitivity_tier, source_type, enriched) to the thoughts table and installs utility RPCs for full-text search (with date and restricted-tier filters), aggregate statistics, thought connections, a dedup-guarded upsert, and an optional superseded-aware semantic search.",
   "category": "schemas",
   "author": {
     "name": "Alan Shurafa",
     "github": "alanshurafa"
   },
-  "version": "1.0.0",
+  "version": "1.1.0",
   "requires": {
     "open_brain": true
   },
-  "tags": ["schema", "metadata", "search", "statistics", "enrichment"],
+  "tags": ["schema", "metadata", "search", "statistics", "enrichment", "deduplication", "semantic-search"],
   "difficulty": "beginner",
   "estimated_time": "15 minutes",
   "created": "2026-04-06",
-  "updated": "2026-04-17"
+  "updated": "2026-06-13"
 }