From 119929e0f7443fa5a99f6d5f910a5e10d02886db Mon Sep 17 00:00:00 2001 From: Alan Shurafa Date: Fri, 17 Apr 2026 21:17:30 -0400 Subject: [PATCH 1/8] [schemas] Fix REVIEW-BLOCKER-1: align README with SQL defaults Why: The README claimed defaults (importance=5, quality_score=0.50, sensitivity_tier='normal') that disagreed with schema.sql's actual values (3, 50, 'standard'). The 'normal' tier would have broken every other contribution in the repo that expects 'standard'. The SQL is authoritative -- update the README to match. Also fix the ranking formula's coalesce fallback (quality_score used 0.50 while the column is 0..100) so NULL rows don't get a near-zero rank bonus. --- schemas/enhanced-thoughts/README.md | 8 +++++++- schemas/enhanced-thoughts/schema.sql | 6 ++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/schemas/enhanced-thoughts/README.md b/schemas/enhanced-thoughts/README.md index 5e1c69b93..4b6c2a2b0 100644 --- a/schemas/enhanced-thoughts/README.md +++ b/schemas/enhanced-thoughts/README.md @@ -44,7 +44,13 @@ SUPABASE (from your Open Brain setup) After running the migration: -- The `thoughts` table has six new columns with dashboard-friendly defaults. +- The `thoughts` table has six new columns with sensible defaults: + - `sensitivity_tier TEXT DEFAULT 'standard'` (canonical values: `'standard'`, `'personal'`, `'restricted'`) + - `importance SMALLINT DEFAULT 3` (scale: 1-5, where 3 is the default) + - `quality_score NUMERIC(5,2) DEFAULT 50` (scale: 0-100, where 50 is the default) + - `enriched BOOLEAN DEFAULT false` + - `type TEXT` (nullable; populated by backfill or writers) + - `source_type TEXT` (nullable; populated by backfill or writers) - New indexes on `type`, `importance`, `source_type`, and a GIN tsvector index on `content` for fast full-text search. - Three new RPC functions callable via the Supabase client or REST API. 
- `upsert_thought` remains the canonical write path, but now keeps structured dashboard columns synchronized with metadata payloads. diff --git a/schemas/enhanced-thoughts/schema.sql b/schemas/enhanced-thoughts/schema.sql index e272f974b..d7b0bbc55 100644 --- a/schemas/enhanced-thoughts/schema.sql +++ b/schemas/enhanced-thoughts/schema.sql @@ -112,8 +112,10 @@ BEGIN ELSE 0 END ) - + (coalesce(t.importance, 5) / 20.0)::real - + (coalesce(t.quality_score, 0.50) / 500.0)::real + -- importance is 1..5; max bonus 5/20 = 0.25 + + (coalesce(t.importance, 3) / 20.0)::real + -- quality_score is 0..100; max bonus 100/500 = 0.20 + + (coalesce(t.quality_score, 50) / 500.0)::real )::real AS rank FROM public.thoughts t CROSS JOIN query_input q From 605e783a23273358a3098cfe5c9f11d91b65a487 Mon Sep 17 00:00:00 2001 From: Alan Shurafa Date: Fri, 17 Apr 2026 21:18:21 -0400 Subject: [PATCH 2/8] [schemas] Fix REVIEW-BLOCKER-2: remove anon GRANT on all three RPCs Why: Two of three RPCs are SECURITY DEFINER and all three were granted to anon -- which turns the publishable anon key into a universal read handle over the entire thoughts table, inverting Open Brain's stock RLS-behind-service_role posture. Restrict EXECUTE to authenticated and service_role. Keep SECURITY DEFINER with SET search_path = public (defense-in-depth against search-path hijacks). Document the security posture in the README so anyone who wants public read can opt in explicitly rather than inherit it silently. 
--- schemas/enhanced-thoughts/README.md | 10 ++++++++++ schemas/enhanced-thoughts/schema.sql | 16 +++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/schemas/enhanced-thoughts/README.md b/schemas/enhanced-thoughts/README.md index 4b6c2a2b0..fdec8cf1f 100644 --- a/schemas/enhanced-thoughts/README.md +++ b/schemas/enhanced-thoughts/README.md @@ -56,6 +56,16 @@ After running the migration: - `upsert_thought` remains the canonical write path, but now keeps structured dashboard columns synchronized with metadata payloads. - Any existing thoughts with `type` or `source` in their metadata JSONB will have those values copied into the new top-level columns. +## Security + +This schema follows stock Open Brain's "service_role only" posture: + +- `brain_stats_aggregate` and `get_thought_connections` are `SECURITY DEFINER` with `SET search_path = public` (defense in depth against search-path hijacks). They can read the full `thoughts` table regardless of RLS. +- `search_thoughts_text` is `SECURITY INVOKER` and respects RLS. +- **None of the three RPCs are granted to `anon`.** Execute privilege is limited to `authenticated` and `service_role`. The publishable anon key cannot call them. + +If you want to expose any of these to `anon` (for example, a public-read dashboard), add your own `GRANT EXECUTE ... TO anon;` in a follow-up migration and confirm that `p_exclude_restricted := true` (the default) plus your sensitivity-tier hygiene gives you the exposure surface you actually want. This is an explicit opt-in: the default stance is private. + ## Troubleshooting **Issue: "column already exists" warnings** diff --git a/schemas/enhanced-thoughts/schema.sql b/schemas/enhanced-thoughts/schema.sql index d7b0bbc55..6d705a08d 100644 --- a/schemas/enhanced-thoughts/schema.sql +++ b/schemas/enhanced-thoughts/schema.sql @@ -134,8 +134,12 @@ BEGIN END; $$; +-- Do NOT grant to `anon`. Stock Open Brain keeps `thoughts` behind RLS +-- (service_role only). 
Broadening execution to the publishable anon key +-- would expose the entire brain to anyone who knows the project URL. +-- See README "Security" section. GRANT EXECUTE ON FUNCTION search_thoughts_text(TEXT, INTEGER, JSONB, INTEGER) - TO authenticated, anon, service_role; + TO authenticated, service_role; -- ============================================================ -- 3. BRAIN STATS AGGREGATE RPC @@ -191,8 +195,10 @@ BEGIN END; $$; +-- Do NOT grant to `anon`. This RPC is SECURITY DEFINER and would bypass +-- RLS on the thoughts table. See README "Security" section. GRANT EXECUTE ON FUNCTION brain_stats_aggregate(INTEGER, BOOLEAN) - TO authenticated, anon, service_role; + TO authenticated, service_role; -- ============================================================ -- 4. THOUGHT CONNECTIONS RPC @@ -284,8 +290,12 @@ BEGIN END; $$; +-- Do NOT grant to `anon`. This RPC is SECURITY DEFINER and exposes +-- a 200-char content preview plus metadata for any thought by UUID; +-- granting to anon would let anyone with the project URL pull content. +-- See README "Security" section. GRANT EXECUTE ON FUNCTION get_thought_connections(UUID, INT, BOOLEAN) - TO authenticated, anon, service_role; + TO authenticated, service_role; -- ============================================================ -- 5. BACKFILL EXISTING DATA From 933325331c192e0c74fa68a8a60a5f956056278a Mon Sep 17 00:00:00 2001 From: Alan Shurafa Date: Fri, 17 Apr 2026 21:18:38 -0400 Subject: [PATCH 3/8] [schemas] Fix REVIEW-HIGH-2: use NOT EXISTS instead of NOT IN Why: `t.id NOT IN (SELECT hit_id ...)` has NULL-unsafe semantics -- if the subquery ever yields a NULL, the predicate becomes NULL (not true) and the row is silently filtered out. In this schema the PK is NOT NULL so the bug cannot fire today, but NOT EXISTS is the correct discipline and usually plans better for anti-joins. 
--- schemas/enhanced-thoughts/schema.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schemas/enhanced-thoughts/schema.sql b/schemas/enhanced-thoughts/schema.sql index 6d705a08d..57dc68162 100644 --- a/schemas/enhanced-thoughts/schema.sql +++ b/schemas/enhanced-thoughts/schema.sql @@ -78,7 +78,7 @@ BEGIN AND (SELECT count(*) FROM tsvector_hits) < (p_limit + p_offset) AND t.content ILIKE '%' || q.raw_query || '%' AND t.metadata @> coalesce(p_filter, '{}'::jsonb) - AND t.id NOT IN (SELECT th.hit_id FROM tsvector_hits th) + AND NOT EXISTS (SELECT 1 FROM tsvector_hits th WHERE th.hit_id = t.id) LIMIT 500 ), all_hits AS ( From 2fc379b197a8924063a07916f9e99efc0a515b38 Mon Sep 17 00:00:00 2001 From: Alan Shurafa Date: Fri, 17 Apr 2026 21:19:12 -0400 Subject: [PATCH 4/8] [schemas] Fix REVIEW-HIGH-3: mark read-only RPCs as STABLE Why: search_thoughts_text was VOLATILE and get_thought_connections had no volatility declared (defaults to VOLATILE). Both are pure readers over their inputs within a transaction -- they touch no sequences and write no rows, so VOLATILE overstated what they do. Marking them STABLE gives the planner an accurate contract: within a single statement, calls with the same arguments return the same result, so a call can be evaluated once and used in an index-scan condition instead of being re-run for every row. (PL/pgSQL functions are never inlined -- only LANGUAGE sql functions are -- so inlining is not part of the gain.) It also lets PostgREST run these RPCs in read-only transactions, which keeps them cheap under dashboard load. 
--- schemas/enhanced-thoughts/schema.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/schemas/enhanced-thoughts/schema.sql b/schemas/enhanced-thoughts/schema.sql index 57dc68162..98f32400a 100644 --- a/schemas/enhanced-thoughts/schema.sql +++ b/schemas/enhanced-thoughts/schema.sql @@ -49,7 +49,7 @@ RETURNS TABLE ( total_count BIGINT ) LANGUAGE plpgsql -VOLATILE +STABLE SET statement_timeout = '25s' AS $$ BEGIN @@ -222,6 +222,7 @@ RETURNS TABLE ( overlap_count INT ) LANGUAGE plpgsql +STABLE SECURITY DEFINER SET search_path = public AS $$ From 385ba046a26d0b0eac7f8b40b7363775767b7ac0 Mon Sep 17 00:00:00 2001 From: Alan Shurafa Date: Fri, 17 Apr 2026 21:21:08 -0400 Subject: [PATCH 5/8] [schemas] Fix REVIEW-HIGH-4: make type-backfill allowlist configurable Why: The inline backfill hard-coded an 8-value type allowlist (idea/task/person_note/reference/decision/lesson/meeting/journal) and silently discarded every other value. Users with brains that already use 'article', 'quote', 'bookmark', etc. would run the migration, see the README promise a backfill, and get NULL on all their rows with no warning. Wrap the backfill in backfill_thought_types( p_allowed_types TEXT[]) with the canonical 8 as the default, so paste-and-run keeps working while power users can override the list (or pass NULL to accept any value). Document the knob in the README and update Troubleshooting to point at it. 
--- schemas/enhanced-thoughts/README.md | 15 +++++------ schemas/enhanced-thoughts/schema.sql | 40 +++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/schemas/enhanced-thoughts/README.md b/schemas/enhanced-thoughts/README.md index fdec8cf1f..4368b3006 100644 --- a/schemas/enhanced-thoughts/README.md +++ b/schemas/enhanced-thoughts/README.md @@ -4,11 +4,12 @@ ## What It Does -This schema extension adds six new columns to the `thoughts` table (`type`, `sensitivity_tier`, `importance`, `quality_score`, `source_type`, `enriched`) so thoughts can be classified, filtered, and ranked without parsing the metadata JSONB every time. It also upgrades `upsert_thought` so metadata-backed writes keep those structured columns in sync. It installs three utility RPC functions: +This schema extension adds six new columns to the `thoughts` table (`type`, `sensitivity_tier`, `importance`, `quality_score`, `source_type`, `enriched`) so thoughts can be classified, filtered, and ranked without parsing the metadata JSONB every time. It also installs four RPC functions: - **`search_thoughts_text`** -- Full-text search with boolean operators, ILIKE fallback, pagination, and result counts. - **`brain_stats_aggregate`** -- Returns total thought count, top types, and top topics as a single JSONB payload. - **`get_thought_connections`** -- Finds thoughts that share metadata topics or people with a given thought. +- **`backfill_thought_types(p_allowed_types TEXT[])`** -- Populates the new top-level `type` column from `metadata->>'type'`. The default allowlist covers the canonical eight values (`idea`, `task`, `person_note`, `reference`, `decision`, `lesson`, `meeting`, `journal`). Pass a custom array to accept additional values, or pass `NULL` to backfill whatever `metadata->>'type'` contains. ## Prerequisites @@ -36,9 +37,8 @@ SUPABASE (from your Open Brain setup) 2. Create a new query and paste the full contents of `schema.sql` 3. 
Click **Run** to execute the migration 4. Open **Table Editor** and select the `thoughts` table to confirm the new columns appear: `type`, `sensitivity_tier`, `importance`, `quality_score`, `source_type`, `enriched` -5. Navigate to **Database > Functions** and verify three new functions exist: `search_thoughts_text`, `brain_stats_aggregate`, `get_thought_connections` -6. Verify `upsert_thought` still exists. The enhanced version mirrors `metadata.type`, `metadata.source`, `metadata.importance`, `metadata.quality_score`, `metadata.sensitivity_tier`, and task/idea status into top-level columns. -7. If you have existing thoughts with `type` or `source` values stored in the metadata JSONB, the backfill statements at the bottom of the script will have populated the new columns automatically +5. Navigate to **Database > Functions** and verify the new functions exist: `search_thoughts_text`, `brain_stats_aggregate`, `get_thought_connections`, `backfill_thought_types` +6. If you have existing thoughts with `type` or `source` values stored in the metadata JSONB, the script automatically calls `backfill_thought_types()` with the default canonical allowlist. If your brain uses non-canonical `type` values, re-run `SELECT backfill_thought_types(ARRAY['your','custom','types']);` or `SELECT backfill_thought_types(NULL);` to accept any value ## Expected Outcome @@ -52,9 +52,8 @@ After running the migration: - `type TEXT` (nullable; populated by backfill or writers) - `source_type TEXT` (nullable; populated by backfill or writers) - New indexes on `type`, `importance`, `source_type`, and a GIN tsvector index on `content` for fast full-text search. -- Three new RPC functions callable via the Supabase client or REST API. -- `upsert_thought` remains the canonical write path, but now keeps structured dashboard columns synchronized with metadata payloads. 
-- Any existing thoughts with `type` or `source` in their metadata JSONB will have those values copied into the new top-level columns. +- Four new RPC functions callable via the Supabase client or REST API (`search_thoughts_text`, `brain_stats_aggregate`, `get_thought_connections`, `backfill_thought_types`). +- Any existing thoughts with `type` or `source` in their metadata JSONB will have those values copied into the new top-level columns (via `backfill_thought_types()` for `type` with the canonical allowlist, plus an inline `UPDATE` for `source_type`). ## Security @@ -75,4 +74,4 @@ Solution: These are safe to ignore. The `ADD COLUMN IF NOT EXISTS` syntax preven Solution: Confirm your thoughts have content populated. Try a simple query first (single word, no operators). If using boolean operators, ensure the syntax matches websearch format ("quoted phrases", word AND word, -excluded). **Issue: brain_stats_aggregate returns empty types or topics** -Solution: The function filters by `created_at`. Pass `p_since_days := 0` for all-time stats. Also confirm that your thoughts have the `type` column populated (run the backfill UPDATE if needed). +Solution: The function filters by `created_at`. Pass `p_since_days := 0` for all-time stats. Also confirm that your thoughts have the `type` column populated. If you use non-canonical type values in `metadata->>'type'` (anything outside `idea`, `task`, `person_note`, `reference`, `decision`, `lesson`, `meeting`, `journal`), call the backfill RPC with your own allowlist, e.g. `SELECT backfill_thought_types(ARRAY['idea','task','article','quote']);`, or `SELECT backfill_thought_types(NULL);` to accept whatever is present. diff --git a/schemas/enhanced-thoughts/schema.sql b/schemas/enhanced-thoughts/schema.sql index 98f32400a..3a6619656 100644 --- a/schemas/enhanced-thoughts/schema.sql +++ b/schemas/enhanced-thoughts/schema.sql @@ -304,10 +304,42 @@ GRANT EXECUTE ON FUNCTION get_thought_connections(UUID, INT, BOOLEAN) -- exist. 
Safe to run multiple times (WHERE ... IS NULL guard). -- ============================================================ --- Backfill type from metadata -UPDATE thoughts SET type = metadata->>'type' -WHERE type IS NULL AND metadata->>'type' IS NOT NULL - AND metadata->>'type' IN ('idea','task','person_note','reference','decision','lesson','meeting','journal'); +-- Backfill `type` from metadata. Wrapped in an RPC so callers can +-- override the allowlist. Default allowlist matches the canonical +-- Open Brain type vocabulary; pass NULL to accept any string value +-- present in metadata->>'type'. +CREATE OR REPLACE FUNCTION backfill_thought_types( + p_allowed_types TEXT[] DEFAULT ARRAY[ + 'idea','task','person_note','reference', + 'decision','lesson','meeting','journal' + ] +) +RETURNS BIGINT +LANGUAGE plpgsql +VOLATILE +SET search_path = public +AS $$ +DECLARE + v_updated BIGINT; +BEGIN + UPDATE public.thoughts + SET type = metadata->>'type' + WHERE type IS NULL + AND metadata->>'type' IS NOT NULL + AND (p_allowed_types IS NULL OR metadata->>'type' = ANY(p_allowed_types)); + + GET DIAGNOSTICS v_updated = ROW_COUNT; + RETURN v_updated; +END; +$$; + +-- Do NOT grant to `anon`. This RPC writes to the thoughts table. +GRANT EXECUTE ON FUNCTION backfill_thought_types(TEXT[]) + TO authenticated, service_role; + +-- Run the backfill with the default allowlist so the paste-and-run +-- flow still auto-populates `type` for canonical values. +SELECT backfill_thought_types(); -- Backfill source_type from metadata UPDATE thoughts SET source_type = metadata->>'source' From 2755735a2dafc1eb90fb8f7bf76b539b5ee2b111 Mon Sep 17 00:00:00 2001 From: Alan Shurafa Date: Fri, 17 Apr 2026 21:21:27 -0400 Subject: [PATCH 6/8] [schemas] Fix REVIEW-MEDIUM-3: align NULL handling in restricted filter Why: get_thought_connections used `bt.sensitivity_tier != 'restricted'` which evaluates to NULL (not true) when the column is NULL, silently dropping rows. 
brain_stats_aggregate already uses the NULL-safe `IS DISTINCT FROM 'restricted'`. Match that pattern so both RPCs see the same set of rows. --- schemas/enhanced-thoughts/schema.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schemas/enhanced-thoughts/schema.sql b/schemas/enhanced-thoughts/schema.sql index 3a6619656..735e458fd 100644 --- a/schemas/enhanced-thoughts/schema.sql +++ b/schemas/enhanced-thoughts/schema.sql @@ -269,7 +269,7 @@ BEGIN ) AS shared_people FROM thoughts bt WHERE bt.id != p_thought_id - AND (NOT p_exclude_restricted OR bt.sensitivity_tier != 'restricted') + AND (NOT p_exclude_restricted OR bt.sensitivity_tier IS DISTINCT FROM 'restricted') AND ( EXISTS ( SELECT 1 FROM jsonb_array_elements_text(bt.metadata->'topics') val From 336b6ca339333dfb1b3f52dfdd65d9e365b7ca5c Mon Sep 17 00:00:00 2001 From: Alan Shurafa Date: Fri, 17 Apr 2026 21:21:42 -0400 Subject: [PATCH 7/8] [schemas] Fix REVIEW-LOW-2: bump metadata.updated to merge date Why: The `updated` field in metadata.json is a signal for downstream consumers that the file has been revised. Since this commit chain adds security, correctness, and configurability changes, bump the date from 2026-04-06 to the current 2026-04-17 so anyone reading the metadata can see the contribution was recently touched. 
--- schemas/enhanced-thoughts/metadata.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schemas/enhanced-thoughts/metadata.json b/schemas/enhanced-thoughts/metadata.json index 0c26fdc3b..757a341b3 100644 --- a/schemas/enhanced-thoughts/metadata.json +++ b/schemas/enhanced-thoughts/metadata.json @@ -14,5 +14,5 @@ "difficulty": "beginner", "estimated_time": "15 minutes", "created": "2026-04-06", - "updated": "2026-04-06" + "updated": "2026-04-17" } From 1fd9ae42e690b26023b9b0e3e60522d827098fc2 Mon Sep 17 00:00:00 2001 From: Jonathan Edwards Date: Thu, 11 Jun 2026 13:34:11 -0400 Subject: [PATCH 8/8] docs: add community credit for enhanced thoughts --- schemas/enhanced-thoughts/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/schemas/enhanced-thoughts/README.md b/schemas/enhanced-thoughts/README.md index 4368b3006..f7314114f 100644 --- a/schemas/enhanced-thoughts/README.md +++ b/schemas/enhanced-thoughts/README.md @@ -1,5 +1,13 @@ # Enhanced Thoughts Columns and Utility RPCs +
+ +![Community Contribution](https://img.shields.io/badge/OB1_COMMUNITY-Approved_Contribution-2ea44f?style=for-the-badge&logo=github) + +**Created by [@alanshurafa](https://github.com/alanshurafa)** + +
+ > Adds structured columns and utility functions to the Open Brain thoughts table for richer classification, full-text search, statistics, and connection discovery. ## What It Does