2002yy · 2002yy · Jun 5, 2026 · Jun 5, 2026
diff --git a/.env.example b/.env.example
@@ -64,3 +64,9 @@ DEEPSEEK_MODEL_PRO_NAME=deepseek-v4-pro
 # 本地 trafilatura/readability/raw 和 Firecrawl 都失败后，是否允许调用 hosted Jina Reader 兜底。
 # 默认关闭；开启后会把公开 HTTP(S) URL 发给 https://r.jina.ai/ 读取 Markdown。
 # NEWS_ENABLE_JINA_READER=false
+
+# === RAG 向量后端（默认 local，无需额外依赖）===
+# local 使用当前 deterministic hash-vector prototype；chroma 是可选持久化适配器，需要自行安装 chromadb。
+# RAG_VECTOR_BACKEND=local
+# RAG_CHROMA_PATH=logs/chroma
+# RAG_CHROMA_COLLECTION=study_agent
diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 <p>
   <a href="https://github.com/2002yy/study-agent/actions/workflows/ci.yml"><img src="https://github.com/2002yy/study-agent/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
   <img src="https://img.shields.io/badge/python-3.12-blue" alt="Python 3.12">
-  <img src="https://img.shields.io/badge/tests-265%20passed-green" alt="265 tests passed">
+  <img src="https://img.shields.io/badge/tests-273%20passed-green" alt="273 tests passed">
 </p>
 
 A local AI learning assistant with long-term memory, role-based group chat,
@@ -17,7 +17,7 @@ Study Agent 是一个本地优先的 AI 学习助手，重点不是简单调用
 - **长期记忆**：Markdown memory + safe writer
 - **上下文分层**：fast / light / deep / archive
 - **联网搜索**：RSS / News fetch → article extraction → LLM digest → source tracing
-- **RAG MVP**：本地 Markdown / TXT / DOCX / PDF 索引、关键词 / 本地向量原型 / hybrid 检索、引用上下文、来源块、Streamlit 检索面板、聊天注入和 FastAPI RAG 接口
+- **RAG MVP**：本地 Markdown / TXT / DOCX / PDF 索引、关键词 / 本地向量原型 / hybrid / backend-vector 检索、引用上下文、来源块、Streamlit 检索/调试面板、聊天注入和 FastAPI RAG 接口
 - **工程安全**：SSRF protection、detect-secrets、配置模板
 - **工程质量**：pytest 测试套件、Ruff、GitHub Actions CI、打包检查
 
@@ -27,11 +27,11 @@ Study Agent 是一个本地优先的 AI 学习助手，重点不是简单调用
 - **Model routing** with fast / light / deep / archive context tiers
 - **Long-term memory** based on Markdown files and safe-writer persistence
 - **Web search pipeline**: feed registry → URL safety checks → article extraction → LLM digest → auditable source trace
-- **RAG MVP**: local Markdown / TXT / DOCX / PDF indexing, lexical / local vector prototype / hybrid retrieval, citation-first context formatting, source blocks, a Streamlit retrieval panel, optional chat injection, and FastAPI RAG endpoints
+- **RAG MVP**: local Markdown / TXT / DOCX / PDF indexing, lexical / local vector prototype / hybrid / backend-vector retrieval, citation-first context formatting, source blocks, a Streamlit retrieval/debug panel, optional chat injection, and FastAPI RAG endpoints
 - **SSRF protection** for article fetching, **detect-secrets** in CI
 - **Batched session logging** and multi-layer caching for performance
 - **Performance budget**: mode-based `max_tokens` bounds on the main chat, WeChat, and news LLM paths
-- **265 pytest tests**, Ruff clean, GitHub Actions CI workflow
+- **273 pytest tests**, Ruff clean, GitHub Actions CI workflow
 
 For a detailed breakdown of the stack and engineering highlights, see [Technical Stack & Engineering Highlights](docs/TECH_STACK.md).
 
@@ -109,7 +109,7 @@ Study Agent 的定位很明确：**一个运行在你本地的、有长期记忆
 | **角色群聊** | 四位角色（三月七、刻晴、纳西妲、流萤）群聊讨论，各有独立人设 |
 | **联网搜索** | Google News + Bing News + RSSHub 多源聚合，页面正文三层提取 |
 | **来源追溯** | 搜索结果写入群聊记录，可回溯依据 |
-| **RAG MVP** | 本地 Markdown / TXT / DOCX / PDF 文档索引，前端面板返回带文件路径、行号、分数和命中词的引用片段，并可注入单人聊天和微信群互动回复；FastAPI 提供 `/health`、`/rag`、`/rag/index`、`/rag/query` |
+| **RAG MVP** | 本地 Markdown / TXT / DOCX / PDF 文档索引，前端面板返回带文件路径、行号、分数、命中词和 score breakdown 的引用片段，并可注入单人聊天和微信群互动回复；FastAPI 提供 `/health`、`/rag`、`/rag/index`、`/rag/query` |
 | **课后总结** | 学习完成后自动总结进展，用户确认后写入记忆 |
 | **长期记忆** | 学习者画像、进度追踪、项目上下文、当前焦点，多级记忆档案 |
 | **多 Provider** | 支持 OpenAI / DeepSeek / OpenRouter / SiliconFlow / 本地模型 |
@@ -207,6 +207,15 @@ pip-compile requirements-dev.in    # 重新锁定开发依赖
 
 参数优先级：代码显式参数 → 任务级环境变量 → 任务默认值 → 全局环境变量 → provider 级环境变量。完整配置见 [`.env.example`](.env.example) 和 [用户指南](USER_GUIDE.md)。
 
+RAG 向量后端默认使用 `local`，不需要额外服务；可选 `chroma` adapter 需要用户自行安装 `chromadb`。可在 `.env` 中配置：
+
+```bash
+RAG_VECTOR_BACKEND=local
+# RAG_VECTOR_BACKEND=chroma
+# RAG_CHROMA_PATH=logs/chroma
+# RAG_CHROMA_COLLECTION=study_agent
+```
+
 ---
 
 ## 项目结构
@@ -234,7 +243,7 @@ pip-compile requirements-dev.in    # 重新锁定开发依赖
 │   ├── config.py           # 全局配置
 │   ├── router.py           # 路由配置
 │   ├── news/               # 新闻聚合链路
-│   ├── rag/                # 本地 RAG MVP：加载、分块、索引、关键词/向量原型检索
+│   ├── rag/                # 本地 RAG MVP：加载、分块、索引、关键词/向量原型/可选后端检索
 │   └── ui/                 # Streamlit UI 组件
 ├── tests/                  # pytest 测试套件
 ├── docs/                   # 设计文档与工程说明
@@ -255,7 +264,7 @@ pip-compile requirements-dev.in    # 重新锁定开发依赖
 ## 测试
 
 ```bash
-pytest tests/ -v            # current local baseline: 265 passed
+pytest tests/ -v            # current local baseline: 273 passed
 pytest tests/ --cov=src     # 覆盖率
 ruff check src/ tests/      # linting
 mypy --explicit-package-bases src/  # CI soft check; may report type debt
@@ -299,7 +308,7 @@ CI 通过 GitHub Actions 在 push / pull request 上运行，集成 `pytest`、`
 
 - [ ] FastAPI service layer (partial): `/health`, `/rag`, `/rag/index`, `/rag/query` implemented; `/chat` and `/memory` remain planned
 - [x] RAG MVP: Markdown / TXT / DOCX / PDF loading, chunking, local keyword retrieval, local vector prototype, hybrid retrieval, citation context, source blocks, Streamlit retrieval panel, optional single-chat and WeChat interactive injection
-- [ ] RAG document QA (partial): PDF parsing has file-size, page-count, extracted-text and encrypted-file guards; embedding model retrieval remains planned
+- [ ] RAG document QA (partial): PDF parsing has file-size, page-count, extracted-text and encrypted-file guards; Chroma adapter scaffold exists; production embedding model retrieval remains planned
 - [ ] Vector store: FAISS local prototype, pgvector engineering version
 - [ ] Web UI: TypeScript + Vue3 / React, streaming chat, source panel
 - [ ] Observability: trace_id, token usage, latency, provider fallback logs

diff --git a/docs/INTERVIEW_NOTES.md b/docs/INTERVIEW_NOTES.md
@@ -10,7 +10,7 @@ Study Agent 是一个本地优先的 AI 学习助手，重点在多 Provider 模
 2. **长期记忆写入安全** — safe writer + preview/confirm 机制，防止不可逆的记忆污染
 3. **联网搜索来源追溯** — Feed registry / RSS 多源聚合 → URL safety matrix → 文章正文三层提取 → LLM digest → pipeline trace 全过程来源可回溯
 4. **Streamlit 重渲染性能优化** — 多层缓存策略、按模式批量落盘、主链路 token 预算控制
-5. **CI / Ruff / detect-secrets 工程检查** — 265 pytest tests、Ruff clean、GitHub Actions workflow、detect-secrets 对未豁免发现硬阻断
+5. **CI / Ruff / detect-secrets 工程检查** — 273 pytest tests、Ruff clean、GitHub Actions workflow、detect-secrets 对未豁免发现硬阻断
 
 ## 可讲亮点
 

diff --git a/docs/RAG.md b/docs/RAG.md
@@ -20,11 +20,13 @@ Implemented:
 - Optional single-chat and WeChat interactive reply injection through the `用于聊天回答` toggle
 - UI source blocks for retrieved file paths, line ranges, scores and matched terms
 - FastAPI endpoints: `GET /health`, `POST /rag`, `POST /rag/index`, `POST /rag/query`
+- Streamlit knowledge/debug panel with index summary, document rows, chunk preview and score breakdowns
+- Optional vector backend interface with local fallback and Chroma adapter scaffold
 
 Not implemented yet:
 
-- Embedding model integration
-- FAISS, pgvector or other vector stores
+- Production embedding model integration
+- FAISS, pgvector or managed vector stores
 - Automatic injection into every generation path; current injection covers single chat and WeChat interactive replies, but not news discussion or after-session feedback
 
 ## Module Map
@@ -34,6 +36,9 @@ Not implemented yet:
 | `src/rag/loader.py` | Load supported local files into normalized `RagDocument` objects |
 | `src/rag/chunker.py` | Split documents into line-traceable `RagChunk` objects |
 | `src/rag/index.py` | Build, save, load and search a local JSON RAG index |
+| `src/rag/embeddings.py` | Embedding provider contract and local hash embedding provider |
+| `src/rag/backends.py` | Vector backend contract, local backend and environment-driven backend selection |
+| `src/rag/chroma_backend.py` | Optional Chroma persistent backend adapter scaffold |
 | `src/rag/vector.py` | Deterministic local vector prototype and hybrid retrieval |
 | `src/rag/eval.py` | LLM-free retrieval quality evaluation over gold query fixtures |
 | `src/rag/service.py` | Application-facing helpers for indexing, querying and context formatting |
@@ -62,6 +67,7 @@ Supported retrieval modes:
 - `lexical`: TF-IDF-style term scoring
 - `vector`: deterministic local hash-vector cosine similarity
 - `hybrid`: normalized lexical score plus vector similarity
+- `backend_vector`: configured vector backend; defaults to local and can use the optional Chroma adapter
 
 Each result keeps:
 
@@ -123,6 +129,22 @@ P4-B adds API/query diagnostics:
 - Per-result rank, chunk id, source path, matched terms and score breakdown
 - Optional one-query evaluation when `/rag/query` receives `expected_sources`
 
+P4-C / P6 add Streamlit inspection controls:
+
+- Current index path, document count and chunk count
+- Indexed document table with file type, size, mtime, hash prefix and chunk count
+- Chunk preview table with line range, character count and source path
+- Retrieval controls for mode, `top_k`, `min_score` and debug visibility
+- Score-breakdown table for retrieved chunks
+
+P5 adds the first vector-backend abstraction:
+
+- `EmbeddingProvider` protocol plus `LocalHashEmbeddingProvider`
+- `VectorBackend` protocol plus `LocalVectorBackend`
+- `RAG_VECTOR_BACKEND=local|chroma`
+- Optional `ChromaVectorBackend` using lazy `chromadb` import, `PersistentClient`, collection `upsert` and vector query
+- `tests/test_rag_backends.py` verifies local backend behavior, environment config and Chroma fake-client upsert/query behavior
+
 ## Next Steps
 
 ### P4: Retrieval Quality Loop
@@ -132,26 +154,27 @@ Goal: prove retrieval quality before expanding the stack.
 - [x] Add a small gold fixture set with queries, expected sources and expected terms.
 - [x] Track `recall@k`, mean reciprocal rank, source hit rate and empty-result rate.
 - [x] Surface retrieval debug data in tests and API responses before adding more UI polish.
-- [ ] Add a Streamlit source/debug panel for inspecting score breakdowns.
+- [x] Add a Streamlit source/debug panel for inspecting score breakdowns.
 - Keep the first evaluation layer LLM-free so CI can catch retrieval regressions deterministically.
 
 ### P5: Real Embedding Backend
 
 Goal: replace the local hash-vector prototype with optional real embeddings without breaking local-first defaults.
 
-- Extract a retriever / vector-backend contract.
-- Keep JSON + lexical / hybrid retrieval as the zero-infrastructure fallback.
-- Add one optional backend first, likely Qdrant or Chroma; defer FAISS if Windows install friction is high.
-- Make embedding provider selection explicit through config.
+- [x] Extract an embedding-provider and vector-backend contract.
+- [x] Keep JSON + lexical / hybrid retrieval as the zero-infrastructure fallback.
+- [x] Add an optional Chroma adapter scaffold with lazy import and fake-client tests.
+- [x] Make vector backend selection explicit through config.
+- [ ] Add a production embedding provider; the current Chroma adapter uses the local hash embedding provider by default.
 
 ### P6: Knowledge UI
 
 Goal: turn the Streamlit expander into a usable knowledge panel.
 
-- List indexed documents with chunk count, mtime, hash and status.
-- Add query debugging controls for mode, `top_k`, threshold and score preview.
-- Add source preview with title, path, page or line range and matched terms.
-- Add per-chat RAG scope selection instead of one global toggle only.
+- [x] List indexed documents with chunk count, mtime, hash and status.
+- [x] Add query debugging controls for mode, `top_k`, threshold and score preview.
+- [x] Add source preview with title, path, page or line range and matched terms.
+- [ ] Add per-chat RAG scope selection instead of one global toggle only.
 
 ### P7: Agentic RAG
 

diff --git a/docs/TECH_STACK.md b/docs/TECH_STACK.md
@@ -35,7 +35,7 @@ Study Agent 是一个本地运行的 AI 学习助理系统，面向个人学习
 | Long-term Memory | Markdown files | 用 `summary.md`、`current_focus.md`、`learner_profile.md` 等文件保存长期记忆 |
 | Context Control | fast / light / deep / archive tiers | 按性能模式选择不同记忆文件组，控制 token 成本 |
 | Routing | Rule-based router + optional LLM router | 根据任务类型、用户选择和性能模式决定角色、学习模式和模型档位 |
-| RAG MVP | `src/rag/*`, `src/ui/rag_panel.py`, `src/api.py`, JSON index | 本地 Markdown / TXT / DOCX / PDF 加载、分块、关键词 / 本地向量原型 / hybrid 检索、引用上下文拼装、来源块、Streamlit 检索面板、聊天注入和 FastAPI RAG endpoints |
+| RAG MVP | `src/rag/*`, `src/ui/rag_panel.py`, `src/api.py`, JSON index | 本地 Markdown / TXT / DOCX / PDF 加载、分块、关键词 / 本地向量原型 / hybrid / backend-vector 检索、引用上下文拼装、来源块、Streamlit 检索/调试面板、聊天注入和 FastAPI RAG endpoints |
 | News Search | Feed registry / RSS / Google News / Bing News / RSSHub-style sources | 多源新闻聚合、源健康记录、去重、排序、来源追溯 |
 | Article Extraction | `trafilatura`, `readability-lxml`, `lxml` | 新闻网页正文读取与降级解析 |
 | Security | URL safety matrix, SSRF validation, redirect checks, secret scanning | 防止读取本地/内网资源，降低密钥误提交风险 |
@@ -273,16 +273,18 @@ User query
 - 带 `source_path`、标题、chunk 序号和行号范围的分块
 - 本地关键词 / TF-IDF-style 检索
 - deterministic hash-vector 本地向量原型与 hybrid 检索模式
+- `EmbeddingProvider` / `VectorBackend` 抽象，默认 local backend，可选 Chroma adapter scaffold
 - 简单中文 CJK bigram 匹配
 - JSON index 保存与加载，默认路径为 `logs/rag_index.json`
 - `build_rag_context()` 将检索结果拼装为带引用的 LLM 上下文块
 - Streamlit `本地资料检索` 面板支持上传资料、输入本地路径、建立索引、检索和查看引用上下文
+- Streamlit 面板显示当前索引、文档列表、chunk preview、检索参数和 score breakdown
 - 单人聊天和微信群互动回复可通过 `用于聊天回答` 开关把检索结果注入 system prompt，并显示 RAG 引用来源块
 - FastAPI `GET /health`、`POST /rag`、`POST /rag/index`、`POST /rag/query`
 
 未实现边界：
 
-- 尚未接入 embedding model、FAISS、pgvector 或其他生产向量库
+- 尚未接入生产 embedding model、FAISS、pgvector 或其他生产向量库；Chroma 目前是 optional adapter scaffold
 - FastAPI 目前覆盖 health 和 RAG；`/chat`、`/memory` 仍是后续服务化任务
 - 尚未自动注入所有生成路径；当前覆盖单人聊天和微信群互动回复，不覆盖新闻讨论或课后反馈
 

diff --git a/docs/TESTING.md b/docs/TESTING.md
@@ -6,7 +6,7 @@ Current verified baseline:
 
 | Check | Status | Evidence |
 |---|---|---|
-| pytest | Passed | `265 passed` locally on 2026-06-05 |
+| pytest | Passed | `273 passed` locally on 2026-06-05 |
 | Ruff | Passed | `python -m ruff check .` clean locally on 2026-06-04 |
 | Package helper | Passed | `python tools/package_project_helper.py . NUL 0` locally on 2026-06-04 |
 | mypy | Soft check, not clean | `python -m mypy --explicit-package-bases src/` reported 18 errors locally on 2026-06-04 |
@@ -24,8 +24,9 @@ Current verified baseline:
 | **News URL safety** | `test_url_normalizer.py`, `test_link_resolver.py` | 28 |
 | **News pipeline trace / audit** | `test_news_pipeline_trace.py`, `test_news_audit.py` | 5 |
 | **Feed registry / health** | `test_feed_registry.py`, `test_feed_diagnostics.py` | 9 |
-| **RAG MVP** | `test_rag.py` | 22 |
+| **RAG MVP** | `test_rag.py` | 24 |
 | **RAG evaluation** | `test_rag_eval.py` | 5 |
+| **RAG vector backends** | `test_rag_backends.py` | 6 |
 | **FastAPI RAG endpoints** | `test_api.py` | 6 |
 | **Architecture flows** | `test_architecture_flows.py` | 12 |
 | **WeChat decoupling** | `test_wechat_decoupling.py` | 4 |
@@ -76,7 +77,7 @@ def test_flush_uses_safe_writer():
 ## Running Tests
 
 ```bash
-python -m pytest             # current baseline: 265 passed
+python -m pytest             # current baseline: 273 passed
 pytest tests/ -v             # Verbose
 pytest tests/ --cov=src      # Coverage
 python -m ruff check .       # Linting

diff --git a/src/rag/__init__.py b/src/rag/__init__.py
@@ -6,6 +6,14 @@
     save_rag_index,
     search_rag_index,
 )
+from src.rag.backends import (
+    LocalVectorBackend,
+    VectorBackendStatus,
+    get_vector_backend,
+    get_vector_backend_from_env,
+    vector_backend_config_from_env,
+)
+from src.rag.embeddings import LocalHashEmbeddingProvider
 from src.rag.eval import (
     RagEvalCase,
     RagEvalResult,
@@ -39,6 +47,10 @@
     "evaluate_rag_index",
     "format_rag_sources",
     "index_documents",
+    "get_vector_backend",
+    "get_vector_backend_from_env",
+    "LocalHashEmbeddingProvider",
+    "LocalVectorBackend",
     "load_eval_cases",
     "load_rag_index",
     "query_documents",
@@ -50,4 +62,6 @@
     "search_rag_index",
     "search_rag_index_vector",
     "search_documents",
+    "VectorBackendStatus",
+    "vector_backend_config_from_env",
 ]