Skip to content

Commit e4566a8

Browse files
committed
AI tools now have semantic search
1 parent 58781ad commit e4566a8

22 files changed

Lines changed: 725 additions & 476 deletions

neural_engine/core/domain_router.py

Lines changed: 82 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,51 +4,108 @@
44
This improves accuracy by using domain-specific experts instead of
55
one general-purpose LLM trying to handle everything.
66
7-
Uses micro-LLM approach: small focused LLM call for domain detection.
8-
No regex patterns - generalizable and maintainable.
7+
Uses semantic tool discovery to determine domain - no hardcoded keywords!
8+
The domain is inferred from which tools match the goal semantically.
99
"""
1010

11-
from typing import Optional
11+
from typing import Optional, TYPE_CHECKING
12+
13+
if TYPE_CHECKING:
14+
from neural_engine.core.tool_discovery import ToolDiscovery
1215

1316

1417
class DomainRouter:
1518
"""
1619
Routes user goals to specialized domain handlers.
1720
18-
Domains:
19-
- memory: Memory read/write operations
20-
- strava: Strava API operations
21-
- calculator: Mathematical calculations
22-
- general: Everything else (default)
21+
Domains are INFERRED from tool metadata, not hardcoded:
22+
- If top matching tools have domain="fitness" → route to strava specialist
23+
- If top matching tools have domain="memory" → route to memory specialist
24+
- Otherwise → general
2325
24-
Uses small LLM call for robust, generalizable domain detection.
26+
This is the semantic, generalizable approach!
2527
"""
2628

27-
def __init__(self, ollama_client=None):
28-
"""Initialize domain router with optional LLM client."""
29+
def __init__(self, ollama_client=None, tool_discovery: Optional['ToolDiscovery'] = None):
30+
"""Initialize domain router with optional LLM client and tool discovery."""
2931
self.ollama_client = ollama_client
32+
self.tool_discovery = tool_discovery
3033

3134
def detect_domain(self, goal: str) -> str:
3235
"""
33-
Detect the domain for a given goal using per-domain voting.
36+
Detect the domain for a given goal using semantic tool matching.
37+
38+
NEW approach:
39+
1. Use tool_discovery to find semantically matching tools
40+
2. Look at the domain metadata of top matches
41+
3. Route to the dominant domain
3442
35-
Each domain gets asked: "Does this goal belong to YOUR domain?"
36-
Highest confidence wins.
43+
This means "show me my runs from last week" will:
44+
- Match strava_get_my_activities (domain="fitness")
45+
- Route to strava domain
46+
47+
Without any hardcoded keywords!
3748
3849
Args:
3950
goal: User goal text
4051
4152
Returns:
4253
Domain name ("memory", "strava", "calculator", "general")
4354
"""
44-
# If no LLM client, use fast keyword fallback
45-
if not self.ollama_client:
46-
return self._keyword_fallback(goal)
55+
# Try semantic tool discovery first (the smart way!)
56+
if self.tool_discovery:
57+
domain = self._detect_via_tool_discovery(goal)
58+
if domain != "general":
59+
return domain
60+
61+
# Fallback to LLM voting if tool discovery didn't give clear result
62+
if self.ollama_client:
63+
return self._detect_via_llm_voting(goal)
64+
65+
# Last resort: keyword fallback
66+
return self._keyword_fallback(goal)
67+
68+
def _detect_via_tool_discovery(self, goal: str) -> str:
    """
    Detect the domain from the tools that semantically match the goal.

    Queries ``self.tool_discovery.semantic_search`` for the top 5 matches
    and lets every match vote for the ``domain`` value in its metadata.
    Votes are weighted by rank (1, 1/2, 1/3, ...), so earlier matches
    matter more. The ``fitness`` domain is mapped to ``strava`` because
    that is the name this router returns for the fitness specialist.

    Args:
        goal: User goal text.

    Returns:
        The domain with the highest accumulated weight, or "general" when
        the search yields no candidates.
    """
    candidates = self.tool_discovery.semantic_search(goal, n_results=5)
    if not candidates:
        return "general"

    domain_scores = {}
    for rank, candidate in enumerate(candidates, start=1):
        # A missing, None or empty domain counts as "general". Without the
        # `or`, a key that is present but None would become its own bucket
        # and could be returned as the detected "domain".
        domain = candidate.get('domain') or 'general'

        # Map fitness domain to strava (for specialist routing)
        if domain == 'fitness':
            domain = 'strava'

        domain_scores[domain] = domain_scores.get(domain, 0.0) + 1.0 / rank

    # candidates is non-empty here, so domain_scores has at least one entry.
    best_domain = max(domain_scores, key=domain_scores.get)

    # NOTE(review): the top-ranked candidate alone contributes 1.0, so the
    # winning score is always >= 1.0 and this threshold can never reject.
    # Kept to preserve current routing; if a real confidence gate is wanted,
    # compare against the winner's share of the total weight instead.
    if domain_scores[best_domain] > 0.5:
        return best_domain

    return "general"
103+
104+
def _detect_via_llm_voting(self, goal: str) -> str:
105+
"""Fallback: Use LLM to vote on domain."""
49106
domains = [
50-
("memory", "personal user information stored previously: remembering user's name, preferences, past conversations, things the user told you to remember"),
51-
("strava", "fitness activities, running, cycling, workouts, exercise data from Strava"),
107+
("memory", "retrieving or storing PERSONAL INFORMATION that the user previously told you (their name, preferences, favorites). NOT for external data like fitness activities or API data."),
108+
("strava", "fitness activities, running data, cycling, workouts, exercise tracking from Strava API - including 'runs', 'rides', 'activities', recent fitness data"),
52109
("calculator", "mathematical calculations, numbers, arithmetic operations"),
53110
]
54111

@@ -61,6 +118,11 @@ def detect_domain(self, goal: str) -> str:
61118
Does this goal belong to the {domain_name} domain?
62119
({domain_desc})
63120
121+
IMPORTANT:
122+
- "memory" is ONLY for recalling things the USER TOLD YOU (like their name, preferences)
123+
- "strava" is for fitness data like runs, rides, activities (even if it says "my runs")
124+
- If the goal asks about exercise/fitness activities, answer NO for memory, YES for strava
125+
64126
Answer YES or NO with confidence 0-100:
65127
YES if goal clearly matches this domain
66128
NO if goal doesn't match this domain

neural_engine/core/intent_classifier_neuron.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -339,9 +339,11 @@ def _classify_zeroshot(self, goal: str) -> str:
339339
"role": "system",
340340
"content": (
341341
"You are an intent classifier. Classify user goals as either:\n"
342-
"- 'generative' (creative writing, stories, poems, general knowledge)\n"
343-
"- 'tool_use' (calculations, data retrieval, API calls, specific actions)\n\n"
344-
"Respond with only the intent name."
342+
"- 'generative' (creative writing, stories, poems, general knowledge questions, explanations, opinions, conversation)\n"
343+
"- 'tool_use' (calculations, storing/recalling personal data, API calls, running code, specific actions with measurable results)\n\n"
344+
"Important: Questions like 'What is X?' or 'Tell me about Y' are GENERATIVE - they need knowledge, not tools.\n"
345+
"Only use tool_use for things that require executing code or calling an API.\n\n"
346+
"Respond with ONLY the intent name, nothing else."
345347
)
346348
},
347349
{"role": "user", "content": goal}

neural_engine/core/memory_operations_specialist.py

Lines changed: 73 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,16 @@
33
44
This specialized classifier focuses ONLY on memory operations,
55
achieving higher accuracy than the general classifier.
6+
7+
Uses tool semantic metadata for action hints when available.
68
"""
79

8-
from typing import Dict
10+
from typing import Dict, Optional, TYPE_CHECKING
911
from .neuron import BaseNeuron
1012

13+
if TYPE_CHECKING:
14+
from neural_engine.core.tool_discovery import ToolDiscovery
15+
1116

1217
class MemoryOperationsSpecialist(BaseNeuron):
1318
"""
@@ -16,14 +21,54 @@ class MemoryOperationsSpecialist(BaseNeuron):
1621
Distinguishes between:
1722
- memory_write: Storing information
1823
- memory_read: Retrieving information
24+
25+
Uses semantic metadata from tools when available.
1926
"""
2027

21-
def __init__(self, message_bus, ollama_client):
28+
def __init__(self, message_bus, ollama_client, tool_discovery: Optional['ToolDiscovery'] = None):
    """
    Initialize the specialist with its default read/write action hints.

    Args:
        message_bus: Passed through to the base class initializer.
        ollama_client: Passed through to the base class initializer.
        tool_discovery: Optional tool discovery instance. When given, the
            action hints are refreshed from tool metadata immediately,
            the same way ``set_tool_discovery`` does.
    """
    super().__init__(message_bus, ollama_client)
    self.tool_discovery = tool_discovery

    # Default action hints; these may be extended from tool semantic metadata.
    self._write_actions = {"store", "save", "write", "remember", "memorize", "note"}
    self._read_actions = {"retrieve", "recall", "get", "read", "fetch"}

    # Keep the constructor consistent with set_tool_discovery(): a
    # tool_discovery supplied here previously never updated the hints.
    # _update_action_hints() returns immediately when tool_discovery is unset.
    self._update_action_hints()
35+
36+
def set_tool_discovery(self, tool_discovery: 'ToolDiscovery'):
37+
"""Set tool discovery and update action hints from metadata."""
38+
self.tool_discovery = tool_discovery
39+
self._update_action_hints()
40+
41+
def _update_action_hints(self):
    """
    Extend the read/write action hints from tool semantic metadata.

    Walks every tool returned by
    ``self.tool_discovery.tool_registry.get_all_tools()`` and, for tools
    whose metadata has ``domain == 'memory'``, merges their ``actions`` and
    ``synonyms`` into ``self._write_actions`` or ``self._read_actions``.

    A tool is classified by marker actions: ``store``/``save``/``write``
    mark a write tool, ``retrieve``/``read``/``get`` mark a read tool.
    A tool carrying markers of both kinds contributes only the markers
    themselves, each to its own set, because its remaining actions and
    synonyms cannot be attributed to one side. Adding them to both sets
    would also copy read words into the write set and vice versa, which
    cancels the hints out when scores are compared.

    This is best-effort: failures are logged and the existing hints are
    kept. One misbehaving tool does not stop the remaining tools from
    being processed.
    """
    import logging

    if not self.tool_discovery:
        return

    logger = logging.getLogger(__name__)
    write_markers = {'store', 'save', 'write'}
    read_markers = {'retrieve', 'read', 'get'}

    def _normalize(values):
        # Hints are substring-matched against a lower-cased goal, so store
        # them lower-cased and drop empty strings (which match everything).
        return {
            value.strip().lower()
            for value in (values or ())
            if isinstance(value, str) and value.strip()
        }

    try:
        tools = self.tool_discovery.tool_registry.get_all_tools() or {}
    except Exception:
        logger.warning("Could not read tools for memory action hints", exc_info=True)
        return  # Keep defaults if tool discovery fails

    for tool_name, tool_instance in tools.items():
        if not hasattr(tool_instance, 'get_semantic_metadata'):
            continue

        try:
            metadata = tool_instance.get_semantic_metadata() or {}
            if metadata.get('domain') != 'memory':
                continue
            actions = _normalize(metadata.get('actions'))
            synonyms = _normalize(metadata.get('synonyms'))
        except Exception:
            logger.warning(
                "Skipping semantic metadata of tool %r", tool_name, exc_info=True
            )
            continue

        is_write_tool = bool(actions & write_markers)
        is_read_tool = bool(actions & read_markers)

        if is_write_tool and not is_read_tool:
            self._write_actions.update(actions, synonyms)
        elif is_read_tool and not is_write_tool:
            self._read_actions.update(actions, synonyms)
        elif is_write_tool and is_read_tool:
            # Mixed tool: only the marker words have a known direction.
            self._write_actions.update(actions & write_markers)
            self._read_actions.update(actions & read_markers)
2368

2469
def classify_memory_operation(self, goal: str) -> str:
2570
"""
26-
Classify memory operation type.
71+
Classify memory operation type using LLM with semantic-aware fallback.
2772
2873
Args:
2974
goal: User goal text
@@ -43,19 +88,6 @@ def classify_memory_operation(self, goal: str) -> str:
4388
"- Questions (What? Tell me? Recall?) → read\n"
4489
"- Commands to store (Remember that X, Save Y) → write\n"
4590
"- Past tense recall (what I told you, what you know) → read\n\n"
46-
"Examples:\n"
47-
"User: Remember that my name is Alice\n"
48-
"Assistant: write\n\n"
49-
"User: What is my name?\n"
50-
"Assistant: read\n\n"
51-
"User: Remember what I told you about my favorite color\n"
52-
"Assistant: read\n\n"
53-
"User: Store the value 42 for key 'answer'\n"
54-
"Assistant: write\n\n"
55-
"User: Recall what I told you about my birthday\n"
56-
"Assistant: read\n\n"
57-
"User: What did I tell you?\n"
58-
"Assistant: read\n\n"
5991
"Respond with ONLY 'write' or 'read'."
6092
)
6193
},
@@ -65,44 +97,40 @@ def classify_memory_operation(self, goal: str) -> str:
6597
response = self.ollama_client.chat(messages)
6698
operation = response['message']['content'].strip().lower()
6799

68-
# Fallback: analyze keywords FIRST before trusting LLM response
100+
# Semantic-aware fallback using action hints from tool metadata
69101
goal_lower = goal.lower()
70102

71-
# Strongest WRITE signals - statements of fact with "my X is Y" pattern
72-
if any(p in goal_lower for p in ["my name is", "my favorite", "i am", "i'm from", "i like", "i live", "actually"]):
73-
# Check if it's a question (would be read)
74-
if not any(q in goal_lower for q in ["what", "who", "when", "where", "why", "how", "tell me", "?", "recall"]):
75-
return "write"
103+
# Check if goal contains write actions (from semantic metadata)
104+
write_action_score = sum(1 for action in self._write_actions if action in goal_lower)
105+
read_action_score = sum(1 for action in self._read_actions if action in goal_lower)
76106

77-
# Strongest READ signals - past tense recall patterns
78-
if any(p in goal_lower for p in ["what i told", "what i said", "what you know", "told you about", "said earlier", "remember what"]):
79-
return "read"
107+
# Linguistic patterns for read/write detection (grammatical, not domain-specific)
108+
# These are OK because they detect SENTENCE STRUCTURE, not topic
109+
is_statement = any(p in goal_lower for p in ["my name is", "my favorite is", "i am ", "i like "])
110+
is_question = "?" in goal or any(q in goal_lower for q in ["what is", "who is", "tell me"])
111+
is_recall = any(p in goal_lower for p in ["what i told", "what you know", "do you remember"])
80112

81-
# Question words strongly indicate read
82-
if any(q in goal_lower for q in ["what is", "who is", "when did", "where is", "why did", "how did", "tell me", "show me", "recall"]):
113+
# Combine signals
114+
if is_statement and not is_question and not is_recall:
115+
return "write"
116+
117+
if is_question or is_recall:
83118
return "read"
84119

85-
# Validate and clean LLM response
86-
if "write" in operation or "stor" in operation or "save" in operation:
120+
# Use action scores from semantic metadata
121+
if write_action_score > read_action_score:
87122
return "write"
88-
elif "read" in operation or "recall" in operation or "get" in operation:
123+
elif read_action_score > write_action_score:
89124
return "read"
90-
else:
91-
# Final fallback: check for explicit write commands
92-
write_patterns = ["remember that", "store", "save", "write", "note that", "set"]
93-
read_patterns = ["recall", "retrieve", "get", "fetch"]
94-
95-
# Check for explicit write patterns with "that" clause
96-
for pattern in write_patterns:
97-
if pattern in goal_lower and "that" in goal_lower:
98-
return "write"
99-
100-
# Check for read patterns
101-
if any(p in goal_lower for p in read_patterns):
102-
return "read"
103-
104-
# Default to read if uncertain (safer - read has no side effects)
125+
126+
# Trust LLM response as final fallback
127+
if "write" in operation or "stor" in operation:
128+
return "write"
129+
elif "read" in operation or "recall" in operation:
105130
return "read"
131+
132+
# Default to read (safer - no side effects)
133+
return "read"
106134

107135
def select_memory_tool(self, goal: str) -> Dict:
108136
"""

0 commit comments

Comments
 (0)