-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodels.py
More file actions
124 lines (105 loc) · 5.09 KB
/
Copy pathmodels.py
File metadata and controls
124 lines (105 loc) · 5.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from datetime import datetime, timezone
from typing import Any, Optional
from pydantic import BaseModel, Field, field_validator
class Fact(BaseModel):
    # One extracted fact about a person; the meaning of each field is given
    # by its ``description`` below.
    # NOTE: documented with comments rather than a class docstring because
    # pydantic uses a model's docstring as the description in its generated
    # JSON schema, and that output should stay unchanged.
    subject_name: str = Field(
        "Self",
        description="The name of the person this fact is about (use 'Self' for the sender)",
    )
    category: str = Field(..., description="Work|Location|Family|Interest|Preference|Other")
    value: str = Field(..., description="The specific fact content")
    confidence: str = Field("medium", description="high|medium|low")
    source_quote: Optional[str] = Field(None, description="Exact snippet from the text")
    is_first_party: bool = Field(
        True,
        description="True if the subject is speaking about themselves",
    )
    temporal_status: str = Field(
        "unknown",
        description="current|past|unknown — whether this fact is still true",
    )
    # ISO-8601 UTC timestamp, generated when no value is supplied.
    extracted_at: str = Field(default_factory=lambda: datetime.now(timezone.utc).isoformat())

    @field_validator("confidence")
    @classmethod
    def validate_confidence(cls, v: str) -> str:
        """Return *v* trimmed and lowercased, or ``"medium"`` if unrecognised.

        Surrounding whitespace is ignored so that a value such as
        ``" High "`` is kept as ``"high"`` instead of being silently
        downgraded to the fallback.
        """
        normalized = v.strip().lower()
        return normalized if normalized in ("high", "medium", "low") else "medium"

    @field_validator("temporal_status")
    @classmethod
    def validate_temporal_status(cls, v: str) -> str:
        """Return *v* trimmed and lowercased, or ``"unknown"`` if unrecognised.

        Surrounding whitespace is ignored for the same reason as in
        ``validate_confidence``.
        """
        normalized = v.strip().lower()
        return normalized if normalized in ("current", "past", "unknown") else "unknown"
class Relationship(BaseModel):
    # A relationship to a named target (``target_name``), carrying a type,
    # optional context and a confidence level.
    # NOTE: documented with comments rather than a class docstring because
    # pydantic uses a model's docstring as the description in its generated
    # JSON schema, and that output should stay unchanged.
    target_name: str
    target_id: Optional[str] = None
    type: str = Field(..., description="friend|colleague|family|knows|works_at|lives_in")
    context: Optional[str] = None
    confidence: str = "medium"
    # ISO-8601 UTC timestamp, generated when no value is supplied.
    extracted_at: str = Field(default_factory=lambda: datetime.now(timezone.utc).isoformat())

    @field_validator("type")
    @classmethod
    def normalize_type(cls, v: str) -> str:
        """Normalize compound types like 'friend|colleague' → first value, lowercased.

        Empty segments are skipped, so ``"|friend"`` yields ``"friend"``
        rather than an empty string. If every segment is empty the result
        is ``""``.
        """
        segments = (part.strip() for part in v.split("|"))
        return next((part for part in segments if part), "").lower()

    @field_validator("confidence")
    @classmethod
    def validate_confidence(cls, v: str) -> str:
        """Return *v* trimmed and lowercased, or ``"medium"`` if unrecognised.

        Applies the high/medium/low rule to relationships so that
        ``confidence`` is stored in one canonical form.
        """
        normalized = v.strip().lower()
        return normalized if normalized in ("high", "medium", "low") else "medium"
class ExtractionResult(BaseModel):
    # Container for one extraction pass: optional free-text reasoning, the
    # extracted facts and relationships, and an optional summary of the sender.
    # Every field is optional; the lists default to empty.
    reasoning_scratchpad: Optional[str] = Field(default=None)
    extractions: list[Fact] = Field(default=[])
    relationships: list[Relationship] = Field(default=[])
    summary_of_sender: Optional[str] = Field(default=None)
class ContactProfile(BaseModel):
    # Accumulated knowledge about one contact: identity, merged facts,
    # relationships, a summary and bookkeeping fields.
    # NOTE: documented with comments rather than a class docstring because
    # pydantic uses a model's docstring as the description in its generated
    # JSON schema, and that output should stay unchanged.
    contact_id: str
    display_name: str
    facts: list[Fact] = []
    relationships: list[Relationship] = []
    summary: str = ""
    chat_type: str = "single"
    # ISO-8601 UTC timestamp; refreshed by add_fact / add_relationship.
    last_updated: str = Field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    message_count: int = 0

    def add_fact(self, new_fact: Fact):
        """
        Merge a single fact into the profile with attribute-level priority.

        PRIORITY RULE: First-party data (Self) cannot be overwritten by third-party data.

        Matching order:
          1. A stored fact with the same category and the same value
             (case-insensitive) anywhere in the list.
          2. Otherwise, for the single-valued categories Work / Location /
             Biographical, the first stored third-party fact of that
             category when the new fact is first-party.

        A matched fact is replaced only if the new one is first-party and the
        stored one is not, or both have the same party status and the new one
        raises confidence to "high". Unmatched facts are appended. Facts with
        an empty value are ignored.
        """
        if not new_fact.value:
            return

        wanted_value = new_fact.value.lower()
        same_category = [
            (idx, fact)
            for idx, fact in enumerate(self.facts)
            if fact.category == new_fact.category
        ]

        # An exact value match always wins, wherever it sits in the list.
        # Otherwise an earlier third-party entry could be overwritten while
        # the true duplicate further down is left in place.
        existing_idx = next(
            (idx for idx, fact in same_category if fact.value.lower() == wanted_value),
            -1,
        )

        # No exact match: a first-party statement supersedes a third-party
        # one in categories that hold a single current value.
        if (
            existing_idx < 0
            and new_fact.is_first_party
            and new_fact.category in ("Work", "Location", "Biographical")
        ):
            existing_idx = next(
                (idx for idx, fact in same_category if not fact.is_first_party),
                -1,
            )

        if existing_idx < 0:
            self.facts.append(new_fact)
        else:
            existing_fact = self.facts[existing_idx]
            outranks = new_fact.is_first_party and not existing_fact.is_first_party
            more_confident = (
                new_fact.is_first_party == existing_fact.is_first_party
                and new_fact.confidence == "high"
                and existing_fact.confidence != "high"
            )
            if outranks or more_confident:
                self.facts[existing_idx] = new_fact

        self.last_updated = datetime.now(timezone.utc).isoformat()

    def add_relationship(self, rel: Relationship):
        """Add or update a relationship. Rejects low confidence and deduplicates.

        For a given target (compared case-insensitively, ignoring surrounding
        whitespace):
          * an identical type is skipped;
          * a new generic 'knows' is skipped when any relationship to that
            target already exists;
          * a new specific type replaces an existing generic 'knows' entry,
            so only the more specific relationship is kept;
          * otherwise the relationship is appended.
        """
        # Skip low-confidence relationships
        if rel.confidence.strip().lower() == "low":
            return

        rel_target = rel.target_name.lower().strip()
        rel_type = rel.type.lower().strip()

        generic_idx = None  # position of an existing 'knows' for this target
        for idx, existing in enumerate(self.relationships):
            if existing.target_name.lower().strip() != rel_target:
                continue
            existing_type = existing.type.lower().strip()
            # Exact duplicate, or the new one is generic while something
            # is already recorded for this target: nothing to add.
            if existing_type == rel_type or rel_type == "knows":
                return
            if existing_type == "knows" and generic_idx is None:
                generic_idx = idx

        if generic_idx is None:
            self.relationships.append(rel)
        else:
            # Upgrade the generic 'knows' to the more specific relationship.
            self.relationships[generic_idx] = rel
        self.last_updated = datetime.now(timezone.utc).isoformat()
class GroupChatSummary(BaseModel):
    # Summary record for a group chat: its id and display name, the names of
    # its participants, a free-text summary and a last-updated timestamp.
    chat_id: str
    display_name: str
    participant_names: list[str] = Field(default=[])
    summary: str = Field(default="")
    # ISO-8601 UTC timestamp, generated when no value is supplied.
    last_updated: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat(),
    )
class IntelligenceGraph(BaseModel):
    # Graph payload made of free-form node and link dictionaries plus a
    # last-updated timestamp. The key sets noted below are the intended
    # shape only; nothing in this model enforces them.
    # node keys: {id, label, type, val}
    nodes: list[dict[str, Any]] = Field(default=[])
    # link keys: {source, target, label}
    links: list[dict[str, Any]] = Field(default=[])
    # ISO-8601 UTC timestamp, generated when no value is supplied.
    last_updated: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat(),
    )