-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample_orn_identifier_resolution.py
More file actions
189 lines (155 loc) · 5.61 KB
/
example_orn_identifier_resolution.py
File metadata and controls
189 lines (155 loc) · 5.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
"""
Demonstration of ORN/Glomerulus Identifier Resolution
======================================================
This example demonstrates the robust identifier resolution system that allows
users to pass ORN/glomerulus names in various formats.
The resolver automatically normalizes messy inputs like:
- "DL3", "dl3", "ORN_DL3", "ORN-DL3", "Glomerulus DL3"
- "Ir31a", "IR31A", "ORN_Ir31a"
- "Or7a", "OR7A", "ORN_Or7a"
All resolve to their canonical forms (e.g., "ORN_DL3", "ORN_Ir31a").
"""
from pathlib import Path
from door_toolkit.connectomics import CrossTalkNetwork
from door_toolkit.connectomics.pathway_analysis import (
analyze_single_orn,
compare_orn_pair,
find_pathways,
)
from door_toolkit.integration.orn_identifier import (
normalize_orn_identifier,
resolve_orn_identifier,
suggest_orn_identifiers,
get_available_glomeruli,
)
def _print_banner(title: str, width: int = 70) -> None:
    """Print ``title`` between two ``=`` rules, followed by a blank line.

    Args:
        title: Heading text shown between the two rules.
        width: Number of ``=`` characters in each rule.
    """
    rule = "=" * width
    print(rule)
    print(title)
    print(rule)
    print()


def main() -> None:
    """Run the ORN/glomerulus identifier-resolution demo on stdout.

    Loads the cross-talk network from
    ``data/interglomerular_crosstalk_pathways.csv`` and walks through six
    numbered sections (normalization, resolution, suggestions, pathway
    analysis, pair comparison, error handling) plus a summary.

    If the CSV file is missing, a notice is printed and the function
    returns before touching the network code.
    """
    # data/ is looked up two levels above the directory holding this script.
    data_path = Path(__file__).parent.parent.parent / "data"
    csv_file = data_path / "interglomerular_crosstalk_pathways.csv"

    if not csv_file.exists():
        print(f"❌ Data file not found: {csv_file}")
        print("Please run the FlyWire extraction script first.")
        return

    _print_banner("ORN/Glomerulus Identifier Resolution Demo")

    # Load connectome network
    print("Loading FlyWire connectome network...")
    network = CrossTalkNetwork.from_csv(str(csv_file))
    print(f"✓ Loaded network with {network.data.num_glomeruli} glomeruli\n")

    # Get available glomeruli
    available = get_available_glomeruli(network)
    print(f"Available glomeruli: {len(available)} total")
    # sorted() accepts any iterable, so no intermediate list is needed.
    print(f"Sample: {sorted(available)[:10]}\n")

    _print_banner("1. NORMALIZATION EXAMPLES")

    # Normalization is purely textual: it does not consult the network.
    test_inputs = [
        "DL3",
        "dl3",
        "ORN_DL3",
        "ORN-DL3",
        "ORN DL3",
        "Glomerulus DL3",
        "Ir31a",
        "IR31A",
        "ORN_Ir31a",
        "Or7a",
    ]
    print("Normalization (format agnostic):")
    for raw in test_inputs:
        normalized = normalize_orn_identifier(raw)
        print(f" {raw:20s} → {normalized}")
    print()

    _print_banner("2. RESOLUTION WITH FUZZY MATCHING")

    # Resolution checks each identifier against the loaded glomeruli.
    print("Resolving identifiers against FlyWire network:\n")
    test_cases = ["DL3", "dl5", "Ir31a", "Or7a", "va1d"]
    for identifier in test_cases:
        try:
            resolved = resolve_orn_identifier(identifier, available)
            print(f"✓ '{identifier}' → '{resolved}'")
        except ValueError as e:
            print(f"✗ '{identifier}' failed: {e}")
    print()

    _print_banner("3. FUZZY MATCHING WITH SUGGESTIONS")

    # Test fuzzy matching
    print("When exact match fails, suggestions are provided:\n")
    typo_input = "DL33"  # User meant DL3
    suggestions = suggest_orn_identifiers(typo_input, available, k=5)
    print(f"Input: '{typo_input}'")
    print("Suggestions:")
    for identifier, score in suggestions:
        print(f" {identifier:20s} (similarity: {score:.2f})")
    print()

    _print_banner("4. INTEGRATION WITH PATHWAY ANALYSIS")

    # Demonstrate that pathway analysis functions now accept messy inputs
    print("analyze_single_orn() now accepts various formats:\n")
    # Test with different input formats - all should work
    formats_to_test = ["DL5", "ORN_DL5", "dl5"]
    for fmt in formats_to_test:
        # Deliberately broad: this is a demo, so any failure is reported
        # and the remaining formats are still tried.
        try:
            result = analyze_single_orn(network, fmt, by_glomerulus=True)
            print(f"✓ analyze_single_orn(network, '{fmt}')")
            print(f" Found {result.num_pathways} pathways from {fmt}")
        except Exception as e:
            print(f"✗ '{fmt}' failed: {e}")
    print()

    _print_banner("5. COMPARE ORN PAIR WITH MESSY INPUTS")

    # Test pair comparison with messy inputs
    print("compare_orn_pair() with different input styles:\n")
    # Deliberately broad for the same reason as above: report and move on.
    try:
        comparison = compare_orn_pair(
            network,
            "dl5",  # lowercase
            "VA1v",  # mixed case
            by_glomerulus=True,
        )
        print("✓ compare_orn_pair(network, 'dl5', 'VA1v')")
        print(f" DL5 → VA1v: {len(comparison.pathways_1_to_2)} pathways")
        print(f" VA1v → DL5: {len(comparison.pathways_2_to_1)} pathways")
        print(f" Asymmetry ratio: {comparison.get_asymmetry_ratio():.2f}")
    except Exception as e:
        print(f"✗ Comparison failed: {e}")
    print()

    _print_banner("6. ERROR HANDLING WITH SUGGESTIONS")

    # Demonstrate error handling
    print("When identifier cannot be resolved, helpful error is raised:\n")
    try:
        resolve_orn_identifier("XYZ999", available)
    except ValueError as e:
        print(f"Error message:\n{e}\n")

    _print_banner("SUMMARY")

    print("✓ Identifier resolution system successfully handles:")
    print(" - Case-insensitive inputs (DL3, dl3, Dl3)")
    print(" - Multiple separator styles (ORN_DL3, ORN-DL3, ORN DL3)")
    print(" - Prefix variations (DL3, ORN_DL3, Glomerulus DL3)")
    print(" - Receptor names (Ir31a, Or7a, Gr21a)")
    print(" - Fuzzy matching for typos (DL33 → suggests DL3, DL5)")
    print()
    print("✓ All pathway analysis functions now accept messy inputs")
    print("✓ Clear error messages with suggestions when resolution fails")
    print()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()