Skip to content

Commit 60f84a8

Browse files
authored
Merge pull request #230 from Isskar/bugs_deletion
feat: delete useless tests
2 parents 0aa458e + 933dd90 commit 60f84a8

7 files changed

Lines changed: 114 additions & 145 deletions

File tree

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ rag_evaluation/config/test_datasets/
6767

6868
# Project specific files
6969
*.pkl
70-
*.faiss
7170
*.pdf
7271
*.docx
7372
*.xlsx

flux_donnees.mmd

Lines changed: 112 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,128 @@
11
graph TD
22
%% Users and Interface
3-
User([User]) -->|Connects| Login[Login Page]
4-
Login -->|Verifies credentials| AuthDB[(SQLite Database<br>Users)]
5-
Login -->|Authentication successful| Chat[Chat Interface]
6-
Login -->|Admin access| Admin[User Management]
7-
8-
%% Question Flow
9-
User -->|Asks a question| Chat
10-
Chat -->|Sends query| Retriever[FAISS Retriever]
11-
12-
%% Document Search
13-
Retriever -->|Vector similarity search| VectorDB[(FAISS Vector<br>Database)]
14-
VectorDB -->|Relevant documents| Retriever
15-
16-
%% Response Generation
17-
Retriever -->|Document context| Chain[LangChain Chain]
18-
Chain -->|Builds prompt| LLM[LLM Model<br>GPT 4o]
19-
LLM -->|Generates response in French| Chain
20-
Chain -->|Formatted response| Chat
21-
Chat -->|Displays response + sources| User
22-
23-
%% Database Initialization and Update
24-
ConfluenceAPI[Confluence API] -->|Retrieves pages| DataLoader[DataLoader]
25-
DataLoader -->|Processes documents| TextSplitter[Text Splitter]
26-
TextSplitter -->|Splits into chunks| Embedder[Embeddings<br>HuggingFace]
27-
Embedder -->|Vectors + Metadata| VectorDB
28-
29-
%% User Management
30-
Admin -->|Add/Remove| AuthDB
31-
32-
%% Advanced Options
33-
RebuildDB[Option: Rebuild DB] -.->|Triggers| DataLoader
3+
User([User]) -->|Accesses| WebApp[Streamlit Web App]
4+
User -->|CLI Commands| CLI[CLI Interface]
5+
6+
%% Authentication Flow
7+
WebApp -->|Authentication| AzureAuth[Azure AD Authentication]
8+
AzureAuth -->|Valid credentials| ChatInterface[Chat Interface]
9+
AzureAuth -->|Admin access| AdminInterface[Admin Interface]
10+
11+
%% Main Chat Flow
12+
User -->|Asks question| ChatInterface
13+
ChatInterface -->|Query| SemanticPipeline[Semantic RAG Pipeline]
14+
15+
%% Semantic Processing Pipeline
16+
SemanticPipeline -->|Analyze query| QueryProcessor[Query Processor]
17+
QueryProcessor -->|Intent & expansion| SemanticRetrieval[Semantic Retrieval Tool]
18+
SemanticRetrieval -->|Vector search| WeaviateDB[(Weaviate Vector DB<br>Collection: isschat_docs)]
19+
WeaviateDB -->|Relevant documents| SemanticRetrieval
20+
SemanticRetrieval -->|Ranked results| GenerationTool[Generation Tool]
21+
GenerationTool -->|Context + prompt| LLM[Gemini 2.5 Flash Lite]
22+
LLM -->|Generated response| GenerationTool
23+
GenerationTool -->|Final answer + sources| ChatInterface
24+
25+
%% Data Storage & Management
26+
GenerationTool -->|Save conversation| DataManager[Data Manager]
27+
DataManager -->|Store data| StorageSystem{Storage System}
28+
StorageSystem -->|Local| LocalStorage[(Local File Storage)]
29+
StorageSystem -->|Cloud| AzureStorage[(Azure Blob Storage)]
30+
31+
%% Features & History
32+
ChatInterface -->|User feedback| FeaturesManager[Features Manager]
33+
ChatInterface -->|Conversation history| HistoryManager[History Manager]
34+
FeaturesManager -->|Feedback data| DataManager
35+
HistoryManager -->|Load/save history| DataManager
36+
37+
%% CLI Operations
38+
CLI -->|Ingest command| IngestionPipeline[Confluence Ingestion Pipeline]
39+
CLI -->|Status command| StatusCheck[System Status Check]
40+
CLI -->|Query command| SemanticPipeline
41+
CLI -->|Chat command| InteractiveCLI[Interactive CLI Chat]
42+
43+
%% Data Ingestion Flow
44+
IngestionPipeline -->|Extract| ConfluenceConnector[Confluence Connector]
45+
ConfluenceConnector -->|Fetch pages| ConfluenceAPI[Confluence API]
46+
ConfluenceConnector -->|Raw documents| DocumentProcessor[Document Processor]
47+
DocumentProcessor -->|Clean & structure| DocumentChunker[Document Chunker]
48+
DocumentChunker -->|Text chunks| EmbeddingService[Embedding Service<br>multilingual-e5-small]
49+
EmbeddingService -->|Vector embeddings| WeaviateDB
50+
51+
%% Configuration & Secrets
52+
SemanticPipeline -.->|Config| ConfigManager[Configuration Manager]
53+
ConfigManager -.->|Secrets| KeyVault[Azure Key Vault]
54+
ConfigManager -.->|Settings| EnvFile[Environment Variables]
55+
56+
%% Performance & Monitoring
57+
AdminInterface -->|View metrics| PerformanceDashboard[Performance Dashboard]
58+
PerformanceDashboard -->|Query stats| DataManager
3459

3560
%% Subgraphs for organization
36-
subgraph "User Interface"
61+
subgraph "User Interface Layer"
3762
User
38-
Login
39-
Chat
40-
Admin
41-
RebuildDB
63+
WebApp
64+
CLI
65+
ChatInterface
66+
AdminInterface
67+
InteractiveCLI
68+
end
69+
70+
subgraph "Authentication & Authorization"
71+
AzureAuth
72+
KeyVault
4273
end
4374

44-
subgraph "RAG Processing"
45-
Retriever
46-
Chain
75+
subgraph "RAG Processing Engine"
76+
SemanticPipeline
77+
QueryProcessor
78+
SemanticRetrieval
79+
GenerationTool
4780
LLM
4881
end
4982

50-
subgraph "Data Storage"
51-
VectorDB
52-
AuthDB
83+
subgraph "Data Storage Layer"
84+
WeaviateDB
85+
DataManager
86+
StorageSystem
87+
LocalStorage
88+
AzureStorage
5389
end
5490

55-
subgraph "Data ingestion"
91+
subgraph "Application Components"
92+
FeaturesManager
93+
HistoryManager
94+
PerformanceDashboard
95+
ConfigManager
96+
end
97+
98+
subgraph "Data Ingestion Pipeline"
99+
IngestionPipeline
100+
ConfluenceConnector
56101
ConfluenceAPI
57-
DataLoader
58-
TextSplitter
59-
Embedder
102+
DocumentProcessor
103+
DocumentChunker
104+
EmbeddingService
105+
end
106+
107+
subgraph "Configuration & Environment"
108+
ConfigManager
109+
KeyVault
110+
EnvFile
60111
end
61112

62-
classDef interface fill:#f9f,stroke:#333,stroke-width:1px
63-
classDef processing fill:#bbf,stroke:#333,stroke-width:1px
64-
classDef storage fill:#bfb,stroke:#333,stroke-width:1px
65-
classDef ingestion fill:#fbb,stroke:#333,stroke-width:1px
113+
%% Styling
114+
classDef interface fill:#e1f5fe,stroke:#0277bd,stroke-width:2px
115+
classDef auth fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
116+
classDef processing fill:#e8f5e8,stroke:#2e7d32,stroke-width:2px
117+
classDef storage fill:#fff3e0,stroke:#ef6c00,stroke-width:2px
118+
classDef components fill:#fce4ec,stroke:#c2185b,stroke-width:2px
119+
classDef ingestion fill:#f1f8e9,stroke:#558b2f,stroke-width:2px
120+
classDef config fill:#f5f5f5,stroke:#616161,stroke-width:2px
66121

67-
class User,Login,Chat,Admin,RebuildDB interface
68-
class Retriever,Chain,LLM processing
69-
class VectorDB,AuthDB storage
70-
class ConfluenceAPI,DataLoader,TextSplitter,Embedder ingestion
122+
class User,WebApp,CLI,ChatInterface,AdminInterface,InteractiveCLI interface
123+
class AzureAuth,KeyVault auth
124+
class SemanticPipeline,QueryProcessor,SemanticRetrieval,GenerationTool,LLM processing
125+
class WeaviateDB,DataManager,StorageSystem,LocalStorage,AzureStorage storage
126+
class FeaturesManager,HistoryManager,PerformanceDashboard components
127+
class IngestionPipeline,ConfluenceConnector,ConfluenceAPI,DocumentProcessor,DocumentChunker,EmbeddingService ingestion
128+
class ConfigManager,EnvFile config

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ dependencies = [
1414
"streamlit>=1.31.0",
1515
"langchain-core>=0.2.0",
1616
"langchain-text-splitters>=0.2.0",
17-
"faiss-cpu>=1.7.4",
1817
"pandas>=2.0.0",
1918
"python-dotenv>=1.0.0",
2019
"huggingface-hub[hf-xet]>=0.19.0",

src/rag/semantic_pipeline.py

Lines changed: 0 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -239,51 +239,6 @@ def compare_with_basic_retrieval(self, query: str, k: int = 5) -> Dict[str, Any]
239239
except Exception as e:
240240
return {"error": str(e), "query": query}
241241

242-
def test_problematic_query(self, query: str = "qui sont les collaborateurs sur Isschat") -> Dict[str, Any]:
243-
"""
244-
Test the pipeline with the specific problematic query about collaborators.
245-
246-
Args:
247-
query: The problematic query to test
248-
249-
Returns:
250-
Detailed test results
251-
"""
252-
try:
253-
# Test with full semantic pipeline
254-
start_time = time.time()
255-
answer, sources = self.process_query(query, verbose=True)
256-
response_time = (time.time() - start_time) * 1000
257-
258-
# Get comparison data
259-
comparison = self.compare_with_basic_retrieval(query)
260-
261-
# Analyze if the answer contains team information
262-
team_keywords = ["vincent", "nicolas", "emin", "fraillon", "lambropoulos", "calyaka", "équipe", "team"]
263-
answer_lower = answer.lower()
264-
team_mentions = [keyword for keyword in team_keywords if keyword in answer_lower]
265-
266-
return {
267-
"test_query": query,
268-
"semantic_pipeline_result": {
269-
"answer": answer,
270-
"sources": sources,
271-
"response_time_ms": response_time,
272-
"team_keywords_found": team_mentions,
273-
"contains_team_info": len(team_mentions) > 2,
274-
},
275-
"comparison": comparison,
276-
"success_criteria": {
277-
"finds_team_info": len(team_mentions) > 2,
278-
"mentions_specific_names": any(name in answer_lower for name in ["vincent", "nicolas", "emin"]),
279-
"better_than_basic": comparison.get("improvement_metrics", {}).get("semantic_advantage", False),
280-
},
281-
"pipeline_status": self.get_status(),
282-
}
283-
284-
except Exception as e:
285-
return {"error": str(e), "test_query": query}
286-
287242
def _format_sources_for_storage(self, formatted_docs) -> list[dict]:
288243
"""Format sources for storage with enhanced metadata"""
289244
sources = []
@@ -351,24 +306,6 @@ def get_status(self) -> Dict[str, Any]:
351306
except Exception as e:
352307
return {"pipeline_type": "semantic_rag_pipeline", "ready": False, "error": str(e)}
353308

354-
def check_pipeline(self, test_query: str = "qui sont les collaborateurs sur Isschat") -> Dict[str, Any]:
355-
"""Check pipeline with default problematic query"""
356-
try:
357-
if not self.is_ready():
358-
return {"success": False, "error": "Pipeline not ready", "details": self.get_status()}
359-
360-
# Run the problematic query test
361-
test_result = self.test_problematic_query(test_query)
362-
363-
return {
364-
"success": test_result.get("success_criteria", {}).get("finds_team_info", False),
365-
"test_result": test_result,
366-
"pipeline_status": self.get_status(),
367-
}
368-
369-
except Exception as e:
370-
return {"success": False, "error": str(e), "test_query": test_query}
371-
372309

373310
class SemanticRAGPipelineFactory:
374311
"""Factory for creating semantic RAG pipelines"""

src/vectordb/interface.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""
22
Vector database interface for clean abstractions.
3-
Supports both Qdrant and FAISS implementations.
3+
Supports the Weaviate vector database implementation.
44
"""
55

66
from abc import ABC, abstractmethod

src/webapp/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def initialize_embedder():
113113
def get_model(rebuild_db=False):
114114
# Display a spinner during loading
115115
with st.spinner("Loading RAG model..."):
116-
# Check if the index.faiss file exists
116+
# Initialize RAG model
117117
from src.config.settings import get_debug_info
118118

119119
# Get debug info

uv.lock

Lines changed: 0 additions & 24 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)