ac-i2i-engineering · ndayiemile · Oct 6, 2025 · Sep 27, 2025 · Sep 29, 2025 · Oct 6, 2025
diff --git a/.gitignore b/.gitignore
@@ -168,6 +168,6 @@ cython_debug/
 
 # media
 user_uploads/
-.vscode/
 base_knowledge/
-user_uploads/
+.vscode/
+.DS_Store
diff --git a/resumax_backend/resumax_algo/gemini_model.py b/resumax_backend/resumax_algo/gemini_model.py
@@ -8,11 +8,20 @@
 from . import system_instructions
 from .models import ConversationsThread, Conversation
 from pathlib import Path
+import threading
+import os
 
 
 # Module-level client instance for reuse across functions
 _client = None
 
+# In-memory chat sessions keyed by (user_id, thread_id); mutate only while holding _chat_sessions_lock
+_chat_sessions = {}
+_chat_sessions_lock = threading.Lock()
+
+# Cached list of base-knowledge file URIs (None until the first upload); guarded by _base_knowledge_lock
+_base_knowledge_uris = None
+_base_knowledge_lock = threading.Lock()
 
 def _get_genai_client():
     """Get or create a shared GenAI client instance"""
@@ -24,51 +33,74 @@ def _get_genai_client():
     return _client
 
 
-async def _get_or_create_chat_session(thread_id=None):
-    """Get chat session with conversation history from database"""
+def _get_session_key(thread_id, user_id):
+    """Return the (user_id, thread_id) tuple that keys an entry in _chat_sessions."""
+    return (user_id, thread_id)
+
+async def _get_or_create_chat_session(thread_id=None, user_id=None):
+    """Return the cached Gemini chat for (user_id, thread_id), creating and caching one if absent; raises if either id is missing."""
+    if not thread_id or not user_id:
+        raise Exception("Both thread_id and user_id are required for session persistence.")
+    key = _get_session_key(thread_id, user_id)
+    with _chat_sessions_lock:
+        if key in _chat_sessions:
+            return _chat_sessions[key]
     client = _get_genai_client()
+    system_content = system_instructions.SYSTEM_PROMPT
+    history = (await _get_conversation_history(thread_id, max_history=15)) or []
 
-    try:
-        # Create chat session with system instruction (text only)
-        system_content = system_instructions.SYSTEM_PROMPT
-
-        # Get conversation history if thread_id provided
-        history = await _get_conversation_history(thread_id) if thread_id else None
-
-        # Debug: Print history information
-        if history:
-            print(f"🔄 Loading {len(history)} history messages for thread {thread_id}")
-            for i, msg in enumerate(history):
-                role = msg.get('role', 'unknown')
-                text_preview = msg['parts'][0].get('text', 'no content')[:50] if msg['parts'] else 'no content'
-                print(f"  {i+1}. {role}: {text_preview}...")
-            print(f"📋 Full history structure: {history}")
-        else:
-            print(f"📝 No history found for thread {thread_id}" if thread_id else "🆕 New conversation (no thread_id)")
-
-        chat = client.chats.create(
-            model='gemini-2.5-flash',
-            config={"system_instruction": system_content},
-            history=history
-        )
-
-        return chat
+    # Prepend the base-knowledge files as the first history message (uploaded off the event loop)
+    context_file_uris = await asyncio.to_thread(upload_base_knowledge_files)
+    context_parts = [
+        types.Part.from_uri(file_uri=uri, mime_type="application/pdf")
+        for uri in context_file_uris
+    ]
+    # Trailing text part tells the model how to use the reference files listed above
+    context_parts.append(types.Part.from_text(text="These are reference documents from Loeb center, always refer to them while responding. They also contains few shots, and other information regarding what we care about"))
+    context_message = {
+        'role': 'model',
+        'parts': context_parts
+    }
+    history = [context_message] + history
+
+    # Fixed sampling settings; each reply is capped at 2048 output tokens
+    chat = client.chats.create(
+        model='models/gemini-2.5-flash',
+        config={
+            "system_instruction": system_content,
+            "temperature": 0.7,
+            "top_p": 0.9,
+            "max_output_tokens": 2048
+        },
+        history=history
+    )
+    with _chat_sessions_lock:
+        # Cap the store at 20 sessions; skip eviction if a concurrent call already cached this key
+        if key not in _chat_sessions and len(_chat_sessions) >= 20:
+            # Evict the earliest-inserted session (FIFO: cache hits do not refresh order)
+            oldest_key = next(iter(_chat_sessions))
+            del _chat_sessions[oldest_key]
+            print(f"Removed oldest chat session to manage memory: {oldest_key}")
 
-    except Exception as e:
-        raise Exception(f"Failed to create chat session: {e}")
+        chat = _chat_sessions.setdefault(key, chat)  # keep the first session if another call won the race
+    return chat
 
 
 @sync_to_async
-def _get_conversation_history(thread_id):
-    """Get conversation history from database for chat session"""
-    try:
-        from .models import AttachedFile
-
+def _get_conversation_history(thread_id, max_history=20):
+    """Get conversation history from database for chat session with optimizations"""
+    try:        
         thread = ConversationsThread.objects.filter(id=thread_id).first()
         if not thread:
             return []
 
-        conversations = Conversation.objects.filter(thread=thread).order_by('created_at')
+        # Optimize query and limit history length for better performance
+        conversations = Conversation.objects.filter(
+            thread=thread
+        ).select_related('thread').prefetch_related('attachedfile_set').order_by('-created_at')[:max_history]
+
+        # Reverse to get chronological order after limiting
+        conversations = list(reversed(conversations))
 
         history = []
         for conv in conversations:
@@ -77,13 +109,10 @@ def _get_conversation_history(thread_id):
                 user_parts = [{'text': conv.prompt}]
 
                 # Add attached files to user message
-                attached_files = AttachedFile.objects.filter(conversation=conv)
-                for file in attached_files:
+                for file in conv.attachedfile_set.all():  # Use prefetched data
                     if file.file_path and file.file_type:
-                        # Create file part for Google GenAI
                         client = _get_genai_client()
                         try:
-                            # Get the full file path
                             full_path = pathlib.Path(settings.MEDIA_ROOT) / file.stored_filename
                             if full_path.exists():
                                 uploaded = client.files.upload(file=full_path)
@@ -128,49 +157,124 @@ def _process_file_url(file_url):
     return None
 
 
-async def generate_response(promptText, fileUrls=None, thread_id=None):
-    """Generate content using Gemini Chat API"""
+async def generate_response(promptText, fileUrls=None, thread_id=None, user_id=None):
+    """Send promptText (plus any uploaded fileUrls) to the (user_id, thread_id) chat session and return the reply text; raises on failure."""
     if not promptText or not promptText.strip():
         raise Exception("Prompt text cannot be empty")
 
     try:
-        chat = await _get_or_create_chat_session(thread_id)
-
+        chat = await _get_or_create_chat_session(thread_id, user_id)
         message_parts = [types.Part.from_text(text=promptText)]
 
-        # Handle file uploads if provided
+        # Upload any attached files concurrently and append them to the message
         if fileUrls:
             file_parts = await _process_file_uploads(fileUrls)
             message_parts.extend(file_parts)
 
-        response = chat.send_message(message_parts)
-
-        if not response or not response.text:
-            raise Exception("Empty response from Gemini API")
-
-        return response.text
-
+        # Up to 3 attempts, 1 second apart; note that no per-call timeout is applied
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                response = await asyncio.to_thread(chat.send_message, message_parts)
+                if response and response.text:
+                    return response.text
+                else:
+                    raise Exception("Empty response from Gemini API")
+            except Exception as e:
+                if attempt == max_retries - 1:  # Last attempt: let the outer handler wrap it
+                    raise
+                print(f"Attempt {attempt + 1} failed, retrying: {e}")
+                await asyncio.sleep(1)  # Brief delay before retry
+
     except Exception as e:
         raise Exception(f"Content generation failed: {e}")
 
 
 async def _process_file_uploads(fileUrls):
-    """Process file uploads"""
+    """Upload every resolvable URL in fileUrls concurrently and return the resulting Parts; failed uploads are printed and skipped."""
     if not fileUrls:
         return []
 
     client = _get_genai_client()
     file_parts = []
 
+    # Build one upload coroutine per URL that _process_file_url could resolve
+    upload_tasks = []
     for file_url in fileUrls:
         file_data = _process_file_url(file_url)
         if file_data:
-            file_path, mime_type = file_data
+            upload_tasks.append(_upload_single_file(client, file_data))
+
+    if upload_tasks:
+        results = await asyncio.gather(*upload_tasks, return_exceptions=True)
+        for result in results:
+            if isinstance(result, types.Part):
+                file_parts.append(result)
+            elif isinstance(result, Exception):
+                print(f"Error in file upload: {result}")
+
+    return file_parts
+
+
+async def _upload_single_file(client, file_data):
+    """Upload one (file_path, mime_type) pair in a worker thread and return a Part referencing the uploaded URI; raises on failure."""
+    file_path, mime_type = file_data
+
+    try:
+        uploaded = await asyncio.to_thread(client.files.upload, file=file_path)
+        return types.Part.from_uri(file_uri=uploaded.uri, mime_type=mime_type)
+    except Exception as e:
+        raise Exception(f"Failed to upload {file_path.name}: {e}") from e
+
+def upload_base_knowledge_files():
+    """Upload the non-hidden files in BASE_DIR/base_knowledge to Gemini and return their URIs.
+    A non-empty result is cached at module level so later calls skip the upload."""
+    global _base_knowledge_uris
+
+    # NOTE(review): Gemini Files API uploads reportedly expire (~48h); this cache never refreshes - confirm
+    with _base_knowledge_lock:
+        if _base_knowledge_uris is not None:
+            print("used cached base knowledge")
+            return _base_knowledge_uris
+
+    # If not cached, upload the files
+    base_knowledge_dir = os.path.join(settings.BASE_DIR, 'base_knowledge')
+    client = _get_genai_client()
+    context_file_uris = []
+
+    print("📚 Uploading base knowledge files to Gemini (first time only)...")
+    for filename in sorted(os.listdir(base_knowledge_dir)):
+        file_path = os.path.join(base_knowledge_dir, filename)
+        if os.path.isfile(file_path) and not filename.startswith('.'):
             try:
-                uploaded = client.files.upload(file=file_path)
-                file_part = types.Part.from_uri(file_uri=uploaded.uri, mime_type=mime_type)
-                file_parts.append(file_part)
+                uploaded = client.files.upload(file=pathlib.Path(file_path))
+                context_file_uris.append(uploaded.uri)
+                print(f"✅ Uploaded {filename}")
             except Exception as e:
-                print(f"Error uploading {file_path.name}: {e}")
+                print(f"❌ Error uploading {filename}: {e}")
+
+    # Cache only a non-empty result so a run where nothing uploaded is retried on the next call
+    with _base_knowledge_lock:
+        _base_knowledge_uris = context_file_uris or None
+
+    print(f"📚 Base knowledge files cached ({len(context_file_uris)} files)")
+    return context_file_uris
+
+
+def clear_base_knowledge_cache():
+    """Reset the cached base-knowledge URIs to None so the next upload_base_knowledge_files() call uploads again; cached chat sessions are not touched."""
+    global _base_knowledge_uris
+    with _base_knowledge_lock:
+        _base_knowledge_uris = None
+    print("🗑️ Base knowledge cache cleared")
+
+
+def get_cache_stats():
+    """Return a dict with the number of cached base-knowledge URIs and the number of cached chat sessions."""
+    with _base_knowledge_lock:
+        base_knowledge_count = len(_base_knowledge_uris) if _base_knowledge_uris else 0
 
-    return file_parts
+    return {
+        'base_knowledge_files': base_knowledge_count,
+        'active_chat_sessions': len(_chat_sessions)
+    }
diff --git a/resumax_backend/resumax_api/views.py b/resumax_backend/resumax_api/views.py
@@ -51,7 +51,7 @@ def conversations(request, thread_id):
         if not promptAttachedFiles:
             # Generate response using Gemini
             try:
-                response = asyncio.run(generate_response(promptText, thread_id=thread_id))
+                response = asyncio.run(generate_response(promptText, thread_id=thread_id, user_id=user.id))
             except Exception as e:
                 return Response({"error": str(e)}, status=500)
             # Save conversation to the database
@@ -97,7 +97,7 @@ def conversations(request, thread_id):
         try:
             # Generate response considering attached files
             file_urls = [file_data['file_url'] for file_data in uploaded_file_data]
-            response = asyncio.run(generate_response(promptText, file_urls, thread_id=thread_id))
+            response = asyncio.run(generate_response(promptText, file_urls, thread_id=thread_id, user_id=user.id))
 
             # Truncate response if it's too long for the database
             if len(response) > 20000: