Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

from onyx.agents.agent_search.dr.sub_agents.states import SubAgentMainState
from onyx.agents.agent_search.dr.sub_agents.states import SubAgentUpdate
from onyx.agents.agent_search.dr.utils import chunks_or_sections_to_search_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.utils.logger import setup_logger

Expand Down Expand Up @@ -47,7 +47,7 @@ def is_reducer(
doc_list.append(x)

# Convert InferenceSections to SavedSearchDocs
search_docs = chunks_or_sections_to_search_docs(doc_list)
search_docs = SearchDoc.chunks_or_sections_to_search_docs(doc_list)
retrieved_saved_search_docs = [
SavedSearchDoc.from_search_doc(search_doc, db_doc_id=0)
for search_doc in search_docs
Expand Down
4 changes: 2 additions & 2 deletions backend/onyx/agents/agent_search/dr/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.utils import chunks_or_sections_to_search_docs
from onyx.context.search.models import SearchDoc
from onyx.tools.tool_implementations.web_search.web_search_tool import (
WebSearchTool,
)
Expand Down Expand Up @@ -266,7 +266,7 @@ def convert_inference_sections_to_search_docs(
is_internet: bool = False,
) -> list[SavedSearchDoc]:
# Convert InferenceSections to SavedSearchDocs
search_docs = chunks_or_sections_to_search_docs(inference_sections)
search_docs = SearchDoc.chunks_or_sections_to_search_docs(inference_sections)
for search_doc in search_docs:
search_doc.is_internet = is_internet

Expand Down
92 changes: 92 additions & 0 deletions backend/onyx/context/search/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections.abc import Sequence
from datetime import datetime
from typing import Any

Expand Down Expand Up @@ -355,6 +356,97 @@ class SearchDoc(BaseModel):
secondary_owners: list[str] | None = None
is_internet: bool = False

@classmethod
def chunks_or_sections_to_search_docs(
    cls,
    items: "Sequence[InferenceChunk | InferenceSection] | None",
) -> list["SearchDoc"]:
    """Convert InferenceChunk / InferenceSection objects to SearchDoc objects.

    Delegates to ``from_inference_chunk`` and ``from_inference_section`` so
    the chunk-to-SearchDoc field mapping lives in exactly one place
    (previously this method duplicated that mapping inline).

    NOTE(review): delegation also populates ``is_relevant`` and
    ``relevance_explanation`` from the chunk, which the old inline mapping
    left at their defaults — confirm downstream consumers tolerate this.

    Args:
        items: Chunks and/or sections to convert. ``None`` is treated the
            same as an empty sequence.

    Returns:
        One SearchDoc per input item, in input order; ``[]`` for empty input.
    """
    if not items:
        return []

    return [
        cls.from_inference_section(item)
        if isinstance(item, InferenceSection)
        else cls.from_inference_chunk(item)
        for item in items
    ]

@classmethod
def from_inference_section(
    cls, inference_section: "InferenceSection"
) -> "SearchDoc":
    """Convert an InferenceSection to a SearchDoc.

    A section is represented by its center chunk, and the original body here
    was field-for-field identical to ``from_inference_chunk`` applied to that
    chunk — so delegate to it to keep the mapping in one place.

    Args:
        inference_section: The section whose center chunk supplies the data.

    Returns:
        A SearchDoc built from ``inference_section.center_chunk``.
    """
    return cls.from_inference_chunk(inference_section.center_chunk)

@classmethod
def from_inference_chunk(cls, inference_chunk: "InferenceChunk") -> "SearchDoc":
    """Convert an InferenceChunk to a SearchDoc.

    Args:
        inference_chunk: The retrieved chunk to convert.

    Returns:
        A SearchDoc populated from the chunk's fields. ``is_internet`` is
        always False here; callers flag internet results themselves.
    """
    # source_links appears to be a mapping keyed by offset (indexing it can
    # raise KeyError per review), so a non-empty mapping may still lack key
    # 0 — use .get(0) instead of [0]. TODO confirm mapping semantics.
    source_links = inference_chunk.source_links
    return cls(
        document_id=inference_chunk.document_id,
        chunk_ind=inference_chunk.chunk_id,
        semantic_identifier=inference_chunk.semantic_identifier or "Unknown",
        link=source_links.get(0) if source_links else None,
        blurb=inference_chunk.blurb,
        source_type=inference_chunk.source_type,
        boost=inference_chunk.boost,
        hidden=inference_chunk.hidden,
        metadata=inference_chunk.metadata,
        score=inference_chunk.score,
        is_relevant=inference_chunk.is_relevant,
        relevance_explanation=inference_chunk.relevance_explanation,
        match_highlights=inference_chunk.match_highlights,
        updated_at=inference_chunk.updated_at,
        primary_owners=inference_chunk.primary_owners,
        secondary_owners=inference_chunk.secondary_owners,
        is_internet=False,
    )

def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore
initial_dict = super().model_dump(*args, **kwargs) # type: ignore
initial_dict["updated_at"] = (
Expand Down
34 changes: 0 additions & 34 deletions backend/onyx/context/search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,40 +118,6 @@ def inference_section_from_chunks(
)


def chunks_or_sections_to_search_docs(
    items: Sequence[InferenceChunk | InferenceSection] | None,
) -> list[SearchDoc]:
    """Convert retrieved chunks and/or sections into SearchDoc objects.

    A section contributes only its center chunk. ``None`` and an empty
    sequence both yield an empty list.
    """
    if not items:
        return []

    search_docs: list[SearchDoc] = []
    for item in items:
        # A section is represented by its center chunk; a chunk is used as-is.
        if isinstance(item, InferenceSection):
            chunk = item.center_chunk
        else:
            chunk = item
        search_docs.append(
            SearchDoc(
                document_id=chunk.document_id,
                chunk_ind=chunk.chunk_id,
                semantic_identifier=chunk.semantic_identifier or "Unknown",
                link=chunk.source_links[0] if chunk.source_links else None,
                blurb=chunk.blurb,
                source_type=chunk.source_type,
                boost=chunk.boost,
                hidden=chunk.hidden,
                metadata=chunk.metadata,
                score=chunk.score,
                match_highlights=chunk.match_highlights,
                updated_at=chunk.updated_at,
                primary_owners=chunk.primary_owners,
                secondary_owners=chunk.secondary_owners,
                is_internet=False,
            )
        )
    return search_docs


def remove_stop_words_and_punctuation(keywords: list[str]) -> list[str]:
try:
# Re-tokenize using the NLTK tokenizer for better matching
Expand Down
3 changes: 1 addition & 2 deletions backend/onyx/db/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
from onyx.context.search.models import RetrievalDocs
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc as ServerSearchDoc
from onyx.context.search.utils import chunks_or_sections_to_search_docs
from onyx.db.models import AgentSearchMetrics
from onyx.db.models import AgentSubQuery
from onyx.db.models import AgentSubQuestion
Expand Down Expand Up @@ -1147,7 +1146,7 @@ def log_agent_sub_question_results(
db_session.add(sub_query_object)
db_session.commit()

search_docs = chunks_or_sections_to_search_docs(
search_docs = ServerSearchDoc.chunks_or_sections_to_search_docs(
sub_query.retrieved_documents
)
for doc in search_docs:
Expand Down
3 changes: 1 addition & 2 deletions backend/onyx/server/query_and_chat/query_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from onyx.context.search.preprocessing.access_filters import (
build_access_filters_for_user,
)
from onyx.context.search.utils import chunks_or_sections_to_search_docs
from onyx.db.chat import get_chat_messages_by_session
from onyx.db.chat import get_chat_session_by_id
from onyx.db.chat import get_chat_sessions_by_user
Expand Down Expand Up @@ -74,7 +73,7 @@ def admin_search(
)
matching_chunks = document_index.admin_retrieval(query=query, filters=final_filters)

documents = chunks_or_sections_to_search_docs(matching_chunks)
documents = SearchDoc.chunks_or_sections_to_search_docs(matching_chunks)
Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR's goal of reducing memory via lazy-loading is undermined by onyx/llm/utils.py, which still eagerly imports heavy libraries like litellm and tiktoken at the top level. This file's import chain causes these libraries to be loaded on startup, negating the memory savings from other lazy-loading efforts.

Prompt for AI agents
Address the following comment on backend/onyx/server/query_and_chat/query_backend.py at line 76:

<comment>This PR&#39;s goal of reducing memory via lazy-loading is undermined by `onyx/llm/utils.py`, which still eagerly imports heavy libraries like `litellm` and `tiktoken` at the top level. This file&#39;s import chain causes these libraries to be loaded on startup, negating the memory savings from other lazy-loading efforts.</comment>

<file context>
@@ -74,7 +73,7 @@ def admin_search(
     matching_chunks = document_index.admin_retrieval(query=query, filters=final_filters)
 
-    documents = chunks_or_sections_to_search_docs(matching_chunks)
+    documents = SearchDoc.chunks_or_sections_to_search_docs(matching_chunks)
 
     # Deduplicate documents by id
</file context>

[internal] Confidence score: 10/10

[internal] Posted by: System Design Agent

Fix with Cubic


# Deduplicate documents by id
deduplicated_documents: list[SearchDoc] = []
Expand Down