✨ Multimodal agent.

Zhi-a · Zhi-a · commit 5cbdc90961c0 · 2025-11-27T17:30:57.000+08:00
Pass the S3 URL and name of each uploaded multimodal file to the agent as part of the query.
diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py
@@ -9,6 +9,7 @@
 from nexent.core.agents.agent_model import AgentRunInfo, ModelConfig, AgentConfig, ToolConfig
 from nexent.memory.memory_service import search_memory_in_levels
 
+from services.file_management_service import get_llm_model
 from services.vectordatabase_service import (
     ElasticSearchService,
     get_vector_db_core,
@@ -25,7 +26,7 @@
 from utils.model_name_utils import add_repo_to_name
 from utils.prompt_template_utils import get_agent_prompt_template
 from utils.config_utils import tenant_config_manager, get_model_name_from_config
-from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE
+from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE
 
 logger = logging.getLogger("create_agent_info")
 logger.setLevel(logging.DEBUG)
@@ -243,6 +244,12 @@ async def create_tool_config_list(agent_id, tenant_id, user_id):
                 "vlm_model": get_vlm_model(tenant_id=tenant_id),
                 "storage_client": minio_client,
             }
+        elif tool_config.class_name == "AnalyzeTextFileTool":
+            tool_config.metadata = {
+                "llm_model": get_llm_model(tenant_id=tenant_id),
+                "storage_client": minio_client,
+                "data_process_service_url": DATA_PROCESS_SERVICE
+            }
 
         tool_config_list.append(tool_config)
 
@@ -307,8 +314,8 @@ async def join_minio_file_description_to_query(minio_files, query):
     if minio_files and isinstance(minio_files, list):
         file_descriptions = []
         for file in minio_files:
-            if isinstance(file, dict) and "description" in file and file["description"]:
-                file_descriptions.append(file["description"])
+            if isinstance(file, dict) and "url" in file and file["url"] and "name" in file and file["name"]:
+                file_descriptions.append("File S3 URL: " + "s3:/" + file["url"] + ", file name:" + file["name"])
 
         if file_descriptions:
             final_query = "User provided some reference files:\n"
diff --git a/backend/services/file_management_service.py b/backend/services/file_management_service.py
@@ -279,15 +279,8 @@ async def preprocess_files_generator(
                 if "error" in file_data:
                     raise Exception(file_data["error"])
 
-                if file_data["ext"] in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
-                    # description = await process_image_file(query, file_data["filename"], file_data["content"], tenant_id, language)
-                    # truncation_percentage = None
-                    description = ""
-                    truncation_percentage = None
-                else:
-                    # description, truncation_percentage = await process_text_file(query, file_data["filename"], file_data["content"], tenant_id, language)
-                    description = ""
-                    truncation_percentage = None
+                description = ""
+                truncation_percentage = None
                 file_descriptions.append(description)
 
                 # Send processing result for each file
diff --git a/backend/services/tool_configuration_service.py b/backend/services/tool_configuration_service.py
@@ -11,7 +11,7 @@
 import jsonref
 from mcpadapt.smolagents_adapter import _sanitize_function_name
 
-from consts.const import DEFAULT_USER_ID, LOCAL_MCP_SERVER
+from consts.const import DEFAULT_USER_ID, LOCAL_MCP_SERVER, DATA_PROCESS_SERVICE
 from consts.exceptions import MCPConnectionError, ToolExecutionException, NotFoundException
 from consts.model import ToolInstanceInfoRequest, ToolInfo, ToolSourceEnum, ToolValidateRequest
 from database.remote_mcp_db import get_mcp_records_by_tenant, get_mcp_server_by_name_and_tenant
@@ -23,6 +23,7 @@
     search_last_tool_instance_by_tool_id,
 )
 from database.user_tenant_db import get_all_tenant_ids
+from services.file_management_service import get_llm_model
 from services.vectordatabase_service import get_embedding_model, get_vector_db_core
 from services.tenant_config_service import get_selected_knowledge_list
 from database.client import minio_client
@@ -615,6 +616,17 @@ def _validate_local_tool(
                 'embedding_model': embedding_model,
             }
             tool_instance = tool_class(**params)
+        elif tool_name == "analyze_text_file":
+            if not tenant_id or not user_id:
+                raise ToolExecutionException(f"Tenant ID and User ID are required for {tool_name} validation")
+            long_text_to_text_model = get_llm_model(tenant_id=tenant_id)
+            params = {
+                **instantiation_params,
+                'llm_model': long_text_to_text_model,
+                'storage_client': minio_client,
+                "data_process_service_url": DATA_PROCESS_SERVICE
+            }
+            tool_instance = tool_class(**params)
         elif tool_name == "analyze_image":
             if not tenant_id or not user_id:
                 raise ToolExecutionException(f"Tenant ID and User ID are required for {tool_name} validation")
diff --git a/frontend/server.js b/frontend/server.js
@@ -13,7 +13,7 @@ const handle = app.getRequestHandler();
 const HTTP_BACKEND = process.env.HTTP_BACKEND || 'http://localhost:5010'; // config
 const WS_BACKEND = process.env.WS_BACKEND || 'ws://localhost:5014'; // runtime
 const RUNTIME_HTTP_BACKEND = process.env.RUNTIME_HTTP_BACKEND || 'http://localhost:5014'; // runtime
-const MINIO_BACKEND = process.env.MINIO_ENDPOINT || 'http://localhost:9000';
+const MINIO_BACKEND = process.env.MINIO_ENDPOINT || 'http://localhost:9010';
 const PORT = 3000;
 
 const proxy = createProxyServer();
diff --git a/sdk/nexent/core/tools/analyze_image_tool.py b/sdk/nexent/core/tools/analyze_image_tool.py
@@ -99,8 +99,6 @@ def _forward_impl(self, image_urls_list: List[bytes], query: str) -> List[str]:
         if self.observer:
             running_prompt = self.running_prompt_zh if self.observer.lang == "zh" else self.running_prompt_en
             self.observer.add_message("", ProcessType.TOOL, running_prompt)
-            card_content = [{"icon": "image", "text": f"Analyzing images..."}]
-            self.observer.add_message("", ProcessType.CARD, json.dumps(card_content, ensure_ascii=False))
 
         if image_urls_list is None:
             raise ValueError("image_urls cannot be None")
diff --git a/sdk/nexent/core/tools/analyze_text_file_tool.py b/sdk/nexent/core/tools/analyze_text_file_tool.py
@@ -104,8 +104,6 @@ def _forward_impl(
         if self.observer:
             running_prompt = self.running_prompt_zh if self.observer.lang == "zh" else self.running_prompt_en
             self.observer.add_message("", ProcessType.TOOL, running_prompt)
-            card_content = [{"icon": "file", "text": f"Analyzing file..."}]
-            self.observer.add_message("", ProcessType.CARD, json.dumps(card_content, ensure_ascii=False))
 
         if file_url_list is None:
             raise ValueError("file_url_list cannot be None")
diff --git a/test/backend/agents/test_create_agent_info.py b/test/backend/agents/test_create_agent_info.py
@@ -1276,15 +1276,18 @@ class TestJoinMinioFileDescriptionToQuery:
     async def test_join_minio_file_description_to_query_with_files(self):
         """Test case with file descriptions"""
         minio_files = [
-            {"description": "File 1 description"},
-            {"description": "File 2 description"},
-            {"no_description": "should be ignored"}
+            {"url": "/nexent/1.pdf", "name": "1.pdf"},
+            {"url": "/nexent/2.pdf", "name": "2.pdf"},
+            {"url": "/nexent/3.pdf", "name": "3.pdf"},
         ]
         query = "test query"
 
         result = await join_minio_file_description_to_query(minio_files, query)
 
-        expected = "User provided some reference files:\nFile 1 description\nFile 2 description\n\nUser wants to answer questions based on the above information: test query"
+        expected = ("User provided some reference files:\nFile S3 URL: s3://nexent/1.pdf, file name:1.pdf\n"
+                    "File S3 URL: s3://nexent/2.pdf, file name:2.pdf\n"
+                    "File S3 URL: s3://nexent/3.pdf, file name:3.pdf\n\n"
+                    'User wants to answer questions based on the above information: test query')
         assert result == expected
 
     @pytest.mark.asyncio
diff --git a/test/sdk/core/tools/test_analyze_text_file_tool.py b/test/sdk/core/tools/test_analyze_text_file_tool.py
@@ -56,19 +56,6 @@ def tool(observer_zh, llm_model):
 
 
 class TestAnalyzeTextFileTool:
-    def test_forward_impl_success_records_observer(self, tool, llm_model, observer_zh):
-        tool.process_text_file = MagicMock(return_value="Extracted text")
-        tool.analyze_file = MagicMock(return_value=("analysis", 0.0))
-
-        result = tool._forward_impl([b"file-bytes"], "Why?")
-
-        assert result == ["analysis"]
-        tool.process_text_file.assert_called_once_with("file_1.txt", b"file-bytes")
-        tool.analyze_file.assert_called_once_with("Why?", "Extracted text")
-        observer_zh.add_message.assert_any_call("", ProcessType.TOOL, "正在分析文件...")
-        observer_zh.add_message.assert_any_call("", ProcessType.CARD, json.dumps(
-            [{"icon": "file", "text": "Analyzing file..."}], ensure_ascii=False))
-
     def test_forward_impl_switches_language(self, observer_en, llm_model, monkeypatch):
         tool = AnalyzeTextFileTool(
             storage_client=MagicMock(),