Simplify the ERNIE-VL processor tests: add a `_restore_real_extract_mm_items` helper and remove several test cases

kesmeey · kesmeey · commit 69f919e97631 · 2025-12-05T17:56:45.000+08:00
diff --git a/tests/input/test_ernie_vl_processor.py b/tests/input/test_ernie_vl_processor.py
@@ -173,6 +173,13 @@ def mock_load_tokenizer(dp_instance):
         # Note: extract_mm_items is not mocked by default, only when needed
         self.data_processor.extract_mm_items = MagicMock(return_value=([], [], [], [], None, [], []))
 
+    def _restore_real_extract_mm_items(self):
+        """Helper method to restore real extract_mm_items method for testing"""
+        from fastdeploy.input.ernie4_5_vl_processor.process import DataProcessor
+
+        original_extract_mm_items = DataProcessor.extract_mm_items
+        self.data_processor.extract_mm_items = original_extract_mm_items.__get__(self.data_processor, DataProcessor)
+
     def _mock_convert_tokens_to_ids(self, token):
         token_id_map = {
             "<|begin_of_sentence|>": 101,
@@ -563,109 +570,6 @@ def test_prompt_token_ids2outputs_add_processed_video_token_len_mismatch(self):
             self.data_processor.prompt_token_ids2outputs(request)
         self.assertIn("video tokens num not match the size", str(ctx.exception))
 
-    def test_text2ids_basic(self):
-        """Test text2ids with basic text input"""
-        text = "Hello world"
-        # Ensure encode returns proper format
-        self.mock_tokenizer.encode.return_value = {"input_ids": [1, 2, 3]}
-        outputs = self.data_processor.text2ids(text)
-
-        self.assertIn("input_ids", outputs)
-        self.assertIn("token_type_ids", outputs)
-        self.assertIn("position_ids", outputs)
-        self.assertGreater(len(outputs["input_ids"]), 0)
-        self.assertEqual(len(outputs["images"]), 0)
-        self.assertEqual(len(outputs["videos"]) if "videos" in outputs else 0, 0)
-
-    def test_text2ids_with_image_placeholder(self):
-        """Test text2ids with image placeholder"""
-        mock_img = Image.new("RGB", (224, 224))
-        text = "Hello <|image@placeholder|> world"
-        self.data_processor.image_preprocessor.get_smarted_resize.return_value = (None, (16, 16))
-        self.data_processor.image_preprocessor.preprocess.return_value = {
-            "pixel_values": np.random.randn(256, 3 * 14 * 14).astype(np.float32),
-            "image_grid_thw": np.array([[1, 16, 16]]),
-        }
-
-        outputs = self.data_processor.text2ids(text, images=[mock_img])
-
-        self.assertGreater(len(outputs["input_ids"]), 0)
-        self.assertGreater(len(outputs["images"]), 0)
-        self.assertEqual(outputs["num_input_image_tokens"], 64)  # (16*16) // (2*2) = 64
-
-    def test_text2ids_with_video_placeholder(self):
-        """Test text2ids with video placeholder"""
-        mock_frames = [Image.new("RGB", (224, 224)) for _ in range(4)]
-        text = "Hello <|video@placeholder|> world"
-        self.data_processor._load_and_process_video = MagicMock(return_value=mock_frames)
-        self.data_processor.image_preprocessor.get_smarted_resize.return_value = (None, (16, 16))
-        self.data_processor.image_preprocessor.preprocess.return_value = {
-            "pixel_values_videos": np.random.randn(4, 256, 3 * 14 * 14).astype(np.float32),
-            "video_grid_thw": np.array([[4, 16, 16]]),
-        }
-
-        outputs = self.data_processor.text2ids(text, videos=["test_video.mp4"])
-
-        self.assertGreater(len(outputs["input_ids"]), 0)
-        self.assertGreater(len(outputs["images"]), 0)
-        self.assertGreater(outputs["num_input_video_tokens"], 0)
-
-    def test_request2ids_basic(self):
-        """Test request2ids with basic request"""
-        self.data_processor.is_training = False
-        # Fix apply_chat_template to return text without image placeholder
-        self.mock_tokenizer.apply_chat_template.return_value = "User: Hello"
-        request = {
-            "messages": [{"role": "user", "content": "Hello"}],
-            "add_generation_prompt": True,
-        }
-
-        with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
-            mock_parse.return_value = [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}]
-            outputs = self.data_processor.request2ids(request)
-
-            self.assertIn("input_ids", outputs)
-            self.assertGreater(len(outputs["input_ids"]), 0)
-
-    def test_request2ids_with_multimodal(self):
-        """Test request2ids with multimodal content"""
-        self.data_processor.is_training = False
-        mock_image = Image.new("RGB", (224, 224))
-        # Fix apply_chat_template to return text with image placeholder matching the image
-        self.mock_tokenizer.apply_chat_template.return_value = "User: What's in this image?<|image@placeholder|>"
-        request = {
-            "messages": [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "text", "text": "What's in this image?"},
-                        {"type": "image", "data": mock_image, "uuid": "img1"},
-                    ],
-                }
-            ],
-            "add_generation_prompt": True,
-        }
-
-        with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
-            mock_parse.return_value = [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "text", "text": "What's in this image?"},
-                        {"type": "image", "data": mock_image, "uuid": "img1"},
-                    ],
-                }
-            ]
-            self.data_processor.image_preprocessor.get_smarted_resize.return_value = (None, (16, 16))
-            self.data_processor.image_preprocessor.preprocess.return_value = {
-                "pixel_values": np.random.randn(256, 3 * 14 * 14).astype(np.float32),
-                "image_grid_thw": np.array([[1, 16, 16]]),
-            }
-            outputs = self.data_processor.request2ids(request)
-
-            self.assertIn("input_ids", outputs)
-            self.assertGreater(len(outputs["images"]), 0)
-
     def test_extract_mm_items_basic(self):
         """Test extract_mm_items with basic multimodal items"""
         request = {
@@ -681,11 +585,6 @@ def test_extract_mm_items_basic(self):
             ]
         }
 
-        # Restore real extract_mm_items method for this test
-        from fastdeploy.input.ernie4_5_vl_processor.process import DataProcessor
-
-        original_extract_mm_items = DataProcessor.extract_mm_items
-
         with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
             mock_parse.return_value = [
                 {
@@ -698,9 +597,7 @@ def test_extract_mm_items_basic(self):
                 }
             ]
             # Use real extract_mm_items method (cache is disabled, so no zmq connection needed)
-            self.data_processor.extract_mm_items = original_extract_mm_items.__get__(
-                self.data_processor, DataProcessor
-            )
+            self._restore_real_extract_mm_items()
             images, videos, image_uuid, video_uuid, dealer, missing_idx, mm_items = (
                 self.data_processor.extract_mm_items(request)
             )
@@ -716,17 +613,10 @@ def test_extract_mm_items_missing_data_error(self):
         self.data_processor.enable_processor_cache = False
         request = {"messages": [{"role": "user", "content": [{"type": "image", "uuid": "img1"}]}]}
 
-        # Restore real extract_mm_items method for this test
-        from fastdeploy.input.ernie4_5_vl_processor.process import DataProcessor
-
-        original_extract_mm_items = DataProcessor.extract_mm_items
-
         with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
             mock_parse.return_value = [{"role": "user", "content": [{"type": "image", "uuid": "img1"}]}]
             # Use real extract_mm_items method
-            self.data_processor.extract_mm_items = original_extract_mm_items.__get__(
-                self.data_processor, DataProcessor
-            )
+            self._restore_real_extract_mm_items()
             with self.assertRaises(ValueError) as ctx:
                 self.data_processor.extract_mm_items(request)
             self.assertIn("Missing items cannot be retrieved", str(ctx.exception))
@@ -1439,29 +1329,11 @@ def test_fancy_print(self):
                 self.assertIn(expected_contains, result)
 
     def test_prompt_token_ids2outputs(self):
-        """Test prompt_token_ids2outputs method"""
-        # No messages
-        request = {"prompt_token_ids": [1, 2, 3, 4, 5]}
-        outputs = self.processor.prompt_token_ids2outputs(request)
-        self.assertEqual(len(outputs["input_ids"]), 5)
-
-        # With image - need to match token count with actual image patch count
+        """Test prompt_token_ids2outputs method - only unique scenarios not covered by TestDataProcessorTargetMethods"""
         self.processor.is_training = False
         mock_image = Image.new("RGB", (224, 224))
-        # Calculate expected token count: (16*16) // (2*2) = 64 tokens
         num_tokens = (16 * 16) // (self.processor.spatial_conv_size**2)
-        request = {
-            "messages": [{"role": "user", "content": [{"type": "image", "data": mock_image, "uuid": "img1"}]}],
-            "prompt_token_ids": [self.processor.image_start_id]
-            + [self.processor.image_patch_id] * num_tokens
-            + [self.processor.image_end_id],
-        }
-        with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
-            mock_parse.return_value = [
-                {"role": "user", "content": [{"type": "image", "data": mock_image, "uuid": "img1"}]}
-            ]
-            outputs = self.processor.prompt_token_ids2outputs(request)
-            self.assertGreater(len(outputs["input_ids"]), 0)
+        num_video_tokens = (4 * 16 * 16) // (self.processor.spatial_conv_size**2 * self.processor.temporal_conv_size)
 
         # Incomplete image tokens
         request = {
@@ -1496,7 +1368,6 @@ def test_prompt_token_ids2outputs(self):
 
         # Video count mismatch
         mock_frames = [Image.new("RGB", (224, 224)) for _ in range(4)]
-        num_video_tokens = (4 * 16 * 16) // (self.processor.spatial_conv_size**2 * self.processor.temporal_conv_size)
         request = {
             "messages": [{"role": "user", "content": [{"type": "video", "data": mock_frames, "uuid": "vid1"}]}],
             "prompt_token_ids": [
@@ -1565,21 +1436,6 @@ def test_prompt_token_ids2outputs(self):
             with self.assertRaises(ValueError):
                 self.processor.prompt_token_ids2outputs(request)
 
-        # Test with cached image (tuple format)
-        cached_image = (np.random.rand(256, 3 * 14 * 14).astype(np.float32), {"thw": (1, 16, 16)})
-        request = {
-            "messages": [{"role": "user", "content": [{"type": "image", "data": cached_image, "uuid": "img1"}]}],
-            "prompt_token_ids": [self.processor.image_start_id]
-            + [self.processor.image_patch_id] * num_tokens
-            + [self.processor.image_end_id],
-        }
-        with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
-            mock_parse.return_value = [
-                {"role": "user", "content": [{"type": "image", "data": cached_image, "uuid": "img1"}]}
-            ]
-            outputs = self.processor.prompt_token_ids2outputs(request)
-            self.assertGreater(len(outputs["input_ids"]), 0)
-
         # Test with video (dict format)
         request = {
             "messages": [
@@ -1608,21 +1464,6 @@ def test_prompt_token_ids2outputs(self):
             outputs = self.processor.prompt_token_ids2outputs(request)
             self.assertGreater(len(outputs["input_ids"]), 0)
 
-        # Test with cached video (tuple format)
-        cached_video = (np.random.rand(4 * 256, 3 * 14 * 14).astype(np.float32), {"thw": (4, 16, 16)})
-        request = {
-            "messages": [{"role": "user", "content": [{"type": "video", "data": cached_video, "uuid": "vid1"}]}],
-            "prompt_token_ids": [self.processor.video_start_id]
-            + [self.processor.image_patch_id] * num_video_tokens
-            + [self.processor.video_end_id],
-        }
-        with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
-            mock_parse.return_value = [
-                {"role": "user", "content": [{"type": "video", "data": cached_video, "uuid": "vid1"}]}
-            ]
-            outputs = self.processor.prompt_token_ids2outputs(request)
-            self.assertGreater(len(outputs["input_ids"]), 0)
-
         # Test prompt_token_ids2outputs with processor cache update
         self.processor.enable_processor_cache = True
         # Reset preprocess mock to return correct format
@@ -1653,56 +1494,6 @@ def test_prompt_token_ids2outputs(self):
                         mock_update.assert_called_once()
         self.processor.enable_processor_cache = False
 
-        # Test token_len mismatch for processed image
-        cached_image_wrong = (np.random.rand(128, 3 * 14 * 14).astype(np.float32), {"thw": (1, 16, 16)})
-        request = {
-            "messages": [{"role": "user", "content": [{"type": "image", "data": cached_image_wrong, "uuid": "img1"}]}],
-            "prompt_token_ids": [self.processor.image_start_id]
-            + [self.processor.image_patch_id] * num_tokens
-            + [self.processor.image_end_id],
-        }
-        with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
-            mock_parse.return_value = [
-                {"role": "user", "content": [{"type": "image", "data": cached_image_wrong, "uuid": "img1"}]}
-            ]
-            with self.assertRaisesRegex(ValueError, "image tokens num not match"):
-                self.processor.prompt_token_ids2outputs(request)
-
-        # Test token_len mismatch for video
-        request = {
-            "messages": [{"role": "user", "content": [{"type": "video", "data": mock_frames, "uuid": "vid1"}]}],
-            "prompt_token_ids": [self.processor.video_start_id]
-            + [self.processor.image_patch_id] * 10
-            + [self.processor.video_end_id],
-        }
-        with (
-            patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse,
-            patch("fastdeploy.input.ernie4_5_vl_processor.process.read_video_decord") as mock_read,
-            patch("fastdeploy.input.ernie4_5_vl_processor.process.read_frames_decord") as mock_frames_read,
-            patch("fastdeploy.input.ernie4_5_vl_processor.process.render_frame_timestamp") as mock_render,
-        ):
-            mock_parse.return_value = [
-                {"role": "user", "content": [{"type": "video", "data": mock_frames, "uuid": "vid1"}]}
-            ]
-            self._setup_video_mocks(mock_read, mock_frames_read, mock_render, mock_frames)
-            with self.assertRaisesRegex(ValueError, "video tokens num not match"):
-                self.processor.prompt_token_ids2outputs(request)
-
-        # Test token_len mismatch for processed video
-        cached_video_wrong = (np.random.rand(128, 3 * 14 * 14).astype(np.float32), {"thw": (4, 16, 16)})
-        request = {
-            "messages": [{"role": "user", "content": [{"type": "video", "data": cached_video_wrong, "uuid": "vid1"}]}],
-            "prompt_token_ids": [self.processor.video_start_id]
-            + [self.processor.image_patch_id] * num_video_tokens
-            + [self.processor.video_end_id],
-        }
-        with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
-            mock_parse.return_value = [
-                {"role": "user", "content": [{"type": "video", "data": cached_video_wrong, "uuid": "vid1"}]}
-            ]
-            with self.assertRaisesRegex(ValueError, "video tokens num not match"):
-                self.processor.prompt_token_ids2outputs(request)
-
     def test_load_tokenizer(self):
         """Test _load_tokenizer method"""
         with patch("os.path.exists", return_value=True):