Skip to content

Commit 69f919e

Browse files
committed
simplify the code
1 parent: 8fa368e · commit: 69f919e

File tree

1 file changed

+11 -220 lines changed

1 file changed

+11 -220 lines changed

tests/input/test_ernie_vl_processor.py

Lines changed: 11 additions & 220 deletions
Original file line number | Diff line number | Diff line change
@@ -173,6 +173,13 @@ def mock_load_tokenizer(dp_instance):
173173
# Note: extract_mm_items is not mocked by default, only when needed
174174
self.data_processor.extract_mm_items = MagicMock(return_value=([], [], [], [], None, [], []))
175175

176+
def _restore_real_extract_mm_items(self):
177+
"""Helper method to restore real extract_mm_items method for testing"""
178+
from fastdeploy.input.ernie4_5_vl_processor.process import DataProcessor
179+
180+
original_extract_mm_items = DataProcessor.extract_mm_items
181+
self.data_processor.extract_mm_items = original_extract_mm_items.__get__(self.data_processor, DataProcessor)
182+
176183
def _mock_convert_tokens_to_ids(self, token):
177184
token_id_map = {
178185
"<|begin_of_sentence|>": 101,
@@ -563,109 +570,6 @@ def test_prompt_token_ids2outputs_add_processed_video_token_len_mismatch(self):
563570
self.data_processor.prompt_token_ids2outputs(request)
564571
self.assertIn("video tokens num not match the size", str(ctx.exception))
565572

566-
def test_text2ids_basic(self):
567-
"""Test text2ids with basic text input"""
568-
text = "Hello world"
569-
# Ensure encode returns proper format
570-
self.mock_tokenizer.encode.return_value = {"input_ids": [1, 2, 3]}
571-
outputs = self.data_processor.text2ids(text)
572-
573-
self.assertIn("input_ids", outputs)
574-
self.assertIn("token_type_ids", outputs)
575-
self.assertIn("position_ids", outputs)
576-
self.assertGreater(len(outputs["input_ids"]), 0)
577-
self.assertEqual(len(outputs["images"]), 0)
578-
self.assertEqual(len(outputs["videos"]) if "videos" in outputs else 0, 0)
579-
580-
def test_text2ids_with_image_placeholder(self):
581-
"""Test text2ids with image placeholder"""
582-
mock_img = Image.new("RGB", (224, 224))
583-
text = "Hello <|image@placeholder|> world"
584-
self.data_processor.image_preprocessor.get_smarted_resize.return_value = (None, (16, 16))
585-
self.data_processor.image_preprocessor.preprocess.return_value = {
586-
"pixel_values": np.random.randn(256, 3 * 14 * 14).astype(np.float32),
587-
"image_grid_thw": np.array([[1, 16, 16]]),
588-
}
589-
590-
outputs = self.data_processor.text2ids(text, images=[mock_img])
591-
592-
self.assertGreater(len(outputs["input_ids"]), 0)
593-
self.assertGreater(len(outputs["images"]), 0)
594-
self.assertEqual(outputs["num_input_image_tokens"], 64) # (16*16) // (2*2) = 64
595-
596-
def test_text2ids_with_video_placeholder(self):
597-
"""Test text2ids with video placeholder"""
598-
mock_frames = [Image.new("RGB", (224, 224)) for _ in range(4)]
599-
text = "Hello <|video@placeholder|> world"
600-
self.data_processor._load_and_process_video = MagicMock(return_value=mock_frames)
601-
self.data_processor.image_preprocessor.get_smarted_resize.return_value = (None, (16, 16))
602-
self.data_processor.image_preprocessor.preprocess.return_value = {
603-
"pixel_values_videos": np.random.randn(4, 256, 3 * 14 * 14).astype(np.float32),
604-
"video_grid_thw": np.array([[4, 16, 16]]),
605-
}
606-
607-
outputs = self.data_processor.text2ids(text, videos=["test_video.mp4"])
608-
609-
self.assertGreater(len(outputs["input_ids"]), 0)
610-
self.assertGreater(len(outputs["images"]), 0)
611-
self.assertGreater(outputs["num_input_video_tokens"], 0)
612-
613-
def test_request2ids_basic(self):
614-
"""Test request2ids with basic request"""
615-
self.data_processor.is_training = False
616-
# Fix apply_chat_template to return text without image placeholder
617-
self.mock_tokenizer.apply_chat_template.return_value = "User: Hello"
618-
request = {
619-
"messages": [{"role": "user", "content": "Hello"}],
620-
"add_generation_prompt": True,
621-
}
622-
623-
with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
624-
mock_parse.return_value = [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}]
625-
outputs = self.data_processor.request2ids(request)
626-
627-
self.assertIn("input_ids", outputs)
628-
self.assertGreater(len(outputs["input_ids"]), 0)
629-
630-
def test_request2ids_with_multimodal(self):
631-
"""Test request2ids with multimodal content"""
632-
self.data_processor.is_training = False
633-
mock_image = Image.new("RGB", (224, 224))
634-
# Fix apply_chat_template to return text with image placeholder matching the image
635-
self.mock_tokenizer.apply_chat_template.return_value = "User: What's in this image?<|image@placeholder|>"
636-
request = {
637-
"messages": [
638-
{
639-
"role": "user",
640-
"content": [
641-
{"type": "text", "text": "What's in this image?"},
642-
{"type": "image", "data": mock_image, "uuid": "img1"},
643-
],
644-
}
645-
],
646-
"add_generation_prompt": True,
647-
}
648-
649-
with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
650-
mock_parse.return_value = [
651-
{
652-
"role": "user",
653-
"content": [
654-
{"type": "text", "text": "What's in this image?"},
655-
{"type": "image", "data": mock_image, "uuid": "img1"},
656-
],
657-
}
658-
]
659-
self.data_processor.image_preprocessor.get_smarted_resize.return_value = (None, (16, 16))
660-
self.data_processor.image_preprocessor.preprocess.return_value = {
661-
"pixel_values": np.random.randn(256, 3 * 14 * 14).astype(np.float32),
662-
"image_grid_thw": np.array([[1, 16, 16]]),
663-
}
664-
outputs = self.data_processor.request2ids(request)
665-
666-
self.assertIn("input_ids", outputs)
667-
self.assertGreater(len(outputs["images"]), 0)
668-
669573
def test_extract_mm_items_basic(self):
670574
"""Test extract_mm_items with basic multimodal items"""
671575
request = {
@@ -681,11 +585,6 @@ def test_extract_mm_items_basic(self):
681585
]
682586
}
683587

684-
# Restore real extract_mm_items method for this test
685-
from fastdeploy.input.ernie4_5_vl_processor.process import DataProcessor
686-
687-
original_extract_mm_items = DataProcessor.extract_mm_items
688-
689588
with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
690589
mock_parse.return_value = [
691590
{
@@ -698,9 +597,7 @@ def test_extract_mm_items_basic(self):
698597
}
699598
]
700599
# Use real extract_mm_items method (cache is disabled, so no zmq connection needed)
701-
self.data_processor.extract_mm_items = original_extract_mm_items.__get__(
702-
self.data_processor, DataProcessor
703-
)
600+
self._restore_real_extract_mm_items()
704601
images, videos, image_uuid, video_uuid, dealer, missing_idx, mm_items = (
705602
self.data_processor.extract_mm_items(request)
706603
)
@@ -716,17 +613,10 @@ def test_extract_mm_items_missing_data_error(self):
716613
self.data_processor.enable_processor_cache = False
717614
request = {"messages": [{"role": "user", "content": [{"type": "image", "uuid": "img1"}]}]}
718615

719-
# Restore real extract_mm_items method for this test
720-
from fastdeploy.input.ernie4_5_vl_processor.process import DataProcessor
721-
722-
original_extract_mm_items = DataProcessor.extract_mm_items
723-
724616
with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
725617
mock_parse.return_value = [{"role": "user", "content": [{"type": "image", "uuid": "img1"}]}]
726618
# Use real extract_mm_items method
727-
self.data_processor.extract_mm_items = original_extract_mm_items.__get__(
728-
self.data_processor, DataProcessor
729-
)
619+
self._restore_real_extract_mm_items()
730620
with self.assertRaises(ValueError) as ctx:
731621
self.data_processor.extract_mm_items(request)
732622
self.assertIn("Missing items cannot be retrieved", str(ctx.exception))
@@ -1439,29 +1329,11 @@ def test_fancy_print(self):
14391329
self.assertIn(expected_contains, result)
14401330

14411331
def test_prompt_token_ids2outputs(self):
1442-
"""Test prompt_token_ids2outputs method"""
1443-
# No messages
1444-
request = {"prompt_token_ids": [1, 2, 3, 4, 5]}
1445-
outputs = self.processor.prompt_token_ids2outputs(request)
1446-
self.assertEqual(len(outputs["input_ids"]), 5)
1447-
1448-
# With image - need to match token count with actual image patch count
1332+
"""Test prompt_token_ids2outputs method - only unique scenarios not covered by TestDataProcessorTargetMethods"""
14491333
self.processor.is_training = False
14501334
mock_image = Image.new("RGB", (224, 224))
1451-
# Calculate expected token count: (16*16) // (2*2) = 64 tokens
14521335
num_tokens = (16 * 16) // (self.processor.spatial_conv_size**2)
1453-
request = {
1454-
"messages": [{"role": "user", "content": [{"type": "image", "data": mock_image, "uuid": "img1"}]}],
1455-
"prompt_token_ids": [self.processor.image_start_id]
1456-
+ [self.processor.image_patch_id] * num_tokens
1457-
+ [self.processor.image_end_id],
1458-
}
1459-
with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
1460-
mock_parse.return_value = [
1461-
{"role": "user", "content": [{"type": "image", "data": mock_image, "uuid": "img1"}]}
1462-
]
1463-
outputs = self.processor.prompt_token_ids2outputs(request)
1464-
self.assertGreater(len(outputs["input_ids"]), 0)
1336+
num_video_tokens = (4 * 16 * 16) // (self.processor.spatial_conv_size**2 * self.processor.temporal_conv_size)
14651337

14661338
# Incomplete image tokens
14671339
request = {
@@ -1496,7 +1368,6 @@ def test_prompt_token_ids2outputs(self):
14961368

14971369
# Video count mismatch
14981370
mock_frames = [Image.new("RGB", (224, 224)) for _ in range(4)]
1499-
num_video_tokens = (4 * 16 * 16) // (self.processor.spatial_conv_size**2 * self.processor.temporal_conv_size)
15001371
request = {
15011372
"messages": [{"role": "user", "content": [{"type": "video", "data": mock_frames, "uuid": "vid1"}]}],
15021373
"prompt_token_ids": [
@@ -1565,21 +1436,6 @@ def test_prompt_token_ids2outputs(self):
15651436
with self.assertRaises(ValueError):
15661437
self.processor.prompt_token_ids2outputs(request)
15671438

1568-
# Test with cached image (tuple format)
1569-
cached_image = (np.random.rand(256, 3 * 14 * 14).astype(np.float32), {"thw": (1, 16, 16)})
1570-
request = {
1571-
"messages": [{"role": "user", "content": [{"type": "image", "data": cached_image, "uuid": "img1"}]}],
1572-
"prompt_token_ids": [self.processor.image_start_id]
1573-
+ [self.processor.image_patch_id] * num_tokens
1574-
+ [self.processor.image_end_id],
1575-
}
1576-
with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
1577-
mock_parse.return_value = [
1578-
{"role": "user", "content": [{"type": "image", "data": cached_image, "uuid": "img1"}]}
1579-
]
1580-
outputs = self.processor.prompt_token_ids2outputs(request)
1581-
self.assertGreater(len(outputs["input_ids"]), 0)
1582-
15831439
# Test with video (dict format)
15841440
request = {
15851441
"messages": [
@@ -1608,21 +1464,6 @@ def test_prompt_token_ids2outputs(self):
16081464
outputs = self.processor.prompt_token_ids2outputs(request)
16091465
self.assertGreater(len(outputs["input_ids"]), 0)
16101466

1611-
# Test with cached video (tuple format)
1612-
cached_video = (np.random.rand(4 * 256, 3 * 14 * 14).astype(np.float32), {"thw": (4, 16, 16)})
1613-
request = {
1614-
"messages": [{"role": "user", "content": [{"type": "video", "data": cached_video, "uuid": "vid1"}]}],
1615-
"prompt_token_ids": [self.processor.video_start_id]
1616-
+ [self.processor.image_patch_id] * num_video_tokens
1617-
+ [self.processor.video_end_id],
1618-
}
1619-
with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
1620-
mock_parse.return_value = [
1621-
{"role": "user", "content": [{"type": "video", "data": cached_video, "uuid": "vid1"}]}
1622-
]
1623-
outputs = self.processor.prompt_token_ids2outputs(request)
1624-
self.assertGreater(len(outputs["input_ids"]), 0)
1625-
16261467
# Test prompt_token_ids2outputs with processor cache update
16271468
self.processor.enable_processor_cache = True
16281469
# Reset preprocess mock to return correct format
@@ -1653,56 +1494,6 @@ def test_prompt_token_ids2outputs(self):
16531494
mock_update.assert_called_once()
16541495
self.processor.enable_processor_cache = False
16551496

1656-
# Test token_len mismatch for processed image
1657-
cached_image_wrong = (np.random.rand(128, 3 * 14 * 14).astype(np.float32), {"thw": (1, 16, 16)})
1658-
request = {
1659-
"messages": [{"role": "user", "content": [{"type": "image", "data": cached_image_wrong, "uuid": "img1"}]}],
1660-
"prompt_token_ids": [self.processor.image_start_id]
1661-
+ [self.processor.image_patch_id] * num_tokens
1662-
+ [self.processor.image_end_id],
1663-
}
1664-
with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
1665-
mock_parse.return_value = [
1666-
{"role": "user", "content": [{"type": "image", "data": cached_image_wrong, "uuid": "img1"}]}
1667-
]
1668-
with self.assertRaisesRegex(ValueError, "image tokens num not match"):
1669-
self.processor.prompt_token_ids2outputs(request)
1670-
1671-
# Test token_len mismatch for video
1672-
request = {
1673-
"messages": [{"role": "user", "content": [{"type": "video", "data": mock_frames, "uuid": "vid1"}]}],
1674-
"prompt_token_ids": [self.processor.video_start_id]
1675-
+ [self.processor.image_patch_id] * 10
1676-
+ [self.processor.video_end_id],
1677-
}
1678-
with (
1679-
patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse,
1680-
patch("fastdeploy.input.ernie4_5_vl_processor.process.read_video_decord") as mock_read,
1681-
patch("fastdeploy.input.ernie4_5_vl_processor.process.read_frames_decord") as mock_frames_read,
1682-
patch("fastdeploy.input.ernie4_5_vl_processor.process.render_frame_timestamp") as mock_render,
1683-
):
1684-
mock_parse.return_value = [
1685-
{"role": "user", "content": [{"type": "video", "data": mock_frames, "uuid": "vid1"}]}
1686-
]
1687-
self._setup_video_mocks(mock_read, mock_frames_read, mock_render, mock_frames)
1688-
with self.assertRaisesRegex(ValueError, "video tokens num not match"):
1689-
self.processor.prompt_token_ids2outputs(request)
1690-
1691-
# Test token_len mismatch for processed video
1692-
cached_video_wrong = (np.random.rand(128, 3 * 14 * 14).astype(np.float32), {"thw": (4, 16, 16)})
1693-
request = {
1694-
"messages": [{"role": "user", "content": [{"type": "video", "data": cached_video_wrong, "uuid": "vid1"}]}],
1695-
"prompt_token_ids": [self.processor.video_start_id]
1696-
+ [self.processor.image_patch_id] * num_video_tokens
1697-
+ [self.processor.video_end_id],
1698-
}
1699-
with patch("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages") as mock_parse:
1700-
mock_parse.return_value = [
1701-
{"role": "user", "content": [{"type": "video", "data": cached_video_wrong, "uuid": "vid1"}]}
1702-
]
1703-
with self.assertRaisesRegex(ValueError, "video tokens num not match"):
1704-
self.processor.prompt_token_ids2outputs(request)
1705-
17061497
def test_load_tokenizer(self):
17071498
"""Test _load_tokenizer method"""
17081499
with patch("os.path.exists", return_value=True):

0 commit comments

Comments
 (0)