@@ -173,6 +173,13 @@ def mock_load_tokenizer(dp_instance):
173173 # Note: extract_mm_items is not mocked by default, only when needed
174174 self .data_processor .extract_mm_items = MagicMock (return_value = ([], [], [], [], None , [], []))
175175
176+ def _restore_real_extract_mm_items (self ):
177+ """Rebind the real DataProcessor.extract_mm_items onto self.data_processor, replacing any stub (e.g. a MagicMock) set on the instance, so a test can run the actual implementation."""
178+ from fastdeploy .input .ernie4_5_vl_processor .process import DataProcessor
179+
180+ original_extract_mm_items = DataProcessor .extract_mm_items
181+ self .data_processor .extract_mm_items = original_extract_mm_items .__get__ (self .data_processor , DataProcessor )  # bind the class-level attribute to this instance via the descriptor protocol
182+
176183 def _mock_convert_tokens_to_ids (self , token ):
177184 token_id_map = {
178185 "<|begin_of_sentence|>" : 101 ,
@@ -563,109 +570,6 @@ def test_prompt_token_ids2outputs_add_processed_video_token_len_mismatch(self):
563570 self .data_processor .prompt_token_ids2outputs (request )
564571 self .assertIn ("video tokens num not match the size" , str (ctx .exception ))
565572
566- def test_text2ids_basic (self ):
567- """Test text2ids with basic text input"""
568- text = "Hello world"
569- # Ensure encode returns proper format
570- self .mock_tokenizer .encode .return_value = {"input_ids" : [1 , 2 , 3 ]}
571- outputs = self .data_processor .text2ids (text )
572-
573- self .assertIn ("input_ids" , outputs )
574- self .assertIn ("token_type_ids" , outputs )
575- self .assertIn ("position_ids" , outputs )
576- self .assertGreater (len (outputs ["input_ids" ]), 0 )
577- self .assertEqual (len (outputs ["images" ]), 0 )
578- self .assertEqual (len (outputs ["videos" ]) if "videos" in outputs else 0 , 0 )
579-
580- def test_text2ids_with_image_placeholder (self ):
581- """Test text2ids with image placeholder"""
582- mock_img = Image .new ("RGB" , (224 , 224 ))
583- text = "Hello <|image@placeholder|> world"
584- self .data_processor .image_preprocessor .get_smarted_resize .return_value = (None , (16 , 16 ))
585- self .data_processor .image_preprocessor .preprocess .return_value = {
586- "pixel_values" : np .random .randn (256 , 3 * 14 * 14 ).astype (np .float32 ),
587- "image_grid_thw" : np .array ([[1 , 16 , 16 ]]),
588- }
589-
590- outputs = self .data_processor .text2ids (text , images = [mock_img ])
591-
592- self .assertGreater (len (outputs ["input_ids" ]), 0 )
593- self .assertGreater (len (outputs ["images" ]), 0 )
594- self .assertEqual (outputs ["num_input_image_tokens" ], 64 ) # (16*16) // (2*2) = 64
595-
596- def test_text2ids_with_video_placeholder (self ):
597- """Test text2ids with video placeholder"""
598- mock_frames = [Image .new ("RGB" , (224 , 224 )) for _ in range (4 )]
599- text = "Hello <|video@placeholder|> world"
600- self .data_processor ._load_and_process_video = MagicMock (return_value = mock_frames )
601- self .data_processor .image_preprocessor .get_smarted_resize .return_value = (None , (16 , 16 ))
602- self .data_processor .image_preprocessor .preprocess .return_value = {
603- "pixel_values_videos" : np .random .randn (4 , 256 , 3 * 14 * 14 ).astype (np .float32 ),
604- "video_grid_thw" : np .array ([[4 , 16 , 16 ]]),
605- }
606-
607- outputs = self .data_processor .text2ids (text , videos = ["test_video.mp4" ])
608-
609- self .assertGreater (len (outputs ["input_ids" ]), 0 )
610- self .assertGreater (len (outputs ["images" ]), 0 )
611- self .assertGreater (outputs ["num_input_video_tokens" ], 0 )
612-
613- def test_request2ids_basic (self ):
614- """Test request2ids with basic request"""
615- self .data_processor .is_training = False
616- # Fix apply_chat_template to return text without image placeholder
617- self .mock_tokenizer .apply_chat_template .return_value = "User: Hello"
618- request = {
619- "messages" : [{"role" : "user" , "content" : "Hello" }],
620- "add_generation_prompt" : True ,
621- }
622-
623- with patch ("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages" ) as mock_parse :
624- mock_parse .return_value = [{"role" : "user" , "content" : [{"type" : "text" , "text" : "Hello" }]}]
625- outputs = self .data_processor .request2ids (request )
626-
627- self .assertIn ("input_ids" , outputs )
628- self .assertGreater (len (outputs ["input_ids" ]), 0 )
629-
630- def test_request2ids_with_multimodal (self ):
631- """Test request2ids with multimodal content"""
632- self .data_processor .is_training = False
633- mock_image = Image .new ("RGB" , (224 , 224 ))
634- # Fix apply_chat_template to return text with image placeholder matching the image
635- self .mock_tokenizer .apply_chat_template .return_value = "User: What's in this image?<|image@placeholder|>"
636- request = {
637- "messages" : [
638- {
639- "role" : "user" ,
640- "content" : [
641- {"type" : "text" , "text" : "What's in this image?" },
642- {"type" : "image" , "data" : mock_image , "uuid" : "img1" },
643- ],
644- }
645- ],
646- "add_generation_prompt" : True ,
647- }
648-
649- with patch ("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages" ) as mock_parse :
650- mock_parse .return_value = [
651- {
652- "role" : "user" ,
653- "content" : [
654- {"type" : "text" , "text" : "What's in this image?" },
655- {"type" : "image" , "data" : mock_image , "uuid" : "img1" },
656- ],
657- }
658- ]
659- self .data_processor .image_preprocessor .get_smarted_resize .return_value = (None , (16 , 16 ))
660- self .data_processor .image_preprocessor .preprocess .return_value = {
661- "pixel_values" : np .random .randn (256 , 3 * 14 * 14 ).astype (np .float32 ),
662- "image_grid_thw" : np .array ([[1 , 16 , 16 ]]),
663- }
664- outputs = self .data_processor .request2ids (request )
665-
666- self .assertIn ("input_ids" , outputs )
667- self .assertGreater (len (outputs ["images" ]), 0 )
668-
669573 def test_extract_mm_items_basic (self ):
670574 """Test extract_mm_items with basic multimodal items"""
671575 request = {
@@ -681,11 +585,6 @@ def test_extract_mm_items_basic(self):
681585 ]
682586 }
683587
684- # Restore real extract_mm_items method for this test
685- from fastdeploy .input .ernie4_5_vl_processor .process import DataProcessor
686-
687- original_extract_mm_items = DataProcessor .extract_mm_items
688-
689588 with patch ("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages" ) as mock_parse :
690589 mock_parse .return_value = [
691590 {
@@ -698,9 +597,7 @@ def test_extract_mm_items_basic(self):
698597 }
699598 ]
700599 # Use real extract_mm_items method (cache is disabled, so no zmq connection needed)
701- self .data_processor .extract_mm_items = original_extract_mm_items .__get__ (
702- self .data_processor , DataProcessor
703- )
600+ self ._restore_real_extract_mm_items ()
704601 images , videos , image_uuid , video_uuid , dealer , missing_idx , mm_items = (
705602 self .data_processor .extract_mm_items (request )
706603 )
@@ -716,17 +613,10 @@ def test_extract_mm_items_missing_data_error(self):
716613 self .data_processor .enable_processor_cache = False
717614 request = {"messages" : [{"role" : "user" , "content" : [{"type" : "image" , "uuid" : "img1" }]}]}
718615
719- # Restore real extract_mm_items method for this test
720- from fastdeploy .input .ernie4_5_vl_processor .process import DataProcessor
721-
722- original_extract_mm_items = DataProcessor .extract_mm_items
723-
724616 with patch ("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages" ) as mock_parse :
725617 mock_parse .return_value = [{"role" : "user" , "content" : [{"type" : "image" , "uuid" : "img1" }]}]
726618 # Use real extract_mm_items method
727- self .data_processor .extract_mm_items = original_extract_mm_items .__get__ (
728- self .data_processor , DataProcessor
729- )
619+ self ._restore_real_extract_mm_items ()
730620 with self .assertRaises (ValueError ) as ctx :
731621 self .data_processor .extract_mm_items (request )
732622 self .assertIn ("Missing items cannot be retrieved" , str (ctx .exception ))
@@ -1439,29 +1329,11 @@ def test_fancy_print(self):
14391329 self .assertIn (expected_contains , result )
14401330
14411331 def test_prompt_token_ids2outputs (self ):
1442- """Test prompt_token_ids2outputs method"""
1443- # No messages
1444- request = {"prompt_token_ids" : [1 , 2 , 3 , 4 , 5 ]}
1445- outputs = self .processor .prompt_token_ids2outputs (request )
1446- self .assertEqual (len (outputs ["input_ids" ]), 5 )
1447-
1448- # With image - need to match token count with actual image patch count
1332+ """Test prompt_token_ids2outputs method - only unique scenarios not covered by TestDataProcessorTargetMethods"""
14491333 self .processor .is_training = False
14501334 mock_image = Image .new ("RGB" , (224 , 224 ))
1451- # Calculate expected token count: (16*16) // (2*2) = 64 tokens
14521335 num_tokens = (16 * 16 ) // (self .processor .spatial_conv_size ** 2 )
1453- request = {
1454- "messages" : [{"role" : "user" , "content" : [{"type" : "image" , "data" : mock_image , "uuid" : "img1" }]}],
1455- "prompt_token_ids" : [self .processor .image_start_id ]
1456- + [self .processor .image_patch_id ] * num_tokens
1457- + [self .processor .image_end_id ],
1458- }
1459- with patch ("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages" ) as mock_parse :
1460- mock_parse .return_value = [
1461- {"role" : "user" , "content" : [{"type" : "image" , "data" : mock_image , "uuid" : "img1" }]}
1462- ]
1463- outputs = self .processor .prompt_token_ids2outputs (request )
1464- self .assertGreater (len (outputs ["input_ids" ]), 0 )
1336+ num_video_tokens = (4 * 16 * 16 ) // (self .processor .spatial_conv_size ** 2 * self .processor .temporal_conv_size )
14651337
14661338 # Incomplete image tokens
14671339 request = {
@@ -1496,7 +1368,6 @@ def test_prompt_token_ids2outputs(self):
14961368
14971369 # Video count mismatch
14981370 mock_frames = [Image .new ("RGB" , (224 , 224 )) for _ in range (4 )]
1499- num_video_tokens = (4 * 16 * 16 ) // (self .processor .spatial_conv_size ** 2 * self .processor .temporal_conv_size )
15001371 request = {
15011372 "messages" : [{"role" : "user" , "content" : [{"type" : "video" , "data" : mock_frames , "uuid" : "vid1" }]}],
15021373 "prompt_token_ids" : [
@@ -1565,21 +1436,6 @@ def test_prompt_token_ids2outputs(self):
15651436 with self .assertRaises (ValueError ):
15661437 self .processor .prompt_token_ids2outputs (request )
15671438
1568- # Test with cached image (tuple format)
1569- cached_image = (np .random .rand (256 , 3 * 14 * 14 ).astype (np .float32 ), {"thw" : (1 , 16 , 16 )})
1570- request = {
1571- "messages" : [{"role" : "user" , "content" : [{"type" : "image" , "data" : cached_image , "uuid" : "img1" }]}],
1572- "prompt_token_ids" : [self .processor .image_start_id ]
1573- + [self .processor .image_patch_id ] * num_tokens
1574- + [self .processor .image_end_id ],
1575- }
1576- with patch ("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages" ) as mock_parse :
1577- mock_parse .return_value = [
1578- {"role" : "user" , "content" : [{"type" : "image" , "data" : cached_image , "uuid" : "img1" }]}
1579- ]
1580- outputs = self .processor .prompt_token_ids2outputs (request )
1581- self .assertGreater (len (outputs ["input_ids" ]), 0 )
1582-
15831439 # Test with video (dict format)
15841440 request = {
15851441 "messages" : [
@@ -1608,21 +1464,6 @@ def test_prompt_token_ids2outputs(self):
16081464 outputs = self .processor .prompt_token_ids2outputs (request )
16091465 self .assertGreater (len (outputs ["input_ids" ]), 0 )
16101466
1611- # Test with cached video (tuple format)
1612- cached_video = (np .random .rand (4 * 256 , 3 * 14 * 14 ).astype (np .float32 ), {"thw" : (4 , 16 , 16 )})
1613- request = {
1614- "messages" : [{"role" : "user" , "content" : [{"type" : "video" , "data" : cached_video , "uuid" : "vid1" }]}],
1615- "prompt_token_ids" : [self .processor .video_start_id ]
1616- + [self .processor .image_patch_id ] * num_video_tokens
1617- + [self .processor .video_end_id ],
1618- }
1619- with patch ("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages" ) as mock_parse :
1620- mock_parse .return_value = [
1621- {"role" : "user" , "content" : [{"type" : "video" , "data" : cached_video , "uuid" : "vid1" }]}
1622- ]
1623- outputs = self .processor .prompt_token_ids2outputs (request )
1624- self .assertGreater (len (outputs ["input_ids" ]), 0 )
1625-
16261467 # Test prompt_token_ids2outputs with processor cache update
16271468 self .processor .enable_processor_cache = True
16281469 # Reset preprocess mock to return correct format
@@ -1653,56 +1494,6 @@ def test_prompt_token_ids2outputs(self):
16531494 mock_update .assert_called_once ()
16541495 self .processor .enable_processor_cache = False
16551496
1656- # Test token_len mismatch for processed image
1657- cached_image_wrong = (np .random .rand (128 , 3 * 14 * 14 ).astype (np .float32 ), {"thw" : (1 , 16 , 16 )})
1658- request = {
1659- "messages" : [{"role" : "user" , "content" : [{"type" : "image" , "data" : cached_image_wrong , "uuid" : "img1" }]}],
1660- "prompt_token_ids" : [self .processor .image_start_id ]
1661- + [self .processor .image_patch_id ] * num_tokens
1662- + [self .processor .image_end_id ],
1663- }
1664- with patch ("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages" ) as mock_parse :
1665- mock_parse .return_value = [
1666- {"role" : "user" , "content" : [{"type" : "image" , "data" : cached_image_wrong , "uuid" : "img1" }]}
1667- ]
1668- with self .assertRaisesRegex (ValueError , "image tokens num not match" ):
1669- self .processor .prompt_token_ids2outputs (request )
1670-
1671- # Test token_len mismatch for video
1672- request = {
1673- "messages" : [{"role" : "user" , "content" : [{"type" : "video" , "data" : mock_frames , "uuid" : "vid1" }]}],
1674- "prompt_token_ids" : [self .processor .video_start_id ]
1675- + [self .processor .image_patch_id ] * 10
1676- + [self .processor .video_end_id ],
1677- }
1678- with (
1679- patch ("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages" ) as mock_parse ,
1680- patch ("fastdeploy.input.ernie4_5_vl_processor.process.read_video_decord" ) as mock_read ,
1681- patch ("fastdeploy.input.ernie4_5_vl_processor.process.read_frames_decord" ) as mock_frames_read ,
1682- patch ("fastdeploy.input.ernie4_5_vl_processor.process.render_frame_timestamp" ) as mock_render ,
1683- ):
1684- mock_parse .return_value = [
1685- {"role" : "user" , "content" : [{"type" : "video" , "data" : mock_frames , "uuid" : "vid1" }]}
1686- ]
1687- self ._setup_video_mocks (mock_read , mock_frames_read , mock_render , mock_frames )
1688- with self .assertRaisesRegex (ValueError , "video tokens num not match" ):
1689- self .processor .prompt_token_ids2outputs (request )
1690-
1691- # Test token_len mismatch for processed video
1692- cached_video_wrong = (np .random .rand (128 , 3 * 14 * 14 ).astype (np .float32 ), {"thw" : (4 , 16 , 16 )})
1693- request = {
1694- "messages" : [{"role" : "user" , "content" : [{"type" : "video" , "data" : cached_video_wrong , "uuid" : "vid1" }]}],
1695- "prompt_token_ids" : [self .processor .video_start_id ]
1696- + [self .processor .image_patch_id ] * num_video_tokens
1697- + [self .processor .video_end_id ],
1698- }
1699- with patch ("fastdeploy.input.ernie4_5_vl_processor.process.parse_chat_messages" ) as mock_parse :
1700- mock_parse .return_value = [
1701- {"role" : "user" , "content" : [{"type" : "video" , "data" : cached_video_wrong , "uuid" : "vid1" }]}
1702- ]
1703- with self .assertRaisesRegex (ValueError , "video tokens num not match" ):
1704- self .processor .prompt_token_ids2outputs (request )
1705-
17061497 def test_load_tokenizer (self ):
17071498 """Test _load_tokenizer method"""
17081499 with patch ("os.path.exists" , return_value = True ):
0 commit comments