@@ -68,33 +68,30 @@ def normalize_bbox(bbox, width, height):
6868 ]
6969
7070
71- def process_objects (objects , threshold ):
71+ def process_objects (qwen_output , width , height , threshold ):
7272 """
73- Process detected objects by filtering, transforming, and enriching them .
73+ Transform Qwen object detection output to IMAGE schema format .
7474
75- - Filters objects by confidence threshold
75+ - Transforms from Qwen format (bbox_2d, label) to IMAGE format
76+ - Normalizes bounding boxes to [0,1] range
77+ - Assigns confidence threshold to all objects
7678 - Normalizes labels (replaces underscores with spaces)
77- - Renumbers IDs sequentially
7879 - Calculates geometric properties (area, centroid)
80+ - Keeps every detection (no per-object score is read, so nothing is filtered out)
7981
8082 Args:
81- objects (list): List of detected objects with confidence scores
83+ qwen_output (list): Qwen detection output with bbox_2d and label
84+ width (int): Image width in pixels for normalization
85+ height (int): Image height in pixels for normalization
8286 threshold (float): Minimum confidence score (0-1)
8387
8488 Returns:
8589 list: Processed objects with computed properties
8690 """
8791 processed = []
88- for obj in objects :
89- if obj .get ("confidence" , 0 ) >= threshold :
90- obj ['type' ] = obj ['type' ].replace ('_' , ' ' )
91- processed .append (obj )
92-
93- # Renumber IDs sequentially after filtering
94- for idx , obj in enumerate (processed ):
95- obj ['ID' ] = idx
96-
97- x1 , y1 , x2 , y2 = obj ["dimensions" ]
92+ for idx , item in enumerate (qwen_output ):
93+ # Normalize bounding box
94+ x1 , y1 , x2 , y2 = normalize_bbox (item ["bbox_2d" ], width , height )
9895
9996 # Calculate area (width * height)
10097 area = (x2 - x1 ) * (y2 - y1 )
@@ -103,13 +100,20 @@ def process_objects(objects, threshold):
103100 centroid_x = (x1 + x2 ) / 2
104101 centroid_y = (y1 + y2 ) / 2
105102
106- # Create object entry according to schema
107- obj ["area" ] = area
108- obj ["centroid" ] = [centroid_x , centroid_y ]
103+ # Create object entry according to IMAGE schema
104+ obj = {
105+ "ID" : idx ,
106+ "type" : item ["label" ].replace ('_' , ' ' ),
107+ "dimensions" : [x1 , y1 , x2 , y2 ],
108+ "confidence" : threshold ,
109+ "area" : area ,
110+ "centroid" : [centroid_x , centroid_y ]
111+ }
112+
113+ processed .append (obj )
109114
110115 logging .debug (
111- f"Processed { len (objects )} objects to { len (processed )} "
112- f"objects with confidence >= { threshold } "
116+ f"Processed { len (qwen_output )} objects from Qwen output"
113117 )
114118 return processed
115119
@@ -155,35 +159,42 @@ def detect_objects():
155159 if error :
156160 return jsonify (error ), error ["code" ]
157161
162+ stop_tokens = [
163+ "<|im_end|>" , # Qwen's end token
164+ "<|endoftext|>" , # Alternative end token
165+ "\n \n \n " , # Triple newline
166+ "```" , # Code block end
167+ ]
168+
158169 try :
159170 # Get object info
160- object_json = llm_client .chat_completion (
171+ qwen_output = llm_client .chat_completion (
161172 prompt = OBJECT_DETECTION_PROMPT ,
162173 image_base64 = base64_image ,
163174 json_schema = BBOX_RESPONSE_SCHEMA ,
164- temperature = 0.0 ,
165- parse_json = True
175+ temperature = 0.5 ,
176+ parse_json = True ,
177+ stop = stop_tokens
166178 )
167179
168- if object_json is None or len (object_json .get ("objects" , [])) == 0 :
180+ logging .debug (f"Qwen output received: { qwen_output } " )
181+
182+ if qwen_output is None or len (qwen_output ) == 0 :
169183 logging .error ("Failed to extract objects from the graphic." )
170184 return jsonify ({"error" : "No objects extracted" }), 204
171185
172- # Normalize bounding boxes
186+ # Transform Qwen format to IMAGE schema format
173187 width , height = pil_image .size
174- for obj in object_json ["objects" ]:
175- # Normalize bounding boxes
176- obj ["dimensions" ] = normalize_bbox (
177- obj ["dimensions" ], width , height
178- )
179-
180- # Filter objects by confidence threshold, add area and centroid,
181- # remove underscores from labels, and renumber IDs
182- object_json ["objects" ] = process_objects (
183- object_json ["objects" ],
188+ processed_objects = process_objects (
189+ qwen_output ,
190+ width ,
191+ height ,
184192 CONF_THRESHOLD
185193 )
186194
195+ # Wrap in "objects" for schema compliance
196+ object_json = {"objects" : processed_objects }
197+
187198 logging .pii (f"Normalized output: { object_json } " )
188199
189200 # Data schema validation
0 commit comments