From 4e0ee7d570143f06dc5fb62cd6da9118fa809ab5 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 19 Nov 2025 16:03:03 +0000
Subject: [PATCH] Fix task planner matching plans to wrong tasks (issue #3953)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The task planner was blindly zipping tasks with plans returned by the LLM,
assuming they were in the same order. However, the LLM sometimes returns
plans in the wrong order (e.g., starting with Task 21 instead of Task 1),
causing plans to be attached to the wrong tasks.

This fix:
- Extracts the task number from each plan's task field using the
  case-insensitive regex "Task Number (\d+)"
- Creates a mapping of task number to plan
- Appends each plan to the task whose 1-based position in the crew's task
  list matches that task number
- Logs a warning when a task number can't be extracted from a plan or when
  a task has no matching plan

Also adds a regression test that reproduces the bug by mocking an LLM
response with plans in the wrong order and verifies that the fix matches
each plan to its corresponding task.

Co-Authored-By: João <joao@crewai.com>
---
 lib/crewai/src/crewai/crew.py | 27 +++++++++--
 lib/crewai/tests/test_crew.py | 90 +++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 4 deletions(-)

diff --git a/lib/crewai/src/crewai/crew.py b/lib/crewai/src/crewai/crew.py
index 00bed8f018..81743df42c 100644
--- a/lib/crewai/src/crewai/crew.py
+++ b/lib/crewai/src/crewai/crew.py
@@ -778,15 +778,34 @@ async def run_crew(crew: Self, input_data: Any) -> CrewOutput:
 
     def _handle_crew_planning(self) -> None:
         """Handles the Crew planning."""
+        import re
+
         self._logger.log("info", "Planning the crew execution")
         result = CrewPlanner(
             tasks=self.tasks, planning_agent_llm=self.planning_llm
         )._handle_crew_planning()
 
-        for task, step_plan in zip(
-            self.tasks, result.list_of_plans_per_task, strict=False
-        ):
-            task.description += step_plan.plan
+        plan_map: dict[int, str] = {}
+        for step_plan in result.list_of_plans_per_task:
+            match = re.search(r"Task Number (\d+)", step_plan.task, re.IGNORECASE)
+            if match:
+                task_number = int(match.group(1))
+                plan_map[task_number] = step_plan.plan
+            else:
+                self._logger.log(
+                    "warning",
+                    f"Could not extract task number from plan task field: {step_plan.task}",
+                )
+
+        for idx, task in enumerate(self.tasks):
+            task_number = idx + 1  # Task numbers are 1-indexed
+            if task_number in plan_map:
+                task.description += plan_map[task_number]
+            else:
+                self._logger.log(
+                    "warning",
+                    f"No plan found for task {task_number}. Task description: {task.description}",
+                )
 
     def _store_execution_log(
         self,
diff --git a/lib/crewai/tests/test_crew.py b/lib/crewai/tests/test_crew.py
index d4cf1acbfe..c73f59afbc 100644
--- a/lib/crewai/tests/test_crew.py
+++ b/lib/crewai/tests/test_crew.py
@@ -4772,3 +4772,93 @@ def test_ensure_exchanged_messages_are_propagated_to_external_memory():
     assert "Researcher" in messages[0]["content"]
     assert messages[1]["role"] == "user"
     assert "Research a topic to teach a kid aged 6 about math" in messages[1]["content"]
+
+
+def test_crew_planning_with_mismatched_task_order():
+    """Test that crew planning correctly matches plans to tasks even when LLM returns them out of order.
+    
+    This test reproduces the bug reported in issue #3953 where the task planner
+    returns plans in the wrong order (e.g., starting with Task 21 instead of Task 1),
+    causing plans to be attached to the wrong tasks.
+    """
+    from crewai.utilities.planning_handler import PlanPerTask, PlannerTaskPydanticOutput
+    
+    # Create 5 tasks with distinct descriptions
+    tasks = []
+    agents = []
+    for i in range(1, 6):
+        agent = Agent(
+            role=f"Agent {i}",
+            goal=f"Goal {i}",
+            backstory=f"Backstory {i}",
+        )
+        agents.append(agent)
+        task = Task(
+            description=f"Task {i} description",
+            expected_output=f"Output {i}",
+            agent=agent,
+        )
+        tasks.append(task)
+    
+    crew = Crew(
+        agents=agents,
+        tasks=tasks,
+        planning=True,
+        planning_llm="gpt-4o-mini",
+    )
+    
+    # Mock the LLM response to return plans in the WRONG order
+    # Simulating the bug where Task 5 plan comes first, then Task 3, etc.
+    wrong_order_plans = [
+        PlanPerTask(
+            task="Task Number 5 - Task 5 description",
+            plan="\n\nPlan for task 5"
+        ),
+        PlanPerTask(
+            task="Task Number 3 - Task 3 description",
+            plan="\n\nPlan for task 3"
+        ),
+        PlanPerTask(
+            task="Task Number 1 - Task 1 description",
+            plan="\n\nPlan for task 1"
+        ),
+        PlanPerTask(
+            task="Task Number 4 - Task 4 description",
+            plan="\n\nPlan for task 4"
+        ),
+        PlanPerTask(
+            task="Task Number 2 - Task 2 description",
+            plan="\n\nPlan for task 2"
+        ),
+    ]
+    
+    with patch.object(Task, "execute_sync") as mock_execute:
+        mock_execute.return_value = TaskOutput(
+            description="Planning task",
+            agent="planner",
+            pydantic=PlannerTaskPydanticOutput(
+                list_of_plans_per_task=wrong_order_plans
+            ),
+        )
+        
+        # Call the planning method
+        crew._handle_crew_planning()
+    
+    # Verify that each task has the CORRECT plan appended to its description
+    # Task 1 should have "Plan for task 1", not "Plan for task 5"
+    assert "Plan for task 1" in crew.tasks[0].description, \
+        f"Task 1 should have 'Plan for task 1' but got: {crew.tasks[0].description}"
+    assert "Plan for task 2" in crew.tasks[1].description, \
+        f"Task 2 should have 'Plan for task 2' but got: {crew.tasks[1].description}"
+    assert "Plan for task 3" in crew.tasks[2].description, \
+        f"Task 3 should have 'Plan for task 3' but got: {crew.tasks[2].description}"
+    assert "Plan for task 4" in crew.tasks[3].description, \
+        f"Task 4 should have 'Plan for task 4' but got: {crew.tasks[3].description}"
+    assert "Plan for task 5" in crew.tasks[4].description, \
+        f"Task 5 should have 'Plan for task 5' but got: {crew.tasks[4].description}"
+    
+    # Also verify that wrong plans are NOT in the wrong tasks
+    assert "Plan for task 5" not in crew.tasks[0].description, \
+        "Task 1 should not have Plan for task 5"
+    assert "Plan for task 3" not in crew.tasks[1].description, \
+        "Task 2 should not have Plan for task 3"