From 3cf1ccb1d240d772b4a62b9aaebf21ca8e350f12 Mon Sep 17 00:00:00 2001 From: Bipin Date: Tue, 24 Feb 2026 15:17:50 +0530 Subject: [PATCH 1/3] feat: support generated-output flow in run_experiment (no prompt_template) - Remove hard ValueError for prompt_template=None in run_experiment(). The API is the authority on flow determination: if the dataset has a generated_output column, the API accepts prompt_template_version_id=None and runs metrics without LLM generation. If not, the API returns 3512. - Experiments.run(): only set default prompt_settings when a template is provided; pass prompt_template_id=None for generated-output flow. - Jobs.create(): make prompt_template_id and prompt_settings Optional; only include them in CreateJobRequest when non-None so the API contract is respected for both flows. - Add 3 tests: generated-output flow, prompt-takes-precedence, and no-prompt-no-dataset raises ValueError. Co-Authored-By: Claude Sonnet 4.6 --- src/galileo/experiments.py | 14 +++--- src/galileo/jobs.py | 12 +++-- tests/test_experiments.py | 95 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 12 deletions(-) diff --git a/src/galileo/experiments.py b/src/galileo/experiments.py index 372247ed6..9b309037e 100644 --- a/src/galileo/experiments.py +++ b/src/galileo/experiments.py @@ -105,12 +105,13 @@ def run( self, project_obj: Project, experiment_obj: ExperimentResponse, - prompt_template: PromptTemplate, + prompt_template: Optional[PromptTemplate], dataset_id: str, scorers: Optional[builtins.list[ScorerConfig]], prompt_settings: Optional[PromptRunSettings] = None, ) -> dict[str, Any]: - if prompt_settings is None: + # Only set default prompt_settings for prompt-driven flow (when a template is provided) + if prompt_template is not None and prompt_settings is None: prompt_settings = PromptRunSettings( n=1, echo=False, @@ -134,7 +135,7 @@ def run( name="playground_run", project_id=project_obj.id, run_id=experiment_obj.id, - prompt_template_id=prompt_template.selected_version_id, + prompt_template_id=prompt_template.selected_version_id if prompt_template is not None else None, dataset_id=dataset_id, task_type=EXPERIMENT_TASK_TYPE, scorers=scorers, @@ -350,16 +351,15 @@ def run_experiment( local_metrics=local_metrics, ) - if prompt_template is None: - raise ValueError("A prompt template must be provided") - if dataset_obj is None: raise ValueError("A dataset object must be provided") if local_metrics: raise ValueError("Local metrics can only be used with a locally run experiment, not a prompt experiment.") - # Execute a prompt template experiment + # Execute a prompt template or generated-output experiment. + # If prompt_template is None, the API determines the flow based on the dataset contents. + # The API will return error 3512 if the dataset has no generated_output column. return Experiments().run( project_obj, experiment_obj, prompt_template, dataset_obj.dataset.id, scorer_settings, prompt_settings ) diff --git a/src/galileo/jobs.py b/src/galileo/jobs.py index 6e607eccf..f7aff2222 100644 --- a/src/galileo/jobs.py +++ b/src/galileo/jobs.py @@ -21,21 +21,23 @@ def create( name: str, run_id: str, dataset_id: str, - prompt_template_id: str, + prompt_template_id: Optional[str], task_type: TaskType, scorers: Optional[list[ScorerConfig]], - prompt_settings: PromptRunSettings, + prompt_settings: Optional[PromptRunSettings], ) -> CreateJobResponse: - create_params = { + create_params: dict = { "project_id": project_id, "dataset_id": dataset_id, "job_name": name, "run_id": run_id, - "prompt_settings": prompt_settings, - "prompt_template_version_id": prompt_template_id, "task_type": task_type, "scorers": scorers, } + if prompt_template_id is not None: + create_params["prompt_template_version_id"] = prompt_template_id + if prompt_settings is not None: + create_params["prompt_settings"] = prompt_settings _logger.info(f"create job: {create_params}") result = create_job_jobs_post.sync_detailed( client=self.config.api_client, body=CreateJobRequest(**create_params) diff --git a/tests/test_experiments.py b/tests/test_experiments.py index 2a912c69a..183b5a032 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -572,6 +572,101 @@ def test_run_experiment_without_metrics( prompt_settings=ANY, ) + @travel(datetime(2012, 1, 1), tick=False) + @patch.object(galileo.datasets.Datasets, "get") + @patch.object(galileo.jobs.Jobs, "create") + @patch.object(galileo.experiments.Experiments, "create", return_value=experiment_response()) + @patch.object(galileo.experiments.Experiments, "get", return_value=experiment_response()) + @patch.object(galileo.experiments.Projects, "get_with_env_fallbacks", return_value=project()) + def test_run_experiment_generated_output_flow( + self, + mock_get_project: Mock, + mock_get_experiment: Mock, + mock_create_experiment: Mock, + mock_create_job: Mock, + mock_get_dataset: Mock, + dataset_content: DatasetContent, + ) -> None: + # Given: no prompt_template, dataset provided (generated-output flow) + mock_create_job.return_value = MagicMock() + dataset_id = str(UUID(int=0)) + + # When: run_experiment is called without a prompt_template + result = run_experiment( + "test_experiment", project="awesome-new-project", dataset_id=dataset_id + ) + + # Then: Jobs.create is called with prompt_template_id=None and prompt_settings=None + assert result is not None + mock_create_job.assert_called_once_with( + name="playground_run", + project_id="00000000-0000-0000-0000-000000000000", + run_id="00000000-0000-4000-8000-000000000001", + prompt_template_id=None, + dataset_id=ANY, + task_type=TaskType.VALUE_16, + scorers=None, + prompt_settings=None, + ) + + @travel(datetime(2012, 1, 1), tick=False) + @patch.object(galileo.datasets.Datasets, "get") + @patch.object(galileo.jobs.Jobs, "create") + @patch.object(galileo.experiments.Experiments, "create", return_value=experiment_response()) + @patch.object(galileo.experiments.Experiments, "get", return_value=experiment_response()) + @patch.object(galileo.experiments.Projects, "get_with_env_fallbacks", return_value=project()) + def test_run_experiment_prompt_takes_precedence_over_generated_output( + self, + mock_get_project: Mock, + mock_get_experiment: Mock, + mock_create_experiment: Mock, + mock_create_job: Mock, + mock_get_dataset: Mock, + dataset_content: DatasetContent, + ) -> None: + # Given: both prompt_template and dataset provided + mock_create_job.return_value = MagicMock() + dataset_id = str(UUID(int=0)) + + # When: run_experiment is called with a prompt_template + result = run_experiment( + "test_experiment", + project="awesome-new-project", + dataset_id=dataset_id, + prompt_template=prompt_template(), + ) + + # Then: Jobs.create is called with the prompt_template_id set (prompt-driven flow) + assert result is not None + mock_create_job.assert_called_once_with( + name="playground_run", + project_id="00000000-0000-0000-0000-000000000000", + run_id="00000000-0000-4000-8000-000000000001", + prompt_template_id="00000000-0000-0000-0000-000000000003", + dataset_id=ANY, + task_type=TaskType.VALUE_16, + scorers=None, + prompt_settings=ANY, + ) + + @patch.object(galileo.datasets.Datasets, "get", return_value=None) + @patch.object(galileo.jobs.Jobs, "create") + @patch.object(galileo.experiments.Experiments, "create", return_value=experiment_response()) + @patch.object(galileo.experiments.Experiments, "get", return_value=experiment_response()) + @patch.object(galileo.experiments.Projects, "get_with_env_fallbacks", return_value=project()) + def test_run_experiment_no_prompt_no_dataset_raises( + self, + mock_get_project: Mock, + mock_get_experiment: Mock, + mock_create_experiment: Mock, + mock_create_job: Mock, + mock_get_dataset: Mock, + ) -> None: + # Given: no prompt_template and no dataset + # When/Then: ValueError is raised requiring a dataset + with pytest.raises(ValueError, match="A dataset object must be provided"): + run_experiment("test_experiment", project="awesome-new-project") + @patch("galileo.logger.logger.LogStreams") @patch("galileo.logger.logger.Projects") @patch("galileo.logger.logger.Traces") From 38924d604ff0d704162c651d93b9f1c86efcfd87 Mon Sep 17 00:00:00 2001 From: Bipin Date: Tue, 24 Feb 2026 15:28:43 +0530 Subject: [PATCH 2/3] fix: correct error message match in test_run_experiment_no_prompt_no_dataset_raises Co-Authored-By: Claude Sonnet 4.6 --- tests/test_experiments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_experiments.py b/tests/test_experiments.py index 183b5a032..518c2be69 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -664,7 +664,7 @@ def test_run_experiment_no_prompt_no_dataset_raises( ) -> None: # Given: no prompt_template and no dataset # When/Then: ValueError is raised requiring a dataset - with pytest.raises(ValueError, match="A dataset object must be provided"): + with pytest.raises(ValueError, match="dataset"): run_experiment("test_experiment", project="awesome-new-project") @patch("galileo.logger.logger.LogStreams") From 2fd04f87393ff8b13370ce33d4298fefedaa166a Mon Sep 17 00:00:00 2001 From: Bipin Date: Wed, 25 Feb 2026 16:37:15 +0530 Subject: [PATCH 3/3] style: ruff-format test_experiments.py Co-Authored-By: Claude Sonnet 4.6 --- tests/test_experiments.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/test_experiments.py b/tests/test_experiments.py index 518c2be69..301806286 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -592,9 +592,7 @@ def test_run_experiment_generated_output_flow( dataset_id = str(UUID(int=0)) # When: run_experiment is called without a prompt_template - result = run_experiment( - "test_experiment", project="awesome-new-project", dataset_id=dataset_id - ) + result = run_experiment("test_experiment", project="awesome-new-project", dataset_id=dataset_id) # Then: Jobs.create is called with prompt_template_id=None and prompt_settings=None assert result is not None @@ -630,10 +628,7 @@ def test_run_experiment_prompt_takes_precedence_over_generated_output( # When: run_experiment is called with a prompt_template result = run_experiment( - "test_experiment", - project="awesome-new-project", - dataset_id=dataset_id, - prompt_template=prompt_template(), + "test_experiment", project="awesome-new-project", dataset_id=dataset_id, prompt_template=prompt_template() ) # Then: Jobs.create is called with the prompt_template_id set (prompt-driven flow)