diff --git a/src/galileo/experiments.py b/src/galileo/experiments.py
index 372247ed..9b309037 100644
--- a/src/galileo/experiments.py
+++ b/src/galileo/experiments.py
@@ -105,12 +105,13 @@ def run(
         self,
         project_obj: Project,
         experiment_obj: ExperimentResponse,
-        prompt_template: PromptTemplate,
+        prompt_template: Optional[PromptTemplate],
         dataset_id: str,
         scorers: Optional[builtins.list[ScorerConfig]],
         prompt_settings: Optional[PromptRunSettings] = None,
     ) -> dict[str, Any]:
-        if prompt_settings is None:
+        # Only set default prompt_settings for prompt-driven flow (when a template is provided)
+        if prompt_template is not None and prompt_settings is None:
             prompt_settings = PromptRunSettings(
                 n=1,
                 echo=False,
@@ -134,7 +135,7 @@ def run(
             name="playground_run",
             project_id=project_obj.id,
             run_id=experiment_obj.id,
-            prompt_template_id=prompt_template.selected_version_id,
+            prompt_template_id=prompt_template.selected_version_id if prompt_template is not None else None,
             dataset_id=dataset_id,
             task_type=EXPERIMENT_TASK_TYPE,
             scorers=scorers,
@@ -350,16 +351,15 @@ def run_experiment(
         local_metrics=local_metrics,
     )
 
-    if prompt_template is None:
-        raise ValueError("A prompt template must be provided")
-
     if dataset_obj is None:
         raise ValueError("A dataset object must be provided")
 
     if local_metrics:
         raise ValueError("Local metrics can only be used with a locally run experiment, not a prompt experiment.")
 
-    # Execute a prompt template experiment
+    # Execute a prompt template or generated-output experiment.
+    # If prompt_template is None, the API determines the flow based on the dataset contents.
+    # The API will return error 3512 if the dataset has no generated_output column.
     return Experiments().run(
         project_obj, experiment_obj, prompt_template, dataset_obj.dataset.id, scorer_settings, prompt_settings
     )
diff --git a/src/galileo/jobs.py b/src/galileo/jobs.py
index 6e607ecc..f7aff222 100644
--- a/src/galileo/jobs.py
+++ b/src/galileo/jobs.py
@@ -21,21 +21,23 @@ def create(
         name: str,
         run_id: str,
         dataset_id: str,
-        prompt_template_id: str,
+        prompt_template_id: Optional[str],
         task_type: TaskType,
         scorers: Optional[list[ScorerConfig]],
-        prompt_settings: PromptRunSettings,
+        prompt_settings: Optional[PromptRunSettings],
     ) -> CreateJobResponse:
-        create_params = {
+        create_params: dict = {
             "project_id": project_id,
             "dataset_id": dataset_id,
             "job_name": name,
             "run_id": run_id,
-            "prompt_settings": prompt_settings,
-            "prompt_template_version_id": prompt_template_id,
             "task_type": task_type,
             "scorers": scorers,
         }
+        if prompt_template_id is not None:
+            create_params["prompt_template_version_id"] = prompt_template_id
+        if prompt_settings is not None:
+            create_params["prompt_settings"] = prompt_settings
+
         _logger.info(f"create job: {create_params}")
         result = create_job_jobs_post.sync_detailed(
             client=self.config.api_client, body=CreateJobRequest(**create_params)
diff --git a/tests/test_experiments.py b/tests/test_experiments.py
index 2a912c69..30180628 100644
--- a/tests/test_experiments.py
+++ b/tests/test_experiments.py
@@ -572,6 +572,96 @@ def test_run_experiment_without_metrics(
             prompt_settings=ANY,
         )
 
+    @travel(datetime(2012, 1, 1), tick=False)
+    @patch.object(galileo.datasets.Datasets, "get")
+    @patch.object(galileo.jobs.Jobs, "create")
+    @patch.object(galileo.experiments.Experiments, "create", return_value=experiment_response())
+    @patch.object(galileo.experiments.Experiments, "get", return_value=experiment_response())
+    @patch.object(galileo.experiments.Projects, "get_with_env_fallbacks", return_value=project())
+    def test_run_experiment_generated_output_flow(
+        self,
+        mock_get_project: Mock,
+        mock_get_experiment: Mock,
+        mock_create_experiment: Mock,
+        mock_create_job: Mock,
+        mock_get_dataset: Mock,
+        dataset_content: DatasetContent,
+    ) -> None:
+        # Given: no prompt_template, dataset provided (generated-output flow)
+        mock_create_job.return_value = MagicMock()
+        dataset_id = str(UUID(int=0))
+
+        # When: run_experiment is called without a prompt_template
+        result = run_experiment("test_experiment", project="awesome-new-project", dataset_id=dataset_id)
+
+        # Then: Jobs.create is called with prompt_template_id=None and prompt_settings=None
+        assert result is not None
+        mock_create_job.assert_called_once_with(
+            name="playground_run",
+            project_id="00000000-0000-0000-0000-000000000000",
+            run_id="00000000-0000-4000-8000-000000000001",
+            prompt_template_id=None,
+            dataset_id=ANY,
+            task_type=TaskType.VALUE_16,
+            scorers=None,
+            prompt_settings=None,
+        )
+
+    @travel(datetime(2012, 1, 1), tick=False)
+    @patch.object(galileo.datasets.Datasets, "get")
+    @patch.object(galileo.jobs.Jobs, "create")
+    @patch.object(galileo.experiments.Experiments, "create", return_value=experiment_response())
+    @patch.object(galileo.experiments.Experiments, "get", return_value=experiment_response())
+    @patch.object(galileo.experiments.Projects, "get_with_env_fallbacks", return_value=project())
+    def test_run_experiment_prompt_takes_precedence_over_generated_output(
+        self,
+        mock_get_project: Mock,
+        mock_get_experiment: Mock,
+        mock_create_experiment: Mock,
+        mock_create_job: Mock,
+        mock_get_dataset: Mock,
+        dataset_content: DatasetContent,
+    ) -> None:
+        # Given: both prompt_template and dataset provided
+        mock_create_job.return_value = MagicMock()
+        dataset_id = str(UUID(int=0))
+
+        # When: run_experiment is called with a prompt_template
+        result = run_experiment(
+            "test_experiment", project="awesome-new-project", dataset_id=dataset_id, prompt_template=prompt_template()
+        )
+
+        # Then: Jobs.create is called with the prompt_template_id set (prompt-driven flow)
+        assert result is not None
+        mock_create_job.assert_called_once_with(
+            name="playground_run",
+            project_id="00000000-0000-0000-0000-000000000000",
+            run_id="00000000-0000-4000-8000-000000000001",
+            prompt_template_id="00000000-0000-0000-0000-000000000003",
+            dataset_id=ANY,
+            task_type=TaskType.VALUE_16,
+            scorers=None,
+            prompt_settings=ANY,
+        )
+
+    @patch.object(galileo.datasets.Datasets, "get", return_value=None)
+    @patch.object(galileo.jobs.Jobs, "create")
+    @patch.object(galileo.experiments.Experiments, "create", return_value=experiment_response())
+    @patch.object(galileo.experiments.Experiments, "get", return_value=experiment_response())
+    @patch.object(galileo.experiments.Projects, "get_with_env_fallbacks", return_value=project())
+    def test_run_experiment_no_prompt_no_dataset_raises(
+        self,
+        mock_get_project: Mock,
+        mock_get_experiment: Mock,
+        mock_create_experiment: Mock,
+        mock_create_job: Mock,
+        mock_get_dataset: Mock,
+    ) -> None:
+        # Given: no prompt_template and no dataset
+        # When/Then: ValueError is raised requiring a dataset
+        with pytest.raises(ValueError, match="dataset"):
+            run_experiment("test_experiment", project="awesome-new-project")
+
     @patch("galileo.logger.logger.LogStreams")
     @patch("galileo.logger.logger.Projects")
     @patch("galileo.logger.logger.Traces")
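
For reviewers, a minimal sketch of the two invocation flows this change supports, mirroring the tests above. The import path and all identifiers (`template`, the project and dataset names) are illustrative assumptions, not taken from the repo's docs:

```python
from galileo.experiments import run_experiment

# Prompt-driven flow: a template is supplied, so the SDK applies default
# PromptRunSettings (n=1, echo=False, ...) when none are passed and sends
# prompt_template_version_id in the job payload.
run_experiment(
    "my_experiment",
    project="my-project",
    dataset_id="my-dataset-id",
    prompt_template=template,  # placeholder: an existing PromptTemplate with a selected version
)

# Generated-output flow: no template. prompt_template_version_id and
# prompt_settings are omitted from the job payload entirely; the API decides
# the flow from the dataset contents and returns error 3512 if the dataset
# has no generated_output column.
run_experiment(
    "my_experiment",
    project="my-project",
    dataset_id="my-dataset-id",
)
```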