14 changes: 7 additions & 7 deletions src/galileo/experiments.py
@@ -105,12 +105,13 @@ def run(
         self,
         project_obj: Project,
         experiment_obj: ExperimentResponse,
-        prompt_template: PromptTemplate,
+        prompt_template: Optional[PromptTemplate],
         dataset_id: str,
         scorers: Optional[builtins.list[ScorerConfig]],
         prompt_settings: Optional[PromptRunSettings] = None,
     ) -> dict[str, Any]:
-        if prompt_settings is None:
+        # Only set default prompt_settings for prompt-driven flow (when a template is provided)
+        if prompt_template is not None and prompt_settings is None:
             prompt_settings = PromptRunSettings(
                 n=1,
                 echo=False,
@@ -134,7 +135,7 @@ def run(
             name="playground_run",
             project_id=project_obj.id,
             run_id=experiment_obj.id,
-            prompt_template_id=prompt_template.selected_version_id,
+            prompt_template_id=prompt_template.selected_version_id if prompt_template is not None else None,
             dataset_id=dataset_id,
             task_type=EXPERIMENT_TASK_TYPE,
             scorers=scorers,
@@ -350,16 +351,15 @@ def run_experiment(
         local_metrics=local_metrics,
     )

-    if prompt_template is None:
-        raise ValueError("A prompt template must be provided")
-
     if dataset_obj is None:
         raise ValueError("A dataset object must be provided")

     if local_metrics:
         raise ValueError("Local metrics can only be used with a locally run experiment, not a prompt experiment.")

-    # Execute a prompt template experiment
+    # Execute a prompt template or generated-output experiment.
+    # If prompt_template is None, the API determines the flow based on the dataset contents.
+    # The API will return error 3512 if the dataset has no generated_output column.
     return Experiments().run(
         project_obj, experiment_obj, prompt_template, dataset_obj.dataset.id, scorer_settings, prompt_settings
     )
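To make the two flows concrete, here is a minimal usage sketch based on the call signature exercised by the tests in this PR; the project name, dataset id, and template object are placeholders, and metrics and error handling are omitted:

from galileo.experiments import run_experiment

# Prompt-driven flow: a template is supplied, so default PromptRunSettings are applied
# when none are passed explicitly.
run_experiment(
    "prompt-experiment",
    project="awesome-new-project",  # placeholder project name
    dataset_id="00000000-0000-0000-0000-000000000000",  # placeholder dataset id
    prompt_template=my_template,  # a PromptTemplate object obtained elsewhere
)

# Generated-output flow: no template is supplied; the API determines the flow from the
# dataset contents and returns error 3512 if the dataset has no generated_output column.
run_experiment(
    "generated-output-experiment",
    project="awesome-new-project",  # placeholder project name
    dataset_id="00000000-0000-0000-0000-000000000000",  # placeholder dataset id
)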
12 changes: 7 additions & 5 deletions src/galileo/jobs.py
@@ -21,21 +21,23 @@ def create(
         name: str,
         run_id: str,
         dataset_id: str,
-        prompt_template_id: str,
+        prompt_template_id: Optional[str],
         task_type: TaskType,
         scorers: Optional[list[ScorerConfig]],
-        prompt_settings: PromptRunSettings,
+        prompt_settings: Optional[PromptRunSettings],
     ) -> CreateJobResponse:
-        create_params = {
+        create_params: dict = {
             "project_id": project_id,
             "dataset_id": dataset_id,
             "job_name": name,
             "run_id": run_id,
-            "prompt_settings": prompt_settings,
-            "prompt_template_version_id": prompt_template_id,
             "task_type": task_type,
             "scorers": scorers,
         }
+        if prompt_template_id is not None:
+            create_params["prompt_template_version_id"] = prompt_template_id
+        if prompt_settings is not None:
+            create_params["prompt_settings"] = prompt_settings
         _logger.info(f"create job: {create_params}")
         result = create_job_jobs_post.sync_detailed(
             client=self.config.api_client, body=CreateJobRequest(**create_params)
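The create_params change above follows a common request-building pattern: optional fields are added to the payload only when a value is supplied, presumably so the job request omits them rather than sending explicit nulls. A standalone sketch of that pattern (the helper name below is illustrative, not part of the SDK):

from typing import Any, Optional

def build_create_params(
    project_id: str,
    dataset_id: str,
    prompt_template_id: Optional[str] = None,
    prompt_settings: Optional[dict] = None,
) -> dict[str, Any]:
    # Required fields are always present; optional ones are included only when supplied.
    params: dict[str, Any] = {"project_id": project_id, "dataset_id": dataset_id}
    if prompt_template_id is not None:
        params["prompt_template_version_id"] = prompt_template_id
    if prompt_settings is not None:
        params["prompt_settings"] = prompt_settings
    return params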
90 changes: 90 additions & 0 deletions tests/test_experiments.py
@@ -572,6 +572,96 @@ def test_run_experiment_without_metrics(
             prompt_settings=ANY,
         )

+    @travel(datetime(2012, 1, 1), tick=False)
+    @patch.object(galileo.datasets.Datasets, "get")
+    @patch.object(galileo.jobs.Jobs, "create")
+    @patch.object(galileo.experiments.Experiments, "create", return_value=experiment_response())
+    @patch.object(galileo.experiments.Experiments, "get", return_value=experiment_response())
+    @patch.object(galileo.experiments.Projects, "get_with_env_fallbacks", return_value=project())
+    def test_run_experiment_generated_output_flow(
+        self,
+        mock_get_project: Mock,
+        mock_get_experiment: Mock,
+        mock_create_experiment: Mock,
+        mock_create_job: Mock,
+        mock_get_dataset: Mock,
+        dataset_content: DatasetContent,
+    ) -> None:
+        # Given: no prompt_template, dataset provided (generated-output flow)
+        mock_create_job.return_value = MagicMock()
+        dataset_id = str(UUID(int=0))
+
+        # When: run_experiment is called without a prompt_template
+        result = run_experiment("test_experiment", project="awesome-new-project", dataset_id=dataset_id)
+
+        # Then: Jobs.create is called with prompt_template_id=None and prompt_settings=None
+        assert result is not None
+        mock_create_job.assert_called_once_with(
+            name="playground_run",
+            project_id="00000000-0000-0000-0000-000000000000",
+            run_id="00000000-0000-4000-8000-000000000001",
+            prompt_template_id=None,
+            dataset_id=ANY,
+            task_type=TaskType.VALUE_16,
+            scorers=None,
+            prompt_settings=None,
+        )
+
+    @travel(datetime(2012, 1, 1), tick=False)
+    @patch.object(galileo.datasets.Datasets, "get")
+    @patch.object(galileo.jobs.Jobs, "create")
+    @patch.object(galileo.experiments.Experiments, "create", return_value=experiment_response())
+    @patch.object(galileo.experiments.Experiments, "get", return_value=experiment_response())
+    @patch.object(galileo.experiments.Projects, "get_with_env_fallbacks", return_value=project())
+    def test_run_experiment_prompt_takes_precedence_over_generated_output(
+        self,
+        mock_get_project: Mock,
+        mock_get_experiment: Mock,
+        mock_create_experiment: Mock,
+        mock_create_job: Mock,
+        mock_get_dataset: Mock,
+        dataset_content: DatasetContent,
+    ) -> None:
+        # Given: both prompt_template and dataset provided
+        mock_create_job.return_value = MagicMock()
+        dataset_id = str(UUID(int=0))
+
+        # When: run_experiment is called with a prompt_template
+        result = run_experiment(
+            "test_experiment", project="awesome-new-project", dataset_id=dataset_id, prompt_template=prompt_template()
+        )
+
+        # Then: Jobs.create is called with the prompt_template_id set (prompt-driven flow)
+        assert result is not None
+        mock_create_job.assert_called_once_with(
+            name="playground_run",
+            project_id="00000000-0000-0000-0000-000000000000",
+            run_id="00000000-0000-4000-8000-000000000001",
+            prompt_template_id="00000000-0000-0000-0000-000000000003",
+            dataset_id=ANY,
+            task_type=TaskType.VALUE_16,
+            scorers=None,
+            prompt_settings=ANY,
+        )
+
@patch.object(galileo.datasets.Datasets, "get", return_value=None)
@patch.object(galileo.jobs.Jobs, "create")
@patch.object(galileo.experiments.Experiments, "create", return_value=experiment_response())
@patch.object(galileo.experiments.Experiments, "get", return_value=experiment_response())
@patch.object(galileo.experiments.Projects, "get_with_env_fallbacks", return_value=project())
def test_run_experiment_no_prompt_no_dataset_raises(
self,
mock_get_project: Mock,
mock_get_experiment: Mock,
mock_create_experiment: Mock,
mock_create_job: Mock,
mock_get_dataset: Mock,
) -> None:
# Given: no prompt_template and no dataset
# When/Then: ValueError is raised requiring a dataset
with pytest.raises(ValueError, match="dataset"):
run_experiment("test_experiment", project="awesome-new-project")

@patch("galileo.logger.logger.LogStreams")
@patch("galileo.logger.logger.Projects")
@patch("galileo.logger.logger.Traces")