9 changes: 9 additions & 0 deletions tests/test_datasets/test_dataset_functions.py
@@ -280,6 +280,7 @@ def test_dataset_by_name_cannot_access_private_data(self):
self.use_production_server()
self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE")

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_get_dataset_lazy_all_functions(self):
"""Test that all expected functionality is available without downloading the dataset."""
dataset = openml.datasets.get_dataset(1)
@@ -664,6 +665,7 @@ def test_attributes_arff_from_df_unknown_dtype(self):
with pytest.raises(ValueError, match=err_msg):
attributes_arff_from_df(df)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_create_dataset_numpy(self):
data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T

@@ -751,6 +753,7 @@ def test_create_dataset_list(self):
), "Uploaded ARFF does not match original one"
assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset"

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_create_dataset_sparse(self):
# test the scipy.sparse.coo_matrix
sparse_data = scipy.sparse.coo_matrix(
@@ -868,6 +871,7 @@ def test_get_online_dataset_arff(self):
return_type=arff.DENSE if d_format == "arff" else arff.COO,
), "ARFF files are not equal"

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_topic_api_error(self):
# Check server exception when non-admin accesses apis
self.assertRaisesRegex(
@@ -895,6 +899,7 @@ def test_get_online_dataset_format(self):
dataset_id
), "The format of the ARFF files is different"

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_create_dataset_pandas(self):
data = [
["a", "sunny", 85.0, 85.0, "FALSE", "no"],
@@ -1119,6 +1124,7 @@ def test_ignore_attributes_dataset(self):
paper_url=paper_url,
)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_publish_fetch_ignore_attribute(self):
"""Test to upload and retrieve dataset and check ignore_attributes"""
data = [
@@ -1237,6 +1243,7 @@ def test_create_dataset_row_id_attribute_error(self):
paper_url=paper_url,
)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_create_dataset_row_id_attribute_inference(self):
# meta-information
name = f"{self._get_sentinel()}-pandas_testing_dataset"
@@ -1400,6 +1407,7 @@ def test_data_edit_non_critical_field(self):
edited_dataset = openml.datasets.get_dataset(did)
assert edited_dataset.description == desc

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_data_edit_critical_field(self):
# Case 2
# only owners (or admin) can edit all critical fields of datasets
@@ -1448,6 +1456,7 @@ def test_data_edit_requires_valid_dataset(self):
description="xor operation dataset",
)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self):
# Need to own a dataset to be able to edit meta-data
# Will be creating a forked version of an existing dataset to allow the unit test user
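Note on the marker added throughout this diff: pytest.mark.xfail with strict=False reports a failing test as XFAIL and an unexpectedly passing one as XPASS, so neither outcome breaks the test run while the failures referenced by the reason string (apparently issue 1544) are investigated. A minimal sketch of that behaviour, independent of this repository (the test name and the assertion below are illustrative only):

import pytest

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_known_failure():
    # A failure here is reported as XFAIL rather than failing the run.
    # If the test unexpectedly passes, strict=False reports XPASS
    # instead of turning the pass into an error, as strict=True would.
    assert 1 == 2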
5 changes: 5 additions & 0 deletions tests/test_flows/test_flow.py
@@ -178,6 +178,7 @@ def test_to_xml_from_xml(self):
assert new_flow is not flow

@pytest.mark.sklearn()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_publish_flow(self):
flow = openml.OpenMLFlow(
name="sklearn.dummy.DummyClassifier",
@@ -219,6 +220,7 @@ def test_publish_existing_flow(self, flow_exists_mock):
)

@pytest.mark.sklearn()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_publish_flow_with_similar_components(self):
clf = sklearn.ensemble.VotingClassifier(
[("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))],
@@ -269,6 +271,7 @@ def test_publish_flow_with_similar_components(self):
TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}")

@pytest.mark.sklearn()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_semi_legal_flow(self):
# TODO: Test if parameters are set correctly!
# should not throw error as it contains two differentiable forms of
@@ -377,6 +380,7 @@ def get_sentinel():
assert not flow_id

@pytest.mark.sklearn()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_existing_flow_exists(self):
# create a flow
nb = sklearn.naive_bayes.GaussianNB()
@@ -417,6 +421,7 @@ def test_existing_flow_exists(self):
assert downloaded_flow_id == flow.flow_id

@pytest.mark.sklearn()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_sklearn_to_upload_to_flow(self):
iris = sklearn.datasets.load_iris()
X = iris.data
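The pytest.mark.sklearn() marker used in these files is a project-specific marker; pytest only recognises custom markers once they are registered, typically in the test configuration, so that tests carrying them can be selected or deselected with -m on the command line. A hedged sketch of one way such a marker could be registered (the conftest.py location and the marker description are assumptions, not taken from this PR):

# conftest.py (assumed location; the project may register its markers elsewhere)
def pytest_configure(config):
    # Declare the custom "sklearn" marker so pytest does not emit
    # an unknown-marker warning when the decorator is used.
    config.addinivalue_line(
        "markers", "sklearn: tests that exercise the scikit-learn extension"
    )

Tests carrying the marker could then be run with pytest -m sklearn, or excluded with pytest -m "not sklearn".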
2 changes: 2 additions & 0 deletions tests/test_flows/test_flow_functions.py
@@ -274,6 +274,7 @@ def test_are_flows_equal_ignore_if_older(self):
assert_flows_equal(flow, flow, ignore_parameter_values_on_older_children=None)

@pytest.mark.sklearn()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
reason="OrdinalEncoder introduced in 0.20. "
@@ -388,6 +389,7 @@ def test_get_flow_reinstantiate_flow_not_strict_pre_023(self):
assert "sklearn==0.19.1" not in flow.dependencies

@pytest.mark.sklearn()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_get_flow_id(self):
if self.long_version:
list_all = openml.utils._list_all
5 changes: 5 additions & 0 deletions tests/test_runs/test_run.py
@@ -118,6 +118,7 @@ def _check_array(array, type_):
assert run_prime_trace_content is None

@pytest.mark.sklearn()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_to_from_filesystem_vanilla(self):
model = Pipeline(
[
@@ -153,6 +154,7 @@ def test_to_from_filesystem_vanilla(self):

@pytest.mark.sklearn()
@pytest.mark.flaky()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_to_from_filesystem_search(self):
model = Pipeline(
[
@@ -187,6 +189,7 @@ )
)

@pytest.mark.sklearn()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_to_from_filesystem_no_model(self):
model = Pipeline(
[("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())],
@@ -292,6 +295,7 @@ def assert_run_prediction_data(task, run, model):
assert_method(y_test, saved_y_test)

@pytest.mark.sklearn()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_publish_with_local_loaded_flow(self):
"""
Publish a run tied to a local flow after it has first been saved to
@@ -335,6 +339,7 @@ def test_publish_with_local_loaded_flow(self):
openml.runs.get_run(loaded_run.run_id)

@pytest.mark.sklearn()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_offline_and_online_run_identical(self):
extension = SklearnExtension()

22 changes: 22 additions & 0 deletions tests/test_runs/test_run_functions.py
@@ -413,6 +413,7 @@ def test_run_regression_on_classif_task(self):
task=task,
)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
def test_check_erronous_sklearn_flow_fails(self):
task_id = 115 # diabetes; crossvalidation
@@ -881,6 +882,7 @@ def test_run_and_upload_maskedarrays(self):

##########################################################################

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
def test_learning_curve_task_1(self):
task_id = 801 # diabetes dataset
@@ -905,6 +907,7 @@ def test_learning_curve_task_1(self):
)
self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
def test_learning_curve_task_2(self):
task_id = 801 # diabetes dataset
@@ -941,6 +944,7 @@ def test_learning_curve_task_2(self):
)
self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.21"),
@@ -1019,6 +1023,7 @@ def _test_local_evaluations(self, run):
assert alt_scores[idx] >= 0
assert alt_scores[idx] <= 1

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
def test_local_run_swapped_parameter_order_model(self):
clf = DecisionTreeClassifier()
@@ -1034,6 +1039,7 @@ def test_local_run_swapped_parameter_order_model(self):

self._test_local_evaluations(run)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
@@ -1062,6 +1068,7 @@ def test_local_run_swapped_parameter_order_flow(self):

self._test_local_evaluations(run)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
@@ -1099,6 +1106,7 @@ def test_online_run_metric_score(self):

self._test_local_evaluations(run)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
@@ -1160,6 +1168,7 @@ def test_initialize_model_from_run(self):
assert flowS.components["Imputer"].parameters["strategy"] == '"most_frequent"'
assert flowS.components["VarianceThreshold"].parameters["threshold"] == "0.05"

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
@@ -1219,6 +1228,7 @@ def test__run_exists(self):
run_ids = run_exists(task.task_id, setup_exists)
assert run_ids, (run_ids, clf)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
def test_run_with_illegal_flow_id(self):
# check the case where the user adds an illegal flow id to a
@@ -1238,6 +1248,7 @@ def test_run_with_illegal_flow_id(self):
avoid_duplicate_runs=True,
)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
def test_run_with_illegal_flow_id_after_load(self):
# Same as `test_run_with_illegal_flow_id`, but test this error is also
@@ -1294,6 +1305,7 @@ def test_run_with_illegal_flow_id_1(self):
avoid_duplicate_runs=True,
)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
def test_run_with_illegal_flow_id_1_after_load(self):
# Same as `test_run_with_illegal_flow_id_1`, but test this error is
@@ -1332,6 +1344,7 @@ def test_run_with_illegal_flow_id_1_after_load(self):
loaded_run.publish,
)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
@@ -1559,6 +1572,7 @@ def test_get_runs_list_by_tag(self):
runs = openml.runs.list_runs(tag="curves", size=2)
assert len(runs) >= 1

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
@@ -1595,6 +1609,7 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
# repeat, fold, row_id, 6 confidences, prediction and correct label
assert len(row) == 12

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
@@ -1647,6 +1662,7 @@ def test_get_uncached_run(self):
with pytest.raises(openml.exceptions.OpenMLCacheException):
openml.runs.functions._get_cached_run(10)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
def test_run_flow_on_task_downloaded_flow(self):
model = sklearn.ensemble.RandomForestClassifier(n_estimators=33)
@@ -1687,6 +1703,7 @@ def test_format_prediction_classification_no_probabilities(self):
with pytest.raises(ValueError, match="`proba` is required for classification task"):
format_prediction(classification, *ignored_input, proba=None)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_format_prediction_classification_incomplete_probabilities(self):
classification = openml.tasks.get_task(
self.TEST_SERVER_TASK_SIMPLE["task_id"],
@@ -1707,13 +1724,15 @@ def test_format_prediction_task_without_classlabels_set(self):
with pytest.raises(ValueError, match="The classification task must have class labels set"):
format_prediction(classification, *ignored_input, proba={})

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_format_prediction_task_learning_curve_sample_not_set(self):
learning_curve = openml.tasks.get_task(801, download_data=False) # diabetes;crossvalidation
probabilities = {c: 0.2 for c in learning_curve.class_labels}
ignored_input = [0] * 5
with pytest.raises(ValueError, match="`sample` can not be none for LearningCurveTask"):
format_prediction(learning_curve, *ignored_input, sample=None, proba=probabilities)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_format_prediction_task_regression(self):
task_meta_data = self.TEST_SERVER_TASK_REGRESSION["task_meta_data"]
_task_id = check_task_existence(**task_meta_data)
@@ -1743,6 +1762,7 @@ def test_format_prediction_task_regression(self):



@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
reason="SimpleImputer doesn't handle mixed type DataFrame as input",
@@ -1843,6 +1863,7 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):


@pytest.mark.sklearn()
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.21"),
reason="couldn't perform local tests successfully w/o bloating RAM",
@@ -1919,6 +1940,7 @@ def test__run_task_get_arffcontent_2(parallel_mock):
)


@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.21"),
4 changes: 4 additions & 0 deletions tests/test_setups/test_setup_functions.py
@@ -34,6 +34,7 @@ def setUp(self):
self.extension = SklearnExtension()
super().setUp()

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
def test_nonexisting_setup_exists(self):
# first publish a non-existing flow
@@ -81,6 +82,7 @@ def _existing_setup_exists(self, classif):
setup_id = openml.setups.setup_exists(flow)
assert setup_id == run.setup_id

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
def test_existing_setup_exists_1(self):
def side_effect(self):
@@ -96,11 +98,13 @@ def side_effect(self):
nb = sklearn.naive_bayes.GaussianNB()
self._existing_setup_exists(nb)

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
def test_exisiting_setup_exists_2(self):
# Check a flow with one hyperparameter
self._existing_setup_exists(sklearn.naive_bayes.GaussianNB())

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.sklearn()
def test_existing_setup_exists_3(self):
# Check a flow with many hyperparameters
3 changes: 3 additions & 0 deletions tests/test_tasks/test_classification_task.py
@@ -18,18 +18,21 @@ def setUp(self, n_levels: int = 1):
self.task_type = TaskType.SUPERVISED_CLASSIFICATION
self.estimation_procedure = 5

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_download_task(self):
task = super().test_download_task()
assert task.task_id == self.task_id
assert task.task_type_id == TaskType.SUPERVISED_CLASSIFICATION
assert task.dataset_id == 20
assert task.estimation_procedure_id == self.estimation_procedure

@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_class_labels(self):
task = get_task(self.task_id)
assert task.class_labels == ["tested_negative", "tested_positive"]


@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
@pytest.mark.server()
def test_get_X_and_Y():
task = get_task(119)