diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 266a6f6f7..f8cb1943c 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -280,6 +280,7 @@ def test_dataset_by_name_cannot_access_private_data(self):
         self.use_production_server()
         self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE")
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_get_dataset_lazy_all_functions(self):
         """Test that all expected functionality is available without downloading the dataset."""
         dataset = openml.datasets.get_dataset(1)
@@ -664,6 +665,7 @@ def test_attributes_arff_from_df_unknown_dtype(self):
         with pytest.raises(ValueError, match=err_msg):
             attributes_arff_from_df(df)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_create_dataset_numpy(self):
         data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T
 
@@ -751,6 +753,7 @@ def test_create_dataset_list(self):
         ), "Uploaded ARFF does not match original one"
         assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset"
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_create_dataset_sparse(self):
         # test the scipy.sparse.coo_matrix
         sparse_data = scipy.sparse.coo_matrix(
@@ -868,6 +871,7 @@ def test_get_online_dataset_arff(self):
             return_type=arff.DENSE if d_format == "arff" else arff.COO,
         ), "ARFF files are not equal"
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_topic_api_error(self):
         # Check server exception when non-admin accessses apis
         self.assertRaisesRegex(
@@ -895,6 +899,7 @@ def test_get_online_dataset_format(self):
             dataset_id
         ), "The format of the ARFF files is different"
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_create_dataset_pandas(self):
         data = [
             ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
@@ -1119,6 +1124,7 @@ def test_ignore_attributes_dataset(self):
                 paper_url=paper_url,
             )
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_publish_fetch_ignore_attribute(self):
         """Test to upload and retrieve dataset and check ignore_attributes"""
         data = [
@@ -1237,6 +1243,7 @@ def test_create_dataset_row_id_attribute_error(self):
                 paper_url=paper_url,
             )
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_create_dataset_row_id_attribute_inference(self):
         # meta-information
         name = f"{self._get_sentinel()}-pandas_testing_dataset"
@@ -1400,6 +1407,7 @@ def test_data_edit_non_critical_field(self):
         edited_dataset = openml.datasets.get_dataset(did)
         assert edited_dataset.description == desc
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
    def test_data_edit_critical_field(self):
        # Case 2
        # only owners (or admin) can edit all critical fields of datasets
@@ -1448,6 +1456,7 @@ def test_data_edit_requires_valid_dataset(self):
                 description="xor operation dataset",
             )
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self):
         # Need to own a dataset to be able to edit meta-data
         # Will be creating a forked version of an existing dataset to allow the unit test user
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index 0b034c3b4..da719d058 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -178,6 +178,7 @@ def test_to_xml_from_xml(self):
         assert new_flow is not flow
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_publish_flow(self):
         flow = openml.OpenMLFlow(
             name="sklearn.dummy.DummyClassifier",
@@ -219,6 +220,7 @@ def test_publish_existing_flow(self, flow_exists_mock):
         )
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_publish_flow_with_similar_components(self):
         clf = sklearn.ensemble.VotingClassifier(
             [("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))],
@@ -269,6 +271,7 @@ def test_publish_flow_with_similar_components(self):
         TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}")
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_semi_legal_flow(self):
         # TODO: Test if parameters are set correctly!
         # should not throw error as it contains two differentiable forms of
@@ -377,6 +380,7 @@ def get_sentinel():
         assert not flow_id
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_existing_flow_exists(self):
         # create a flow
         nb = sklearn.naive_bayes.GaussianNB()
@@ -417,6 +421,7 @@ def test_existing_flow_exists(self):
         assert downloaded_flow_id == flow.flow_id
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_sklearn_to_upload_to_flow(self):
         iris = sklearn.datasets.load_iris()
         X = iris.data
diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py
index 9f8ec5e36..0be65ceac 100644
--- a/tests/test_flows/test_flow_functions.py
+++ b/tests/test_flows/test_flow_functions.py
@@ -274,6 +274,7 @@ def test_are_flows_equal_ignore_if_older(self):
         assert_flows_equal(flow, flow, ignore_parameter_values_on_older_children=None)
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
         reason="OrdinalEncoder introduced in 0.20. "
@@ -388,6 +389,7 @@ def test_get_flow_reinstantiate_flow_not_strict_pre_023(self):
         assert "sklearn==0.19.1" not in flow.dependencies
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_get_flow_id(self):
         if self.long_version:
             list_all = openml.utils._list_all
diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py
index 034b731aa..71651d431 100644
--- a/tests/test_runs/test_run.py
+++ b/tests/test_runs/test_run.py
@@ -118,6 +118,7 @@ def _check_array(array, type_):
         assert run_prime_trace_content is None
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_to_from_filesystem_vanilla(self):
         model = Pipeline(
             [
@@ -153,6 +154,7 @@ def test_to_from_filesystem_vanilla(self):
 
     @pytest.mark.sklearn()
     @pytest.mark.flaky()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_to_from_filesystem_search(self):
         model = Pipeline(
             [
@@ -187,6 +189,7 @@ def test_to_from_filesystem_search(self):
         )
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_to_from_filesystem_no_model(self):
         model = Pipeline(
             [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())],
@@ -292,6 +295,7 @@ def assert_run_prediction_data(task, run, model):
         assert_method(y_test, saved_y_test)
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_publish_with_local_loaded_flow(self):
         """
         Publish a run tied to a local flow after it has first been saved to
@@ -335,6 +339,7 @@ def test_publish_with_local_loaded_flow(self):
             openml.runs.get_run(loaded_run.run_id)
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_offline_and_online_run_identical(self):
         extension = SklearnExtension()
 
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index e4cec56ab..305d859d9 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -413,6 +413,7 @@ def test_run_regression_on_classif_task(self):
                 task=task,
             )
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_check_erronous_sklearn_flow_fails(self):
         task_id = 115  # diabetes; crossvalidation
@@ -881,6 +882,7 @@ def test_run_and_upload_maskedarrays(self):
 
     ##########################################################################
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_learning_curve_task_1(self):
         task_id = 801  # diabates dataset
@@ -905,6 +907,7 @@ def test_learning_curve_task_1(self):
         )
         self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_learning_curve_task_2(self):
         task_id = 801  # diabates dataset
@@ -941,6 +944,7 @@ def test_learning_curve_task_2(self):
         )
         self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.21"),
@@ -1019,6 +1023,7 @@ def _test_local_evaluations(self, run):
             assert alt_scores[idx] >= 0
             assert alt_scores[idx] <= 1
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_local_run_swapped_parameter_order_model(self):
         clf = DecisionTreeClassifier()
@@ -1034,6 +1039,7 @@ def test_local_run_swapped_parameter_order_model(self):
 
         self._test_local_evaluations(run)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1062,6 +1068,7 @@ def test_local_run_swapped_parameter_order_flow(self):
 
         self._test_local_evaluations(run)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1099,6 +1106,7 @@ def test_online_run_metric_score(self):
 
         self._test_local_evaluations(run)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1160,6 +1168,7 @@ def test_initialize_model_from_run(self):
         assert flowS.components["Imputer"].parameters["strategy"] == '"most_frequent"'
         assert flowS.components["VarianceThreshold"].parameters["threshold"] == "0.05"
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1219,6 +1228,7 @@ def test__run_exists(self):
             run_ids = run_exists(task.task_id, setup_exists)
             assert run_ids, (run_ids, clf)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_run_with_illegal_flow_id(self):
         # check the case where the user adds an illegal flow id to a
@@ -1238,6 +1248,7 @@ def test_run_with_illegal_flow_id(self):
                 avoid_duplicate_runs=True,
             )
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_run_with_illegal_flow_id_after_load(self):
         # Same as `test_run_with_illegal_flow_id`, but test this error is also
@@ -1294,6 +1305,7 @@ def test_run_with_illegal_flow_id_1(self):
                 avoid_duplicate_runs=True,
             )
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_run_with_illegal_flow_id_1_after_load(self):
         # Same as `test_run_with_illegal_flow_id_1`, but test this error is
@@ -1332,6 +1344,7 @@ def test_run_with_illegal_flow_id_1_after_load(self):
             loaded_run.publish,
         )
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1559,6 +1572,7 @@ def test_get_runs_list_by_tag(self):
         runs = openml.runs.list_runs(tag="curves", size=2)
         assert len(runs) >= 1
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1595,6 +1609,7 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
             # repeat, fold, row_id, 6 confidences, prediction and correct label
             assert len(row) == 12
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1647,6 +1662,7 @@ def test_get_uncached_run(self):
         with pytest.raises(openml.exceptions.OpenMLCacheException):
             openml.runs.functions._get_cached_run(10)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_run_flow_on_task_downloaded_flow(self):
         model = sklearn.ensemble.RandomForestClassifier(n_estimators=33)
@@ -1687,6 +1703,7 @@ def test_format_prediction_classification_no_probabilities(self):
         with pytest.raises(ValueError, match="`proba` is required for classification task"):
             format_prediction(classification, *ignored_input, proba=None)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_format_prediction_classification_incomplete_probabilities(self):
         classification = openml.tasks.get_task(
             self.TEST_SERVER_TASK_SIMPLE["task_id"],
@@ -1707,6 +1724,7 @@ def test_format_prediction_task_without_classlabels_set(self):
         with pytest.raises(ValueError, match="The classification task must have class labels set"):
             format_prediction(classification, *ignored_input, proba={})
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_format_prediction_task_learning_curve_sample_not_set(self):
         learning_curve = openml.tasks.get_task(801, download_data=False)  # diabetes;crossvalidation
         probabilities = {c: 0.2 for c in learning_curve.class_labels}
@@ -1714,6 +1732,7 @@ def test_format_prediction_task_learning_curve_sample_not_set(self):
         with pytest.raises(ValueError, match="`sample` can not be none for LearningCurveTask"):
             format_prediction(learning_curve, *ignored_input, sample=None, proba=probabilities)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_format_prediction_task_regression(self):
         task_meta_data = self.TEST_SERVER_TASK_REGRESSION["task_meta_data"]
         _task_id = check_task_existence(**task_meta_data)
@@ -1743,6 +1762,7 @@
 
 
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
         reason="SimpleImputer doesn't handle mixed type DataFrame as input",
@@ -1843,6 +1863,7 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
 
 
 @pytest.mark.sklearn()
+@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
 @unittest.skipIf(
     Version(sklearn.__version__) < Version("0.21"),
     reason="couldn't perform local tests successfully w/o bloating RAM",
@@ -1919,6 +1940,7 @@ def test__run_task_get_arffcontent_2(parallel_mock):
     )
 
 
+@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
 @pytest.mark.sklearn()
 @unittest.skipIf(
     Version(sklearn.__version__) < Version("0.21"),
diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index 42af5362b..a3b698a37 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -34,6 +34,7 @@ def setUp(self):
         self.extension = SklearnExtension()
         super().setUp()
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_nonexisting_setup_exists(self):
         # first publish a non-existing flow
@@ -81,6 +82,7 @@ def _existing_setup_exists(self, classif):
         setup_id = openml.setups.setup_exists(flow)
         assert setup_id == run.setup_id
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_existing_setup_exists_1(self):
         def side_effect(self):
@@ -96,11 +98,13 @@ def side_effect(self):
             nb = sklearn.naive_bayes.GaussianNB()
             self._existing_setup_exists(nb)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_exisiting_setup_exists_2(self):
         # Check a flow with one hyperparameter
         self._existing_setup_exists(sklearn.naive_bayes.GaussianNB())
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_existing_setup_exists_3(self):
         # Check a flow with many hyperparameters
diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py
index d4f2ed9d7..5528cabf2 100644
--- a/tests/test_tasks/test_classification_task.py
+++ b/tests/test_tasks/test_classification_task.py
@@ -18,6 +18,7 @@ def setUp(self, n_levels: int = 1):
         self.task_type = TaskType.SUPERVISED_CLASSIFICATION
         self.estimation_procedure = 5
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_download_task(self):
         task = super().test_download_task()
         assert task.task_id == self.task_id
@@ -25,11 +26,13 @@
         assert task.dataset_id == 20
         assert task.estimation_procedure_id == self.estimation_procedure
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_class_labels(self):
         task = get_task(self.task_id)
         assert task.class_labels == ["tested_negative", "tested_positive"]
 
 
+@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
 @pytest.mark.server()
 def test_get_X_and_Y():
     task = get_task(119)
diff --git a/tests/test_tasks/test_learning_curve_task.py b/tests/test_tasks/test_learning_curve_task.py
index 4a3dede4e..5f4b3e0ab 100644
--- a/tests/test_tasks/test_learning_curve_task.py
+++ b/tests/test_tasks/test_learning_curve_task.py
@@ -18,6 +18,7 @@ def setUp(self, n_levels: int = 1):
         self.task_type = TaskType.LEARNING_CURVE
         self.estimation_procedure = 13
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_get_X_and_Y(self):
         X, Y = super().test_get_X_and_Y()
         assert X.shape == (768, 8)
@@ -26,12 +27,14 @@
         assert isinstance(Y, pd.Series)
         assert pd.api.types.is_categorical_dtype(Y)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_download_task(self):
         task = super().test_download_task()
         assert task.task_id == self.task_id
         assert task.task_type_id == TaskType.LEARNING_CURVE
         assert task.dataset_id == 20
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_class_labels(self):
         task = get_task(self.task_id)
         assert task.class_labels == ["tested_negative", "tested_positive"]
diff --git a/tests/test_tasks/test_regression_task.py b/tests/test_tasks/test_regression_task.py
index 3e324c4f8..0cd2d96e2 100644
--- a/tests/test_tasks/test_regression_task.py
+++ b/tests/test_tasks/test_regression_task.py
@@ -49,6 +49,7 @@ def setUp(self, n_levels: int = 1):
 
         self.task_type = TaskType.SUPERVISED_REGRESSION
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_get_X_and_Y(self):
         X, Y = super().test_get_X_and_Y()
         assert X.shape == (194, 32)
@@ -57,6 +58,7 @@
         assert isinstance(Y, pd.Series)
         assert pd.api.types.is_numeric_dtype(Y)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_download_task(self):
         task = super().test_download_task()
         assert task.task_id == self.task_id
diff --git a/tests/test_tasks/test_task.py b/tests/test_tasks/test_task.py
index e4c9418f2..67f715d2b 100644
--- a/tests/test_tasks/test_task.py
+++ b/tests/test_tasks/test_task.py
@@ -4,6 +4,8 @@
 import unittest
 from random import randint, shuffle
 
+import pytest
+
 from openml.datasets import (
     get_dataset,
     list_datasets,
@@ -33,6 +35,7 @@ def setUp(self, n_levels: int = 1):
     def test_download_task(self):
         return get_task(self.task_id)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_upload_task(self):
         # We don't know if the task in question already exists, so we try a few times.  Checking
         # beforehand would not be an option because a concurrent unit test could potentially
diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py
index 0aa2dcc9b..110459711 100644
--- a/tests/test_tasks/test_task_functions.py
+++ b/tests/test_tasks/test_task_functions.py
@@ -152,6 +152,7 @@ def test_get_task(self):
             os.path.join(self.workdir, "org", "openml", "test", "datasets", "1", "dataset.arff")
         )
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_get_task_lazy(self):
         task = openml.tasks.get_task(2, download_data=False)  # anneal; crossvalidation
         assert isinstance(task, OpenMLTask)