diff --git a/src/midst_toolkit/evaluation/quality/synthcity/statistical_eval.py b/src/midst_toolkit/evaluation/quality/synthcity/statistical_eval.py
index 366e628a..79ee68d6 100644
--- a/src/midst_toolkit/evaluation/quality/synthcity/statistical_eval.py
+++ b/src/midst_toolkit/evaluation/quality/synthcity/statistical_eval.py
@@ -101,12 +101,8 @@ def metrics(
         alphas, alpha_precision_curve, beta_coverage_curve, delta_precision_alpha, delta_coverage_beta,
         authenticity.
     """
 
-    if len(x) != len(x_syn):
-        raise RuntimeError("The real and synthetic data must have the same length")
-
     if emb_center is None:
         emb_center = np.mean(x, axis=0)
-
     n_steps = 30
     alphas = np.linspace(0, 1, n_steps)
@@ -118,8 +114,8 @@ def metrics(
     beta_coverage_curve: list[float] = []
 
     synth_to_center = np.sqrt(np.sum((x_syn - emb_center) ** 2, axis=1))
-
     nbrs_real = NearestNeighbors(n_neighbors=2, n_jobs=-1, p=2).fit(x)
+
     k_neighbors_real = nbrs_real.kneighbors(x)
     assert isinstance(k_neighbors_real, tuple)
     real_to_real, _ = k_neighbors_real
@@ -135,38 +131,42 @@ def metrics(
     real_to_synth_args = real_to_synth_args.squeeze()
     real_synth_closest = x_syn[real_to_synth_args]
-
     real_synth_closest_d = np.sqrt(np.sum((real_synth_closest - synth_center) ** 2, axis=1))
     closest_synth_radii = np.quantile(real_synth_closest_d, alphas)
     for k in range(len(radii)):
         precision_audit_mask = synth_to_center <= radii[k]
         alpha_precision = np.mean(precision_audit_mask)
-
         beta_coverage = np.mean(
             ((real_to_synth <= real_to_real) * (real_synth_closest_d <= closest_synth_radii[k]))
         )
-
         alpha_precision_curve.append(alpha_precision)
         beta_coverage_curve.append(beta_coverage)
-    # See which one is bigger
-
-    authen = real_to_real[real_to_synth_args] < real_to_synth
-    authenticity = np.mean(authen)
-
     delta_precision_alpha = 1.0 - np.sum(np.abs(np.array(alphas) - np.array(alpha_precision_curve))) / np.sum(
         alphas
     )
-
     if delta_precision_alpha < 0:
         raise RuntimeError("negative value detected for Delta_precision_alpha")
     delta_coverage_beta = 1.0 - np.sum(np.abs(np.array(alphas) - np.array(beta_coverage_curve))) / np.sum(alphas)
-
     if delta_coverage_beta < 0:
         raise RuntimeError("negative value detected for Delta_coverage_beta")
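+    # A synthetic sample counts as authentic when it is at least as far from its nearest real
+    # point as that real point is from its own nearest real neighbor, i.e. the sample is not
+    # simply a (noisy) copy of a training example.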
+    k_neighbors_real_for_synthetic = nbrs_real.kneighbors(x_syn)
+    # Find the closest real point to each synthetic point
+    closest_real_to_synth_distance, closest_real_to_synthetic_idx_list = k_neighbors_real_for_synthetic
+    closest_real_to_synth_distance = closest_real_to_synth_distance[:, 0].squeeze()
+    closest_real_to_synthetic_idx_list = closest_real_to_synthetic_idx_list[:, 0].squeeze()
+
+    closest_real_to_real_distance = real_to_real[closest_real_to_synthetic_idx_list]
+    is_authentic = closest_real_to_real_distance <= closest_real_to_synth_distance
+
+    authenticity = np.mean(is_authentic.astype(int))
+
     return (
         alphas.tolist(),
         alpha_precision_curve,
         beta_coverage_curve,
         delta_precision_alpha,
         delta_coverage_beta,
         authenticity,
     )
diff --git a/tests/unit/evaluation/quality/test_alpha_precision.py b/tests/unit/evaluation/quality/test_alpha_precision.py
index d1464c04..56078ee0 100644
--- a/tests/unit/evaluation/quality/test_alpha_precision.py
+++ b/tests/unit/evaluation/quality/test_alpha_precision.py
@@ -19,7 +19,11 @@ def test_alpha_precision_evaluation() -> None:
-    set_all_random_seeds(1)
+    # Setting these parameters to True helps produce consistent output on the same architecture for
+    # the _OC metrics, which rely on an embedding obtained by training a 1-layer NN. The cluster's
+    # architecture differs from GitHub's, so we do not run this test on the cluster and let it run
+    # only on GitHub.
+    set_all_random_seeds(1, use_deterministic_torch_algos=True, disable_torch_benchmarking=True)
 
     real_data, synthetic_data, meta_info = load_midst_data(REAL_DATA_PATH, SYNTHETIC_DATA_PATH, META_INFO_PATH)
@@ -49,17 +53,13 @@ def test_alpha_precision_evaluation() -> None:
     if is_apple_silicon():
         assert pytest.approx(0.972538441890166, abs=1e-8) == quality_results["delta_precision_alpha_OC"]
         assert pytest.approx(0.4709851851851852, abs=1e-8) == quality_results["delta_coverage_beta_OC"]
-        assert pytest.approx(0.512, abs=1e-8) == quality_results["authenticity_OC"]
         assert pytest.approx(0.05994074074074074, abs=1e-8) == quality_results["delta_precision_alpha_naive"]
         assert pytest.approx(0.005229629629629584, abs=1e-8) == quality_results["delta_coverage_beta_naive"]
-        assert pytest.approx(0.9905185185185185, abs=1e-8) == quality_results["authenticity_naive"]
     else:
         assert pytest.approx(0.9732668369518944, abs=1e-8) == quality_results["delta_precision_alpha_OC"]
         assert pytest.approx(0.47238271604938276, abs=1e-8) == quality_results["delta_coverage_beta_OC"]
-        assert pytest.approx(0.5102592592592593, abs=1e-8) == quality_results["authenticity_OC"]
         assert pytest.approx(0.05994074074074074, abs=1e-8) == quality_results["delta_precision_alpha_naive"]
         assert pytest.approx(0.005229629629629584, abs=1e-8) == quality_results["delta_coverage_beta_naive"]
-        assert pytest.approx(0.9905185185185185, abs=1e-8) == quality_results["authenticity_naive"]
 
     # Unset seed for safety
     unset_all_random_seeds()
diff --git a/tests/unit/evaluation/quality/test_authenticity.py b/tests/unit/evaluation/quality/test_authenticity.py
new file mode 100644
index 00000000..ddd6622a
--- /dev/null
+++ b/tests/unit/evaluation/quality/test_authenticity.py
@@ -0,0 +1,204 @@
+import numpy as np
+import pandas as pd
+import pytest
+
+from midst_toolkit.evaluation.quality.alpha_precision import AlphaPrecision
+from midst_toolkit.evaluation.utils import one_hot_encode_categoricals_and_merge_with_numerical
+
+
+def test_authenticity_only_categorical() -> None:
+    categorical_real_data = pd.DataFrame(
+        {
+            "color": ["red", "blue"],
+            "shape": ["circle", "square"],
+        }
+    )
+
+    categorical_synthetic_data = pd.DataFrame(
+        {
+            "color": ["red", "blue"],
+            "shape": ["square", "circle"],
+        }
+    )
+
+    categorical_real_encoded = pd.get_dummies(categorical_real_data, columns=["color", "shape"]).astype(int)
+    categorical_synthetic_encoded = pd.get_dummies(categorical_synthetic_data, columns=["color", "shape"]).astype(int)
+    categorical_synthetic_encoded = categorical_synthetic_encoded.reindex(
+        columns=categorical_real_encoded.columns, fill_value=0
+    )
+
+    numerical_real_numpy = np.empty((len(categorical_real_data), 0))
+    numerical_synthetic_numpy = np.empty((len(categorical_synthetic_data), 0))
+
+    real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
+        categorical_real_encoded,
+        categorical_synthetic_encoded,
+        numerical_real_numpy,
+        numerical_synthetic_numpy,
+    )
+
+    alpha_precision_metric = AlphaPrecision(naive_only=False)
+    quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)
+
+    # Check naive authenticity as the _OC metric depends on a 1-layer NN training,
+    # which may give different results on different architectures.
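+    # Each synthetic row is sqrt(2) away from its nearest real row in one-hot space, while the two
+    # real rows are 2 apart, so no synthetic point is counted as authentic.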
+    expected_authenticity = 0.0
+    assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]
+
+
+def test_authenticity_only_numerical() -> None:
+    numerical_real_data = pd.DataFrame(
+        {
+            "x": [0.0, 1.0],
+            "y": [0.0, 1.0],
+        }
+    )
+
+    numerical_synthetic_data = pd.DataFrame(
+        {
+            "x": [0.0, 1.0],
+            "y": [1.0, 0.0],
+        }
+    )
+
+    categorical_real_encoded = pd.DataFrame()
+    categorical_synthetic_encoded = pd.DataFrame()
+
+    numerical_real_numpy = numerical_real_data.to_numpy()
+    numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()
+
+    real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
+        categorical_real_encoded,
+        categorical_synthetic_encoded,
+        numerical_real_numpy,
+        numerical_synthetic_numpy,
+    )
+
+    alpha_precision_metric = AlphaPrecision(naive_only=False)
+    quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)
+
+    # Check naive authenticity as the _OC metric depends on a 1-layer NN training,
+    # which may give different results on different architectures.
+    expected_authenticity = 0.0
+    assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]
+
+
+def test_authenticity_numerical_and_categorical() -> None:
+    numerical_real_data = pd.DataFrame(
+        {
+            "num_feature": [0.0, 1.0],
+        }
+    )
+
+    numerical_synthetic_data = pd.DataFrame(
+        {
+            "num_feature": [0.0, 1.0],
+        }
+    )
+
+    categorical_real_data = pd.DataFrame(
+        {
+            "color": ["red", "blue"],
+            "shape": ["circle", "square"],
+        }
+    )
+
+    categorical_synthetic_data = pd.DataFrame(
+        {
+            "color": ["red", "blue"],
+            "shape": ["square", "circle"],
+        }
+    )
+
+    categorical_real_encoded = pd.get_dummies(categorical_real_data, columns=["color", "shape"]).astype(int)
+    categorical_synthetic_encoded = pd.get_dummies(categorical_synthetic_data, columns=["color", "shape"]).astype(int)
+    categorical_synthetic_encoded = categorical_synthetic_encoded.reindex(
+        columns=categorical_real_encoded.columns, fill_value=0
+    )
+
+    numerical_real_numpy = numerical_real_data.to_numpy()
+    numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()
+
+    real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
+        categorical_real_encoded,
+        categorical_synthetic_encoded,
+        numerical_real_numpy,
+        numerical_synthetic_numpy,
+    )
+
+    alpha_precision_metric = AlphaPrecision(naive_only=False)
+    quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)
+
+    # Check naive authenticity as the _OC metric depends on a 1-layer NN training,
+    # which may give different results on different architectures.
+    expected_authenticity = 0.0
+    assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]
+
+
+def test_authenticity_mismatched_sizes_numerical_real_larger() -> None:
+    numerical_real_data = pd.DataFrame({"x": [0.0, 1.0, 2.0], "y": [0.0, 1.0, 2.0]})
+    numerical_synthetic_data = pd.DataFrame({"x": [0.0, 10.0], "y": [1.0, 10.0]})
+
+    categorical_real_encoded = pd.DataFrame()
+    categorical_synthetic_encoded = pd.DataFrame()
+
+    numerical_real_numpy = numerical_real_data.to_numpy()
+    numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()
+
+    real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
+        categorical_real_encoded,
+        categorical_synthetic_encoded,
+        numerical_real_numpy,
+        numerical_synthetic_numpy,
+    )
+
+    alpha_precision_metric = AlphaPrecision(naive_only=False)
+    quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)
+
+    # Check naive authenticity as the _OC metric depends on a 1-layer NN training,
+    # which may give different results on different architectures.
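+    # The synthetic point (0.0, 1.0) is distance 1 from its nearest real point, closer than any real
+    # point is to another real point (sqrt(2)), so only the outlier (10.0, 10.0) counts as authentic.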
+    expected_authenticity = 0.5
+    assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]
+
+
+def test_authenticity_mismatched_sizes_numerical_synthetic_larger() -> None:
+    numerical_real_data = pd.DataFrame(
+        {
+            "x": [0.0, 2.0],
+            "y": [0.0, 2.0],
+        }
+    )
+
+    numerical_synthetic_data = pd.DataFrame(
+        {
+            "x": [0.0, 1.0, 2.0, 3.0, 10.0],
+            "y": [0.0, 1.0, 2.0, 3.0, 10.0],
+        }
+    )
+
+    categorical_real_encoded = pd.DataFrame()
+    categorical_synthetic_encoded = pd.DataFrame()
+
+    numerical_real_numpy = numerical_real_data.to_numpy()
+    numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()
+
+    real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
+        categorical_real_encoded,
+        categorical_synthetic_encoded,
+        numerical_real_numpy,
+        numerical_synthetic_numpy,
+    )
+
+    alpha_precision_metric = AlphaPrecision(naive_only=False)
+    quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)
+
+    # Check naive authenticity as the _OC metric depends on a 1-layer NN training,
+    # which may give different results on different architectures.
+    # Only (10.0, 10.0) is farther from its nearest real point than the real points are from each
+    # other (2 * sqrt(2)), so 1 of the 5 synthetic points is authentic.
+    expected_authenticity = 0.2
+    assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]