27 changes: 12 additions & 15 deletions src/midst_toolkit/evaluation/quality/synthcity/statistical_eval.py
@@ -101,12 +101,8 @@ def metrics(
alphas, alpha_precision_curve, beta_coverage_curve, delta_precision_alpha, delta_coverage_beta,
authenticity.
"""
if len(x) != len(x_syn):
raise RuntimeError("The real and synthetic data must have the same length")

if emb_center is None:
emb_center = np.mean(x, axis=0)

n_steps = 30
alphas = np.linspace(0, 1, n_steps)

@@ -118,8 +114,8 @@ def metrics(
beta_coverage_curve: list[float] = []

synth_to_center = np.sqrt(np.sum((x_syn - emb_center) ** 2, axis=1))

nbrs_real = NearestNeighbors(n_neighbors=2, n_jobs=-1, p=2).fit(x)

k_neighbors_real = nbrs_real.kneighbors(x)
assert isinstance(k_neighbors_real, tuple)
real_to_real, _ = k_neighbors_real
@@ -135,38 +131,39 @@ def metrics(
real_to_synth_args = real_to_synth_args.squeeze()

real_synth_closest = x_syn[real_to_synth_args]

real_synth_closest_d = np.sqrt(np.sum((real_synth_closest - synth_center) ** 2, axis=1))
closest_synth_radii = np.quantile(real_synth_closest_d, alphas)

for k in range(len(radii)):
precision_audit_mask = synth_to_center <= radii[k]
alpha_precision = np.mean(precision_audit_mask)

beta_coverage = np.mean(
((real_to_synth <= real_to_real) * (real_synth_closest_d <= closest_synth_radii[k]))
)

alpha_precision_curve.append(alpha_precision)
beta_coverage_curve.append(beta_coverage)

# See which one is bigger

authen = real_to_real[real_to_synth_args] < real_to_synth
Collaborator: This error brought to you by someone who couldn't be bothered to actually fully understand what the nearest neighbor function does...yikes.
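
For readers following the fix: the root of the bug is how scikit-learn's kneighbors behaves when the query set is the training set itself. A minimal illustration on toy data (standard scikit-learn API, not toolkit code):

import numpy as np
from sklearn.neighbors import NearestNeighbors

x = np.array([[0.0], [1.0], [3.0]])
nbrs = NearestNeighbors(n_neighbors=2, p=2).fit(x)

# Querying with the training data itself: each point's first neighbor is
# itself at distance zero, so the true nearest-neighbor distance is in column 1.
distances, _ = nbrs.kneighbors(x)
print(distances[:, 0])  # [0. 0. 0.]
print(distances[:, 1])  # [1. 1. 2.]

# Querying with other data (as with x_syn against a model fit on x): there is
# no self-match, so column 0 already holds the nearest real neighbor.
distances, indices = nbrs.kneighbors(np.array([[0.4]]))
print(distances[:, 0], indices[:, 0])  # [0.4] [0]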

authenticity = np.mean(authen)

delta_precision_alpha = 1.0 - np.sum(np.abs(np.array(alphas) - np.array(alpha_precision_curve))) / np.sum(
alphas
)

if delta_precision_alpha < 0:
raise RuntimeError("negative value detected for Delta_precision_alpha")

delta_coverage_beta = 1.0 - np.sum(np.abs(np.array(alphas) - np.array(beta_coverage_curve))) / np.sum(alphas)

if delta_coverage_beta < 0:
raise RuntimeError("negative value detected for Delta_coverage_beta")

k_neighbors_real_for_synthetic = nbrs_real.kneighbors(x_syn)
closest_real_to_synth_distance, closest_real_to_synthetic_idx_list = k_neighbors_real_for_synthetic
# Find the closest real point to each synthetic point
closest_real_to_synth_distance = closest_real_to_synth_distance[:, 0].squeeze()
closest_real_to_synthetic_idx_list = closest_real_to_synthetic_idx_list[:, 0].squeeze()

closest_real_to_real_distance = real_to_real[closest_real_to_synthetic_idx_list]
is_authetic = closest_real_to_real_distance <= closest_real_to_synth_distance

authenticity = np.mean(is_authetic.astype(int))

return (
alphas.tolist(),
alpha_precision_curve,
10 changes: 5 additions & 5 deletions tests/unit/evaluation/quality/test_alpha_precision.py
@@ -19,7 +19,11 @@


def test_alpha_precision_evaluation() -> None:
set_all_random_seeds(1)
# Setting these parameters to True helps get consistent output on the same architecture for the _OC metrics
# that use an embedding produced by training a 1-layer NN. We do not run this test on the cluster and only
# let it run on GitHub, since the cluster's architecture differs from GitHub's.
set_all_random_seeds(1, use_deterministic_torch_algos=True, disable_torch_benchmarking=True)

real_data, synthetic_data, meta_info = load_midst_data(REAL_DATA_PATH, SYNTHETIC_DATA_PATH, META_INFO_PATH)

@@ -49,17 +53,13 @@ def test_alpha_precision_evaluation() -> None:
if is_apple_silicon():
assert pytest.approx(0.972538441890166, abs=1e-8) == quality_results["delta_precision_alpha_OC"]
assert pytest.approx(0.4709851851851852, abs=1e-8) == quality_results["delta_coverage_beta_OC"]
assert pytest.approx(0.512, abs=1e-8) == quality_results["authenticity_OC"]
assert pytest.approx(0.05994074074074074, abs=1e-8) == quality_results["delta_precision_alpha_naive"]
assert pytest.approx(0.005229629629629584, abs=1e-8) == quality_results["delta_coverage_beta_naive"]
assert pytest.approx(0.9905185185185185, abs=1e-8) == quality_results["authenticity_naive"]
else:
assert pytest.approx(0.9732668369518944, abs=1e-8) == quality_results["delta_precision_alpha_OC"]
assert pytest.approx(0.47238271604938276, abs=1e-8) == quality_results["delta_coverage_beta_OC"]
assert pytest.approx(0.5102592592592593, abs=1e-8) == quality_results["authenticity_OC"]
Collaborator: This is probably a naive question, but does your change affect the authenticity_OC metric as well? If so, then removing this makes sense. Just want to check.

Collaborator (Author): Yes, exactly. The process for calculating authenticity is shared between them; for _OC, the data is simply embedded first using the one-layer NN.

Collaborator: Got it!
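
To make the thread above concrete, here is a sketch of the shared routine as described; this is illustrative, not the toolkit's exact implementation in statistical_eval.py, and embed stands in for the trained one-layer network:

import numpy as np
from sklearn.neighbors import NearestNeighbors


def authenticity(x_real: np.ndarray, x_syn: np.ndarray) -> float:
    # A synthetic point counts as authentic when its nearest real point is no
    # closer to it than that real point's own nearest real neighbor.
    nbrs = NearestNeighbors(n_neighbors=2, p=2).fit(x_real)
    real_to_real = nbrs.kneighbors(x_real)[0][:, 1]  # column 1 skips the self-match
    distances, indices = nbrs.kneighbors(x_syn)
    return float(np.mean(real_to_real[indices[:, 0]] <= distances[:, 0]))

# authenticity_naive: the routine on raw features.
# authenticity_OC: the same routine on embedded features, e.g.
# authenticity(embed(x_real), embed(x_syn)), where embed is the trained 1-layer NN.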

assert pytest.approx(0.05994074074074074, abs=1e-8) == quality_results["delta_precision_alpha_naive"]
assert pytest.approx(0.005229629629629584, abs=1e-8) == quality_results["delta_coverage_beta_naive"]
assert pytest.approx(0.9905185185185185, abs=1e-8) == quality_results["authenticity_naive"]

# Unset seed for safety
unset_all_random_seeds()
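
For context on the two new flags: set_all_random_seeds is a midst_toolkit helper, so the body below is an assumption about what such a helper typically does, not the toolkit's actual implementation:

import random

import numpy as np
import torch


def set_all_random_seeds_sketch(
    seed: int,
    use_deterministic_torch_algos: bool = False,
    disable_torch_benchmarking: bool = False,
) -> None:
    # Seed every RNG the test might touch.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if use_deterministic_torch_algos:
        # Ask PyTorch to use deterministic kernels wherever it provides them.
        torch.use_deterministic_algorithms(True)
    if disable_torch_benchmarking:
        # cuDNN benchmarking autotunes kernels per input shape and can pick
        # different algorithms between runs, breaking reproducibility.
        torch.backends.cudnn.benchmark = False
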
198 changes: 198 additions & 0 deletions tests/unit/evaluation/quality/test_autheticity.py
@@ -0,0 +1,198 @@
import numpy as np
import pandas as pd
import pytest

from midst_toolkit.evaluation.quality.alpha_precision import AlphaPrecision
from midst_toolkit.evaluation.utils import one_hot_encode_categoricals_and_merge_with_numerical


def test_autheticity_only_categorical() -> None:
categorical_real_data = pd.DataFrame(
{
"color": ["red", "blue"],
"shape": ["circle", "square"],
}
)

categorical_synthetic_data = pd.DataFrame(
{
"color": ["red", "blue"],
"shape": ["square", "circle"],
}
)

categorical_real_encoded = pd.get_dummies(categorical_real_data, columns=["color", "shape"]).astype(int)
categorical_synthetic_encoded = pd.get_dummies(categorical_synthetic_data, columns=["color", "shape"]).astype(int)
categorical_synthetic_encoded = categorical_synthetic_encoded.reindex(
columns=categorical_real_encoded.columns, fill_value=0
)

numerical_real_numpy = np.empty((len(categorical_real_data), 0))
numerical_synthetic_numpy = np.empty((len(categorical_synthetic_data), 0))

real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
categorical_real_encoded,
categorical_synthetic_encoded,
numerical_real_numpy,
numerical_synthetic_numpy,
)

alpha_precision_metric = AlphaPrecision(naive_only=False)
quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

# Check naive authenticity as the _OC metric depends on a 1-layer NN training
# which may give different results on different architectures
expected_authenticity = 0.0
assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]


def test_authenticity_only_numerical() -> None:
numerical_real_data = pd.DataFrame(
{
"x": [0.0, 1.0],
"y": [0.0, 1.0],
}
)

numerical_synthetic_data = pd.DataFrame(
{
"x": [0.0, 1.0],
"y": [1.0, 0.0],
}
)

categorical_real_encoded = pd.DataFrame()
categorical_synthetic_encoded = pd.DataFrame()

numerical_real_numpy = numerical_real_data.to_numpy()
numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()

real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
categorical_real_encoded,
categorical_synthetic_encoded,
numerical_real_numpy,
numerical_synthetic_numpy,
)

alpha_precision_metric = AlphaPrecision(naive_only=False)
quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

# Check naive authenticity as the _OC metric depends on a 1-layer NN training
# which may give different results on different architectures
expected_authenticity = 0.0
assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]


def test_authenticity_numerical_and_categorical() -> None:
numerical_real_data = pd.DataFrame(
{
"num_feature": [0.0, 1.0],
}
)

numerical_synthetic_data = pd.DataFrame(
{
"num_feature": [0.0, 1.0],
}
)

categorical_real_data = pd.DataFrame(
{
"color": ["red", "blue"],
"shape": ["circle", "square"],
}
)

categorical_synthetic_data = pd.DataFrame(
{
"color": ["red", "blue"],
"shape": ["square", "circle"],
}
)

categorical_real_encoded = pd.get_dummies(categorical_real_data, columns=["color", "shape"]).astype(int)
categorical_synthetic_encoded = pd.get_dummies(categorical_synthetic_data, columns=["color", "shape"]).astype(int)
categorical_synthetic_encoded = categorical_synthetic_encoded.reindex(
columns=categorical_real_encoded.columns, fill_value=0
)

numerical_real_numpy = numerical_real_data.to_numpy()
numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()

real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
categorical_real_encoded,
categorical_synthetic_encoded,
numerical_real_numpy,
numerical_synthetic_numpy,
)

alpha_precision_metric = AlphaPrecision(naive_only=False)
quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

# Check naive authenticity as the _OC metric depends on a 1-layer NN training
# which may give different results on different architectures
expected_authenticity = 0.0
assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]


def test_authenticity_mismatched_sizes_numerical_real_larger() -> None:
numerical_real_data = pd.DataFrame({"x": [0.0, 1.0, 2.0], "y": [0.0, 1.0, 2.0]})
numerical_synthetic_data = pd.DataFrame({"x": [0.0, 10.0], "y": [1.0, 10.0]})

categorical_real_encoded = pd.DataFrame()
categorical_synthetic_encoded = pd.DataFrame()

numerical_real_numpy = numerical_real_data.to_numpy()
numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()

real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
categorical_real_encoded,
categorical_synthetic_encoded,
numerical_real_numpy,
numerical_synthetic_numpy,
)

alpha_precision_metric = AlphaPrecision(naive_only=False)
quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

# Check naive authenticity as the _OC metric depends on a 1-layer NN training
# which may give different results on different architectures
expected_authenticity = 0.5
assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]
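
A hand-check of expected_authenticity = 0.5 here, assuming naive authenticity compares raw Euclidean distances as in the fixed statistical_eval.py: synthetic (0.0, 1.0) lies at distance 1.0 from its nearest real point, closer than that point's own nearest real neighbor (sqrt(2)), so it reads as a near-copy; synthetic (10.0, 10.0) is far from every real point and counts as authentic, giving 1/2 = 0.5.

import numpy as np
from sklearn.neighbors import NearestNeighbors

real = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
synth = np.array([[0.0, 1.0], [10.0, 10.0]])

nbrs = NearestNeighbors(n_neighbors=2, p=2).fit(real)
real_to_real = nbrs.kneighbors(real)[0][:, 1]               # [sqrt(2), sqrt(2), sqrt(2)]
distances, indices = nbrs.kneighbors(synth)
authentic = real_to_real[indices[:, 0]] <= distances[:, 0]  # [False, True]
print(authentic.mean())  # 0.5

The same arithmetic explains the 0.0 and 0.2 expectations in the neighboring tests.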


def test_authenticity_mismatched_sizes_numerical_synthetic_larger() -> None:
numerical_real_data = pd.DataFrame(
{
"x": [0.0, 2.0],
"y": [0.0, 2.0],
}
)

numerical_synthetic_data = pd.DataFrame(
{
"x": [0.0, 1.0, 2.0, 3.0, 10.0],
"y": [0.0, 1.0, 2.0, 3.0, 10.0],
}
)

categorical_real_encoded = pd.DataFrame()
categorical_synthetic_encoded = pd.DataFrame()

numerical_real_numpy = numerical_real_data.to_numpy()
numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()

real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
categorical_real_encoded,
categorical_synthetic_encoded,
numerical_real_numpy,
numerical_synthetic_numpy,
)

alpha_precision_metric = AlphaPrecision(naive_only=False)
quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

# Check naive authenticity as the _OC metric depends on a 1-layer NN training
# which may give different results on different architectures
expected_authenticity = 0.2
assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]