27 changes: 12 additions & 15 deletions src/midst_toolkit/evaluation/quality/synthcity/statistical_eval.py
@@ -101,12 +101,8 @@ def metrics(
alphas, alpha_precision_curve, beta_coverage_curve, delta_precision_alpha, delta_coverage_beta,
authenticity.
"""
if len(x) != len(x_syn):
raise RuntimeError("The real and synthetic data must have the same length")

if emb_center is None:
emb_center = np.mean(x, axis=0)

n_steps = 30
alphas = np.linspace(0, 1, n_steps)

@@ -118,8 +114,8 @@ def metrics(
beta_coverage_curve: list[float] = []

synth_to_center = np.sqrt(np.sum((x_syn - emb_center) ** 2, axis=1))

nbrs_real = NearestNeighbors(n_neighbors=2, n_jobs=-1, p=2).fit(x)

k_neighbors_real = nbrs_real.kneighbors(x)
assert isinstance(k_neighbors_real, tuple)
real_to_real, _ = k_neighbors_real
@@ -135,38 +131,39 @@ def metrics(
real_to_synth_args = real_to_synth_args.squeeze()

real_synth_closest = x_syn[real_to_synth_args]

real_synth_closest_d = np.sqrt(np.sum((real_synth_closest - synth_center) ** 2, axis=1))
closest_synth_radii = np.quantile(real_synth_closest_d, alphas)

for k in range(len(radii)):
precision_audit_mask = synth_to_center <= radii[k]
alpha_precision = np.mean(precision_audit_mask)

beta_coverage = np.mean(
((real_to_synth <= real_to_real) * (real_synth_closest_d <= closest_synth_radii[k]))
)

alpha_precision_curve.append(alpha_precision)
beta_coverage_curve.append(beta_coverage)

# See which one is bigger

authen = real_to_real[real_to_synth_args] < real_to_synth
Collaborator: This error brought to you by someone who couldn't be bothered to actually fully understand what the nearest neighbor function does...yikes.
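
For readers following the fix: the root of the bug is how scikit-learn's kneighbors behaves when the query set is the training set itself. A minimal illustration on toy data (standard scikit-learn API, not toolkit code):

import numpy as np
from sklearn.neighbors import NearestNeighbors

x = np.array([[0.0], [1.0], [3.0]])
nbrs = NearestNeighbors(n_neighbors=2, p=2).fit(x)

# Querying with the training data itself: each point's first neighbor is
# itself at distance zero, so the true nearest-neighbor distance is in column 1.
distances, _ = nbrs.kneighbors(x)
print(distances[:, 0])  # [0. 0. 0.]
print(distances[:, 1])  # [1. 1. 2.]

# Querying with other data (as with x_syn against a model fit on x): there is
# no self-match, so column 0 already holds the nearest real neighbor.
distances, indices = nbrs.kneighbors(np.array([[0.4]]))
print(distances[:, 0], indices[:, 0])  # [0.4] [0]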

authenticity = np.mean(authen)

delta_precision_alpha = 1.0 - np.sum(np.abs(np.array(alphas) - np.array(alpha_precision_curve))) / np.sum(
alphas
)

if delta_precision_alpha < 0:
raise RuntimeError("negative value detected for Delta_precision_alpha")

delta_coverage_beta = 1.0 - np.sum(np.abs(np.array(alphas) - np.array(beta_coverage_curve))) / np.sum(alphas)

if delta_coverage_beta < 0:
raise RuntimeError("negative value detected for Delta_coverage_beta")

k_neighbors_real_for_synthetic = nbrs_real.kneighbors(x_syn)
closest_real_to_synth_distance, closest_real_to_synthetic_idx_list = k_neighbors_real_for_synthetic
# Find the closest real point to each synthetic point
closest_real_to_synth_distance = closest_real_to_synth_distance[:, 0].squeeze()
closest_real_to_synthetic_idx_list = closest_real_to_synthetic_idx_list[:, 0].squeeze()

closest_real_to_real_distance = real_to_real[closest_real_to_synthetic_idx_list]
is_authetic = closest_real_to_real_distance <= closest_real_to_synth_distance

authenticity = np.mean(is_authetic.astype(int))

return (
alphas.tolist(),
alpha_precision_curve,
10 changes: 5 additions & 5 deletions tests/unit/evaluation/quality/test_alpha_precision.py
@@ -19,7 +19,11 @@


def test_alpha_precision_evaluation() -> None:
set_all_random_seeds(1)
# Setting these parameters to True helps get consistent output on the same architecture for the _OC metrics
# that use an embedding produced by training a 1-layer NN. We do not run this test on the cluster and only
# let it run on GitHub, since the cluster's architecture differs from GitHub's.
set_all_random_seeds(1, use_deterministic_torch_algos=True, disable_torch_benchmarking=True)

real_data, synthetic_data, meta_info = load_midst_data(REAL_DATA_PATH, SYNTHETIC_DATA_PATH, META_INFO_PATH)

@@ -49,17 +53,13 @@ def test_alpha_precision_evaluation() -> None:
if is_apple_silicon():
assert pytest.approx(0.972538441890166, abs=1e-8) == quality_results["delta_precision_alpha_OC"]
assert pytest.approx(0.4709851851851852, abs=1e-8) == quality_results["delta_coverage_beta_OC"]
assert pytest.approx(0.512, abs=1e-8) == quality_results["authenticity_OC"]
assert pytest.approx(0.05994074074074074, abs=1e-8) == quality_results["delta_precision_alpha_naive"]
assert pytest.approx(0.005229629629629584, abs=1e-8) == quality_results["delta_coverage_beta_naive"]
assert pytest.approx(0.9905185185185185, abs=1e-8) == quality_results["authenticity_naive"]
else:
assert pytest.approx(0.9732668369518944, abs=1e-8) == quality_results["delta_precision_alpha_OC"]
assert pytest.approx(0.47238271604938276, abs=1e-8) == quality_results["delta_coverage_beta_OC"]
assert pytest.approx(0.5102592592592593, abs=1e-8) == quality_results["authenticity_OC"]
Collaborator: This is probably a naive question, but does your change affect the authenticity_OC metric as well? If so, then removing this makes sense. Just want to check.

Collaborator (Author): Yes, exactly. The process for calculating authenticity is shared between them; for _OC, the data is simply embedded first using the one-layer NN.

Collaborator: Got it!
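
To make the thread above concrete, here is a sketch of the shared routine as described; this is illustrative, not the toolkit's exact implementation in statistical_eval.py, and embed stands in for the trained one-layer network:

import numpy as np
from sklearn.neighbors import NearestNeighbors


def authenticity(x_real: np.ndarray, x_syn: np.ndarray) -> float:
    # A synthetic point counts as authentic when its nearest real point is no
    # closer to it than that real point's own nearest real neighbor.
    nbrs = NearestNeighbors(n_neighbors=2, p=2).fit(x_real)
    real_to_real = nbrs.kneighbors(x_real)[0][:, 1]  # column 1 skips the self-match
    distances, indices = nbrs.kneighbors(x_syn)
    return float(np.mean(real_to_real[indices[:, 0]] <= distances[:, 0]))

# authenticity_naive: the routine on raw features.
# authenticity_OC: the same routine on embedded features, e.g.
# authenticity(embed(x_real), embed(x_syn)), where embed is the trained 1-layer NN.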

assert pytest.approx(0.05994074074074074, abs=1e-8) == quality_results["delta_precision_alpha_naive"]
assert pytest.approx(0.005229629629629584, abs=1e-8) == quality_results["delta_coverage_beta_naive"]
assert pytest.approx(0.9905185185185185, abs=1e-8) == quality_results["authenticity_naive"]

# Unset seed for safety
unset_all_random_seeds()
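
For context on the two new flags: set_all_random_seeds is a midst_toolkit helper, so the body below is an assumption about what such a helper typically does, not the toolkit's actual implementation:

import random

import numpy as np
import torch


def set_all_random_seeds_sketch(
    seed: int,
    use_deterministic_torch_algos: bool = False,
    disable_torch_benchmarking: bool = False,
) -> None:
    # Seed every RNG the test might touch.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if use_deterministic_torch_algos:
        # Ask PyTorch to use deterministic kernels wherever it provides them.
        torch.use_deterministic_algorithms(True)
    if disable_torch_benchmarking:
        # cuDNN benchmarking autotunes kernels per input shape and can pick
        # different algorithms between runs, breaking reproducibility.
        torch.backends.cudnn.benchmark = False
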
198 changes: 198 additions & 0 deletions tests/unit/evaluation/quality/test_autheticity.py
@@ -0,0 +1,198 @@
import numpy as np
import pandas as pd
import pytest

from midst_toolkit.evaluation.quality.alpha_precision import AlphaPrecision
from midst_toolkit.evaluation.utils import one_hot_encode_categoricals_and_merge_with_numerical


def test_autheticity_only_categorical() -> None:
categorical_real_data = pd.DataFrame(
{
"color": ["red", "blue"],
"shape": ["circle", "square"],
}
)

categorical_synthetic_data = pd.DataFrame(
{
"color": ["red", "blue"],
"shape": ["square", "circle"],
}
)

categorical_real_encoded = pd.get_dummies(categorical_real_data, columns=["color", "shape"]).astype(int)
categorical_synthetic_encoded = pd.get_dummies(categorical_synthetic_data, columns=["color", "shape"]).astype(int)
categorical_synthetic_encoded = categorical_synthetic_encoded.reindex(
columns=categorical_real_encoded.columns, fill_value=0
)

numerical_real_numpy = np.empty((len(categorical_real_data), 0))
numerical_synthetic_numpy = np.empty((len(categorical_synthetic_data), 0))

real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
categorical_real_encoded,
categorical_synthetic_encoded,
numerical_real_numpy,
numerical_synthetic_numpy,
)

alpha_precision_metric = AlphaPrecision(naive_only=False)
quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

# Check naive authenticity as the _OC metric depends on a 1-layer NN training
# which may give different results on different architectures
expected_authenticity = 0.0
assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]


def test_authenticity_only_numerical() -> None:
numerical_real_data = pd.DataFrame(
{
"x": [0.0, 1.0],
"y": [0.0, 1.0],
}
)

numerical_synthetic_data = pd.DataFrame(
{
"x": [0.0, 1.0],
"y": [1.0, 0.0],
}
)

categorical_real_encoded = pd.DataFrame()
categorical_synthetic_encoded = pd.DataFrame()

numerical_real_numpy = numerical_real_data.to_numpy()
numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()

real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
categorical_real_encoded,
categorical_synthetic_encoded,
numerical_real_numpy,
numerical_synthetic_numpy,
)

alpha_precision_metric = AlphaPrecision(naive_only=False)
quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

# Check naive authenticity as the _OC metric depends on a 1-layer NN training
# which may give different results on different architectures
expected_authenticity = 0.0
assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]


def test_authenticity_numerical_and_categorical() -> None:
numerical_real_data = pd.DataFrame(
{
"num_feature": [0.0, 1.0],
}
)

numerical_synthetic_data = pd.DataFrame(
{
"num_feature": [0.0, 1.0],
}
)

categorical_real_data = pd.DataFrame(
{
"color": ["red", "blue"],
"shape": ["circle", "square"],
}
)

categorical_synthetic_data = pd.DataFrame(
{
"color": ["red", "blue"],
"shape": ["square", "circle"],
}
)

categorical_real_encoded = pd.get_dummies(categorical_real_data, columns=["color", "shape"]).astype(int)
categorical_synthetic_encoded = pd.get_dummies(categorical_synthetic_data, columns=["color", "shape"]).astype(int)
categorical_synthetic_encoded = categorical_synthetic_encoded.reindex(
columns=categorical_real_encoded.columns, fill_value=0
)

numerical_real_numpy = numerical_real_data.to_numpy()
numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()

real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
categorical_real_encoded,
categorical_synthetic_encoded,
numerical_real_numpy,
numerical_synthetic_numpy,
)

alpha_precision_metric = AlphaPrecision(naive_only=False)
quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

# Check naive authenticity as the _OC metric depends on a 1-layer NN training
# which may give different results on different architectures
expected_authenticity = 0.0
assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]


def test_authenticity_mismatched_sizes_numerical_real_larger() -> None:
numerical_real_data = pd.DataFrame({"x": [0.0, 1.0, 2.0], "y": [0.0, 1.0, 2.0]})
numerical_synthetic_data = pd.DataFrame({"x": [0.0, 10.0], "y": [1.0, 10.0]})

categorical_real_encoded = pd.DataFrame()
categorical_synthetic_encoded = pd.DataFrame()

numerical_real_numpy = numerical_real_data.to_numpy()
numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()

real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
categorical_real_encoded,
categorical_synthetic_encoded,
numerical_real_numpy,
numerical_synthetic_numpy,
)

alpha_precision_metric = AlphaPrecision(naive_only=False)
quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

# Check naive authenticity as the _OC metric depends on a 1-layer NN training
# which may give different results on different architectures
expected_authenticity = 0.5
assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]
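
A hand-check of expected_authenticity = 0.5 here, assuming naive authenticity compares raw Euclidean distances as in the fixed statistical_eval.py: synthetic (0.0, 1.0) lies at distance 1.0 from its nearest real point, closer than that point's own nearest real neighbor (sqrt(2)), so it reads as a near-copy; synthetic (10.0, 10.0) is far from every real point and counts as authentic, giving 1/2 = 0.5.

import numpy as np
from sklearn.neighbors import NearestNeighbors

real = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
synth = np.array([[0.0, 1.0], [10.0, 10.0]])

nbrs = NearestNeighbors(n_neighbors=2, p=2).fit(real)
real_to_real = nbrs.kneighbors(real)[0][:, 1]               # [sqrt(2), sqrt(2), sqrt(2)]
distances, indices = nbrs.kneighbors(synth)
authentic = real_to_real[indices[:, 0]] <= distances[:, 0]  # [False, True]
print(authentic.mean())  # 0.5

The same arithmetic explains the 0.0 and 0.2 expectations in the neighboring tests.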


def test_authenticity_mismatched_sizes_numerical_synthetic_larger() -> None:
numerical_real_data = pd.DataFrame(
{
"x": [0.0, 2.0],
"y": [0.0, 2.0],
}
)

numerical_synthetic_data = pd.DataFrame(
{
"x": [0.0, 1.0, 2.0, 3.0, 10.0],
"y": [0.0, 1.0, 2.0, 3.0, 10.0],
}
)

categorical_real_encoded = pd.DataFrame()
categorical_synthetic_encoded = pd.DataFrame()

numerical_real_numpy = numerical_real_data.to_numpy()
numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()

real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
categorical_real_encoded,
categorical_synthetic_encoded,
numerical_real_numpy,
numerical_synthetic_numpy,
)

alpha_precision_metric = AlphaPrecision(naive_only=False)
quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

# Check naive authenticity as the _OC metric depends on a 1-layer NN training
# which may give different results on different architectures
expected_authenticity = 0.2
assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]