fix(metrics-autheticity): correct authenticity logic to match paper definition. #86
Changes from all commits:
```diff
@@ -19,7 +19,11 @@
 def test_alpha_precision_evaluation() -> None:
-    set_all_random_seeds(1)
+    # Setting the parameters to True helps get consistent output on the same architecture for the _OC metrics
+    # that use an embedding by training a 1-layer NN. We do not run this on the cluster for the same
+    # reason and just let it run on GitHub since the architecture on the cluster is different from
+    # that of GitHub.
+    set_all_random_seeds(1, use_deterministic_torch_algos=True, disable_torch_benchmarking=True)
 
     real_data, synthetic_data, meta_info = load_midst_data(REAL_DATA_PATH, SYNTHETIC_DATA_PATH, META_INFO_PATH)
```
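For context, a seeding helper with these flags would plausibly look like the sketch below. This is an assumption about what `set_all_random_seeds` does rather than a quote of the toolkit's implementation; the point is that deterministic kernels and disabled benchmarking trade a little speed for run-to-run reproducibility, which the `_OC` metrics need here.

```python
import random

import numpy as np
import torch


def set_all_random_seeds(
    seed: int,
    use_deterministic_torch_algos: bool = False,
    disable_torch_benchmarking: bool = False,
) -> None:
    # Seed every RNG the evaluation code touches.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if use_deterministic_torch_algos:
        # Prefer deterministic kernels wherever PyTorch offers a choice.
        torch.use_deterministic_algorithms(True, warn_only=True)
    if disable_torch_benchmarking:
        # cuDNN benchmarking auto-selects kernels at runtime, which can differ between runs.
        torch.backends.cudnn.benchmark = False
```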
`@@ -49,17 +53,13 @@ def test_alpha_precision_evaluation() -> None:`

```python
    if is_apple_silicon():
        assert pytest.approx(0.972538441890166, abs=1e-8) == quality_results["delta_precision_alpha_OC"]
        assert pytest.approx(0.4709851851851852, abs=1e-8) == quality_results["delta_coverage_beta_OC"]
        assert pytest.approx(0.512, abs=1e-8) == quality_results["authenticity_OC"]
        assert pytest.approx(0.05994074074074074, abs=1e-8) == quality_results["delta_precision_alpha_naive"]
        assert pytest.approx(0.005229629629629584, abs=1e-8) == quality_results["delta_coverage_beta_naive"]
        assert pytest.approx(0.9905185185185185, abs=1e-8) == quality_results["authenticity_naive"]
    else:
        assert pytest.approx(0.9732668369518944, abs=1e-8) == quality_results["delta_precision_alpha_OC"]
        assert pytest.approx(0.47238271604938276, abs=1e-8) == quality_results["delta_coverage_beta_OC"]
        assert pytest.approx(0.5102592592592593, abs=1e-8) == quality_results["authenticity_OC"]
```
Collaborator: This is probably a naive question, but does your change affect the `_OC` authenticity metric as well?

Author: Yes, exactly. The process for calculating authenticity is shared between them; it is just that for the `_OC` variant the data is first embedded using the one-layer NN.

Collaborator: Got it!
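For reference, the authenticity metric these tests exercise comes from the α-precision/β-recall paper (Alaa et al., 2022) and reduces to a nearest-neighbour test per synthetic sample. The sketch below illustrates that definition; it is not the toolkit's implementation. The naive variant applies the rule to the encoded features directly, while the `_OC` variant first embeds both datasets with the trained one-layer network, as described in the reply above.

```python
import numpy as np
from sklearn.neighbors import NearestNeighbors


def authenticity(real: np.ndarray, synthetic: np.ndarray) -> float:
    """Fraction of synthetic rows that are not "noisy copies" of a real row.

    A synthetic sample is flagged as unauthentic when it lies closer to its nearest
    real sample than that real sample lies to its own nearest real neighbour.
    """
    # Distance from each real point to its nearest *other* real point.
    # n_neighbors=2 because the closest match of a point within its own set is itself.
    real_to_real = NearestNeighbors(n_neighbors=2).fit(real).kneighbors(real)[0][:, 1]

    # Distance from each synthetic point to its nearest real point, and that real point's index.
    distances, indices = NearestNeighbors(n_neighbors=1).fit(real).kneighbors(synthetic)

    authentic = distances[:, 0] > real_to_real[indices[:, 0]]
    return float(authentic.mean())
```

Because the rule is evaluated once per synthetic sample, the denominator is the synthetic sample count, which is what makes the mismatched-size fixtures in the new test file below meaningful.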
```python
    assert pytest.approx(0.05994074074074074, abs=1e-8) == quality_results["delta_precision_alpha_naive"]
    assert pytest.approx(0.005229629629629584, abs=1e-8) == quality_results["delta_coverage_beta_naive"]
    assert pytest.approx(0.9905185185185185, abs=1e-8) == quality_results["authenticity_naive"]

    # Unset seed for safety
    unset_all_random_seeds()
```
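The `is_apple_silicon()` branch above exists because the `_OC` values depend on training a small embedding network, and the optimisation can land on slightly different numbers on ARM versus x86 hardware. A helper like that is typically a simple platform check; the sketch below is an assumption about its behaviour, not the toolkit's code.

```python
import platform
import sys


def is_apple_silicon() -> bool:
    # True on macOS running on an ARM (M-series) processor.
    return sys.platform == "darwin" and platform.machine() == "arm64"
```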
The PR also adds a new test module for the authenticity metric (`@@ -0,0 +1,198 @@`):

```python
import numpy as np
import pandas as pd
import pytest

from midst_toolkit.evaluation.quality.alpha_precision import AlphaPrecision
from midst_toolkit.evaluation.utils import one_hot_encode_categoricals_and_merge_with_numerical


def test_autheticity_only_categorical() -> None:
    categorical_real_data = pd.DataFrame(
        {
            "color": ["red", "blue"],
            "shape": ["circle", "square"],
        }
    )

    categorical_synthetic_data = pd.DataFrame(
        {
            "color": ["red", "blue"],
            "shape": ["square", "circle"],
        }
    )

    categorical_real_encoded = pd.get_dummies(categorical_real_data, columns=["color", "shape"]).astype(int)
    categorical_synthetic_encoded = pd.get_dummies(categorical_synthetic_data, columns=["color", "shape"]).astype(int)
    categorical_synthetic_encoded = categorical_synthetic_encoded.reindex(
        columns=categorical_real_encoded.columns, fill_value=0
    )

    numerical_real_numpy = np.empty((len(categorical_real_data), 0))
    numerical_synthetic_numpy = np.empty((len(categorical_synthetic_data), 0))

    real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
        categorical_real_encoded,
        categorical_synthetic_encoded,
        numerical_real_numpy,
        numerical_synthetic_numpy,
    )

    alpha_precision_metric = AlphaPrecision(naive_only=False)
    quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

    # Check naive authenticity as the _OC metric depends on a 1-layer NN training
    # which may give different results on different architectures
    expected_authenticity = 0.0
    assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]


def test_authenticity_only_numerical() -> None:
    numerical_real_data = pd.DataFrame(
        {
            "x": [0.0, 1.0],
            "y": [0.0, 1.0],
        }
    )

    numerical_synthetic_data = pd.DataFrame(
        {
            "x": [0.0, 1.0],
            "y": [1.0, 0.0],
        }
    )

    categorical_real_encoded = pd.DataFrame()
    categorical_synthetic_encoded = pd.DataFrame()

    numerical_real_numpy = numerical_real_data.to_numpy()
    numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()

    real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
        categorical_real_encoded,
        categorical_synthetic_encoded,
        numerical_real_numpy,
        numerical_synthetic_numpy,
    )

    alpha_precision_metric = AlphaPrecision(naive_only=False)
    quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

    # Check naive authenticity as the _OC metric depends on a 1-layer NN training
    # which may give different results on different architectures
    expected_authenticity = 0.0
    assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]


def test_authenticity_numerical_and_categorical() -> None:
    numerical_real_data = pd.DataFrame(
        {
            "num_feature": [0.0, 1.0],
        }
    )

    numerical_synthetic_data = pd.DataFrame(
        {
            "num_feature": [0.0, 1.0],
        }
    )

    categorical_real_data = pd.DataFrame(
        {
            "color": ["red", "blue"],
            "shape": ["circle", "square"],
        }
    )

    categorical_synthetic_data = pd.DataFrame(
        {
            "color": ["red", "blue"],
            "shape": ["square", "circle"],
        }
    )

    categorical_real_encoded = pd.get_dummies(categorical_real_data, columns=["color", "shape"]).astype(int)
    categorical_synthetic_encoded = pd.get_dummies(categorical_synthetic_data, columns=["color", "shape"]).astype(int)
    categorical_synthetic_encoded = categorical_synthetic_encoded.reindex(
        columns=categorical_real_encoded.columns, fill_value=0
    )

    numerical_real_numpy = numerical_real_data.to_numpy()
    numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()

    real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
        categorical_real_encoded,
        categorical_synthetic_encoded,
        numerical_real_numpy,
        numerical_synthetic_numpy,
    )

    alpha_precision_metric = AlphaPrecision(naive_only=False)
    quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

    # Check naive authenticity as the _OC metric depends on a 1-layer NN training
    # which may give different results on different architectures
    expected_authenticity = 0.0
    assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]


def test_authenticity_mismatched_sizes_numerical_real_larger() -> None:
    numerical_real_data = pd.DataFrame({"x": [0.0, 1.0, 2.0], "y": [0.0, 1.0, 2.0]})
    numerical_synthetic_data = pd.DataFrame({"x": [0.0, 10.0], "y": [1.0, 10.0]})

    categorical_real_encoded = pd.DataFrame()
    categorical_synthetic_encoded = pd.DataFrame()

    numerical_real_numpy = numerical_real_data.to_numpy()
    numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()

    real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
        categorical_real_encoded,
        categorical_synthetic_encoded,
        numerical_real_numpy,
        numerical_synthetic_numpy,
    )

    alpha_precision_metric = AlphaPrecision(naive_only=False)
    quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

    # Check naive authenticity as the _OC metric depends on a 1-layer NN training
    # which may give different results on different architectures
    expected_authenticity = 0.5
    assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]


def test_authenticity_mismatched_sizes_numerical_synthetic_larger() -> None:
    numerical_real_data = pd.DataFrame(
        {
            "x": [0.0, 2.0],
            "y": [0.0, 2.0],
        }
    )

    numerical_synthetic_data = pd.DataFrame(
        {
            "x": [0.0, 1.0, 2.0, 3.0, 10.0],
            "y": [0.0, 1.0, 2.0, 3.0, 10.0],
        }
    )

    categorical_real_encoded = pd.DataFrame()
    categorical_synthetic_encoded = pd.DataFrame()

    numerical_real_numpy = numerical_real_data.to_numpy()
    numerical_synthetic_numpy = numerical_synthetic_data.to_numpy()

    real_dataframe, synthetic_dataframe = one_hot_encode_categoricals_and_merge_with_numerical(
        categorical_real_encoded,
        categorical_synthetic_encoded,
        numerical_real_numpy,
        numerical_synthetic_numpy,
    )

    alpha_precision_metric = AlphaPrecision(naive_only=False)
    quality_results = alpha_precision_metric.compute(real_dataframe, synthetic_dataframe)

    # Check naive authenticity as the _OC metric depends on a 1-layer NN training
    # which may give different results on different architectures
    expected_authenticity = 0.2
    assert pytest.approx(expected_authenticity, abs=1e-8) == quality_results["authenticity_naive"]
```
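As a sanity check on the mismatched-size expectations, the nearest-neighbour rule sketched earlier can be applied to the raw fixture values by hand. This uses the coordinates before any preprocessing, which is an assumption about `AlphaPrecision.compute`; for these fixtures any per-column rescaling leaves every distance comparison unchanged, so the result should agree with `authenticity_naive`.

```python
import numpy as np
from sklearn.neighbors import NearestNeighbors

# Fixture from test_authenticity_mismatched_sizes_numerical_real_larger.
real = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
synthetic = np.array([[0.0, 1.0], [10.0, 10.0]])

# Distance from each real point to its nearest *other* real point
# (n_neighbors=2 because a point's closest hit within its own set is itself).
real_to_real = NearestNeighbors(n_neighbors=2).fit(real).kneighbors(real)[0][:, 1]

# Distance from each synthetic point to its nearest real point, plus that real point's index.
distances, indices = NearestNeighbors(n_neighbors=1).fit(real).kneighbors(synthetic)

authentic = distances[:, 0] > real_to_real[indices[:, 0]]
print(authentic.mean())  # 0.5: (0, 1) hugs a real point, (10, 10) does not.
```

Applying the same rule to the second fixture flags every synthetic row except (10, 10), giving the expected 0.2, and it flags every synthetic row in the first three fixtures, giving 0.0.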
Comment: This error brought to you by someone who couldn't be bothered to actually fully understand what the nearest neighbor function does... yikes.
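The nearest-neighbour pitfalls that comment alludes to are easy to hit with `sklearn.neighbors.NearestNeighbors`, a common choice for implementing this metric (whether the toolkit uses it internally is an assumption here). The sketch below spells out the two properties that matter when computing authenticity.

```python
import numpy as np
from sklearn.neighbors import NearestNeighbors

real = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
synthetic = np.array([[0.0, 1.0], [10.0, 10.0]])

nn_real = NearestNeighbors(n_neighbors=2).fit(real)

# 1. Querying the fitted set with itself returns each point as its own nearest
#    neighbour at distance 0, so the "nearest other real point" is column 1.
distances, indices = nn_real.kneighbors(real)
assert np.all(distances[:, 0] == 0.0)

# 2. kneighbors returns indices into the *fitted* (real) data. Querying with the
#    synthetic set therefore yields one row per synthetic point whose entries are
#    real-row indices; they must not be confused with synthetic-row indices or
#    used against arrays of a different length.
distances, indices = nn_real.kneighbors(synthetic)
assert indices.shape == (len(synthetic), 2)
assert indices.max() < len(real)
```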