From 25aa0647852d18ca71b74bd95986fc7c2ee2f032 Mon Sep 17 00:00:00 2001
From: Tobias Schmidt
Date: Sun, 25 Nov 2018 09:09:10 +0100
Subject: [PATCH 1/2] allow InListValidation to ignore missings

---
 pandas_schema/validation.py | 13 ++++++++++---
 test/test_validation.py     | 25 +++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/pandas_schema/validation.py b/pandas_schema/validation.py
index f3cfab3..adeab8a 100644
--- a/pandas_schema/validation.py
+++ b/pandas_schema/validation.py
@@ -364,12 +364,14 @@ class InListValidation(_SeriesValidation):
     Checks that each element in this column is contained within a list of possibilities
     """
 
-    def __init__(self, options: typing.Iterable, case_sensitive: bool = True, **kwargs):
+    def __init__(self, options: typing.Iterable, case_sensitive: bool = True, ignore_nas = False, **kwargs):
         """
         :param options: A list of values to check. If the value of a cell is in this list, it is considered to pass the
             validation
+        :param ignore_nas: ignore nas (boolean, default False)
         """
         self.case_sensitive = case_sensitive
+        self.ignore_nas = ignore_nas
         self.options = options
         super().__init__(**kwargs)
 
@@ -380,9 +382,14 @@ def default_message(self):
 
     def validate(self, series: pd.Series) -> pd.Series:
         if self.case_sensitive:
-            return series.isin(self.options)
+            is_in_options = series.isin(self.options)
         else:
-            return series.str.lower().isin([s.lower() for s in self.options])
+            is_in_options = series.str.lower().isin([s.lower() for s in self.options])
+
+        if self.ignore_nas:
+            is_in_options = is_in_options | series.isna()
+
+        return is_in_options
 
 
 class DateFormatValidation(_SeriesValidation):
diff --git a/test/test_validation.py b/test/test_validation.py
index 41a0161..26af016 100644
--- a/test/test_validation.py
+++ b/test/test_validation.py
@@ -315,6 +315,31 @@ def test_invalid_elements(self):
             'accepts elements that are not in the validation list'
         )
 
+class InListIgnoringNas(ValidationTestBase):
+    def setUp(self):
+        self.validator = InListValidation(['a', 'b', 'c'], ignore_nas=True)
+
+    def test_valid_elements(self):
+        self.validate_and_compare(
+            [
+                np.NaN
+            ],
+            True,
+            'does not ignore NaNs when it should'
+        )
+
+class InListNotIgnoringNas(ValidationTestBase):
+    def setUp(self):
+        self.validator = InListValidation(['a', 'b', 'c'], ignore_nas=False)
+
+    def test_valid_elements(self):
+        self.validate_and_compare(
+            [
+                np.NaN
+            ],
+            False,
+            'ignores NaNs when it should not'
+        )
 
 class DateFormat(ValidationTestBase):
     def setUp(self):

From f306a15eb12d6ebeb95b3d2be7d48be56b4e2095 Mon Sep 17 00:00:00 2001
From: Tobias Schmidt
Date: Sun, 25 Nov 2018 09:13:22 +0100
Subject: [PATCH 2/2] add type annotation

---
 pandas_schema/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas_schema/validation.py b/pandas_schema/validation.py
index adeab8a..5b7d0f9 100644
--- a/pandas_schema/validation.py
+++ b/pandas_schema/validation.py
@@ -364,7 +364,7 @@ class InListValidation(_SeriesValidation):
     Checks that each element in this column is contained within a list of possibilities
     """
 
-    def __init__(self, options: typing.Iterable, case_sensitive: bool = True, ignore_nas = False, **kwargs):
+    def __init__(self, options: typing.Iterable, case_sensitive: bool = True, ignore_nas: bool = False, **kwargs):
         """
         :param options: A list of values to check. If the value of a cell is in this list, it is considered to pass the
             validation