-
Notifications
You must be signed in to change notification settings - Fork 18
Introduce SelectFields
#118
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3f2d71c
5513929
f468b21
9ac6a24
1c7cb78
9601862
8e78430
e1e2451
ff9fc71
1a42d48
a493052
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,8 @@ | ||
| import asyncio | ||
| import random | ||
| from typing import Optional | ||
| import warnings | ||
| from collections import defaultdict | ||
| from typing import DefaultDict, Optional | ||
|
|
||
| import attrs | ||
| import pytest | ||
|
|
@@ -23,6 +25,7 @@ | |
| HttpResponse, | ||
| Injectable, | ||
| ItemPage, | ||
| SelectFields, | ||
| field, | ||
| item_from_fields, | ||
| item_from_fields_sync, | ||
|
|
@@ -630,3 +633,172 @@ def x(self) -> int: | |
| assert info["x"] == FieldInfo(name="x", meta=None, out=None, disabled=True) | ||
| assert info["y"] == FieldInfo(name="y", meta=None, out=None, disabled=False) | ||
| assert info["z"] == FieldInfo(name="z", meta=None, out=None, disabled=True) | ||
|
|
||
|
|
||
| @attrs.define | ||
| class BigItem: | ||
| x: int | ||
| y: Optional[int] = None | ||
| z: Optional[int] = None | ||
|
|
||
|
|
||
| @attrs.define | ||
| class BigPage(ItemPage[BigItem]): | ||
| select_fields: Optional[SelectFields] = None | ||
| call_counter: DefaultDict = attrs.field(factory=lambda: defaultdict(int)) | ||
|
|
||
| @field | ||
| def x(self): | ||
| self.call_counter["x"] += 1 | ||
| return 1 | ||
|
|
||
| @field(disabled=False) | ||
| def y(self): | ||
| self.call_counter["y"] += 1 | ||
| return 2 | ||
|
|
||
| @field(disabled=True) | ||
| def z(self): | ||
| self.call_counter["z"] += 1 | ||
| return 3 | ||
|
|
||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_select_fields() -> None: | ||
| # Required fields from the item cls which are not included raise an TypeError | ||
| expected_type_error_msg = ( | ||
| r"__init__\(\) missing 1 required positional argument: 'x'" | ||
| ) | ||
|
|
||
| # When SelectFields isn't set, it should simply extract the non-disabled | ||
| # fields. | ||
| page = BigPage() | ||
| item = await page.to_item() | ||
| assert item == BigItem(x=1, y=2, z=None) | ||
| assert page.fields_to_extract == ["x", "y"] | ||
| assert page.call_counter == {"x": 1, "y": 1} | ||
|
|
||
| # If no field selection directive is given but SelectFields is set, it would | ||
| # use the default fields that are not disabled. | ||
| page = BigPage(SelectFields(fields=None)) | ||
| item = await page.to_item() | ||
| assert item == BigItem(x=1, y=2, z=None) | ||
| assert page.fields_to_extract == ["x", "y"] | ||
| assert page.call_counter == {"x": 1, "y": 1} | ||
|
|
||
| # Same case as above but given an empty dict. | ||
| page = BigPage(SelectFields(fields={})) | ||
| item = await page.to_item() | ||
| assert item == BigItem(x=1, y=2, z=None) | ||
| assert page.fields_to_extract == ["x", "y"] | ||
| assert page.call_counter == {"x": 1, "y": 1} | ||
|
|
||
| # Select all fields | ||
| page = BigPage(SelectFields(fields={"*": True})) | ||
| item = await page.to_item() | ||
| assert item == BigItem(x=1, y=2, z=3) | ||
| assert page.fields_to_extract == ["x", "y", "z"] | ||
| assert page.call_counter == {"x": 1, "y": 1, "z": 1} | ||
|
|
||
| # Don't select all fields | ||
| page = BigPage(SelectFields(fields={"*": False})) | ||
| with pytest.raises(TypeError, match=expected_type_error_msg): | ||
| await page.to_item() | ||
| assert page.fields_to_extract == [] | ||
| assert page.call_counter == {} | ||
|
|
||
| # Exclude all but one | ||
| page = BigPage(SelectFields(fields={"*": False, "x": True})) | ||
| item = await page.to_item() | ||
| assert item == BigItem(x=1, y=None, z=None) | ||
| assert page.fields_to_extract == ["x"] | ||
| assert page.call_counter == {"x": 1} | ||
|
|
||
| # Include all fields but one | ||
| page = BigPage(SelectFields(fields={"*": True, "z": False})) | ||
| item = await page.to_item() | ||
| assert item == BigItem(x=1, y=2, z=None) | ||
| assert page.fields_to_extract == ["x", "y"] | ||
| assert page.call_counter == {"x": 1, "y": 1} | ||
|
|
||
| # overlapping directives on the same field should be okay | ||
| page = BigPage(SelectFields(fields={"*": True, "x": True, "y": True, "z": True})) | ||
| item = await page.to_item() | ||
| assert item == BigItem(x=1, y=2, z=3) | ||
| assert page.fields_to_extract == ["x", "y", "z"] | ||
| assert page.call_counter == {"x": 1, "y": 1, "z": 1} | ||
|
|
||
| # Excluding a required field throws an error | ||
| page = BigPage(SelectFields(fields={"x": False})) | ||
| with pytest.raises(TypeError, match=expected_type_error_msg): | ||
| item = await page.to_item() | ||
| assert page.fields_to_extract == ["y"] | ||
| assert page.call_counter == {"y": 1} | ||
|
|
||
| # boolean-like values are not supported. They are simply ignored and the | ||
| # page would revert back to the default field directives. | ||
| page = BigPage(SelectFields(fields={"x": 0, "y": 0, "z": 1})) # type: ignore[dict-item] | ||
| item = await page.to_item() | ||
| assert item == BigItem(x=1, y=2, z=None) | ||
| assert page.fields_to_extract == ["x", "y"] | ||
| assert page.call_counter == {"x": 1, "y": 1} | ||
|
|
||
| # If a non-SelectFields instance was passed to the `select_fields` init | ||
| # parameter, it would simply ignore it and use the default field directives. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It makes more sense to me to raise an error in this case. |
||
| page = BigPage(select_fields="not the instance it's expecting") # type: ignore[arg-type] | ||
| item = await page.to_item() | ||
| assert item == BigItem(x=1, y=2, z=None) | ||
| assert page.fields_to_extract == ["x", "y"] | ||
| assert page.call_counter == {"x": 1, "y": 1} | ||
|
|
||
| # The remaining tests below checks the different behaviors when encountering a | ||
| # field which doesn't existing in the PO | ||
| fields = {"x": True, "not_existing": True} | ||
| expected_attribute_error_msg = ( | ||
| "The {'not_existing'} fields isn't available in tests.test_fields.BigPage" | ||
| ) | ||
|
|
||
| # Unknown field raises an AttributeError by default | ||
| page = BigPage(SelectFields(fields=fields)) | ||
| with pytest.raises(AttributeError, match=expected_attribute_error_msg): | ||
| await page.to_item() | ||
| with pytest.raises(AttributeError, match=expected_attribute_error_msg): | ||
| assert page.fields_to_extract | ||
|
|
||
| page = BigPage(SelectFields(fields=fields, on_unknown_field="raise")) | ||
| with pytest.raises(AttributeError, match=expected_attribute_error_msg): | ||
| await page.to_item() | ||
| with pytest.raises(AttributeError, match=expected_attribute_error_msg): | ||
| assert page.fields_to_extract | ||
|
|
||
| # It should safely ignore it as well. | ||
| page = BigPage(SelectFields(fields=fields, on_unknown_field="ignore")) | ||
| with warnings.catch_warnings(record=True) as caught_warnings: | ||
| item = await page.to_item() | ||
| assert not caught_warnings | ||
| assert item == BigItem(x=1, y=2, z=None) | ||
| with warnings.catch_warnings(record=True) as caught_warnings: | ||
| assert page.fields_to_extract == ["x", "y"] | ||
| assert not caught_warnings | ||
| assert page.call_counter == {"x": 1, "y": 1} | ||
|
|
||
| # When 'warn' is used, the same msg when 'raise' is used. | ||
| page = BigPage(SelectFields(fields=fields, on_unknown_field="warn")) | ||
| with pytest.warns(UserWarning, match=expected_attribute_error_msg): | ||
| item = await page.to_item() | ||
| assert item == BigItem(x=1, y=2, z=None) | ||
| with pytest.warns(UserWarning, match=expected_attribute_error_msg): | ||
| assert page.fields_to_extract == ["x", "y"] | ||
| assert page.call_counter == {"x": 1, "y": 1} | ||
|
|
||
| # When SelectFields receive an invalid 'on_unknown_field' value, it should | ||
| # ignore it. Moreover, mypy should also raise an error (see | ||
| # tests_typing/test_fields) | ||
| page = BigPage( | ||
| # ignore mypy error since it's expecting a valid value inside the Literal. | ||
| SelectFields(fields=fields, on_unknown_field="bad val") # type: ignore[arg-type] | ||
| ) | ||
| item = await page.to_item() | ||
| assert item == BigItem(x=1, y=2, z=None) | ||
| assert page.fields_to_extract == ["x", "y"] | ||
| assert page.call_counter == {"x": 1, "y": 1} | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,7 +5,17 @@ | |
| import inspect | ||
| from contextlib import suppress | ||
| from functools import update_wrapper, wraps | ||
| from typing import Callable, Dict, List, Optional, Type, TypeVar | ||
| from typing import ( | ||
| Callable, | ||
| Dict, | ||
| Iterable, | ||
| List, | ||
| Literal, | ||
| MutableMapping, | ||
| Optional, | ||
| Type, | ||
| TypeVar, | ||
| ) | ||
|
|
||
| import attrs | ||
| from itemadapter import ItemAdapter | ||
|
|
@@ -172,13 +182,18 @@ def get_fields_dict( | |
|
|
||
|
|
||
| T = TypeVar("T") | ||
| UnknownFieldActions = Literal["ignore", "warn", "raise"] | ||
|
|
||
|
|
||
| # FIXME: type is ignored as a workaround for https://github.com/python/mypy/issues/3737 | ||
| # inference works properly if a non-default item_cls is passed; for dict | ||
| # it's not working (return type is Any) | ||
| async def item_from_fields( | ||
| obj, item_cls: Type[T] = dict, *, skip_nonitem_fields: bool = False # type: ignore[assignment] | ||
| obj, | ||
| item_cls: Type[T] = dict, # type: ignore[assignment] | ||
| *, | ||
| skip_nonitem_fields: bool = False, | ||
| field_names: Optional[Iterable[str]] = None, | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If an unknown field has been unintentionally passed, it'd error out with: I think this is descriptive enough and we don't need additional handling of unknown fields here. Handling it in the |
||
| ) -> T: | ||
| """Return an item of ``item_cls`` type, with its attributes populated | ||
| from the ``obj`` methods decorated with :class:`field` decorator. | ||
|
|
@@ -190,8 +205,13 @@ async def item_from_fields( | |
| to ``item_cls.__init__``, possibly causing exceptions if | ||
| ``item_cls.__init__`` doesn't support them. | ||
| """ | ||
| item_dict = item_from_fields_sync(obj, item_cls=dict, skip_nonitem_fields=False) | ||
| field_names = list(item_dict.keys()) | ||
| item_dict = item_from_fields_sync( | ||
| obj, | ||
| item_cls=dict, | ||
| skip_nonitem_fields=False, | ||
| field_names=field_names, | ||
| ) | ||
| field_names = field_names or list(item_dict.keys()) | ||
| if skip_nonitem_fields: | ||
| field_names = _without_unsupported_field_names(item_cls, field_names) | ||
| return item_cls( | ||
|
|
@@ -200,19 +220,43 @@ async def item_from_fields( | |
|
|
||
|
|
||
| def item_from_fields_sync( | ||
| obj, item_cls: Type[T] = dict, *, skip_nonitem_fields: bool = False # type: ignore[assignment] | ||
| obj, | ||
| item_cls: Type[T] = dict, # type: ignore[assignment] | ||
| *, | ||
| skip_nonitem_fields: bool = False, | ||
| field_names: Optional[Iterable[str]] = None, | ||
| ) -> T: | ||
| """Synchronous version of :func:`item_from_fields`.""" | ||
| field_names = list(get_fields_dict(obj)) | ||
| if field_names is None: | ||
| field_names = list(get_fields_dict(obj)) | ||
| if skip_nonitem_fields: | ||
| field_names = _without_unsupported_field_names(item_cls, field_names) | ||
| return item_cls(**{name: getattr(obj, name) for name in field_names}) | ||
|
|
||
|
|
||
| def _without_unsupported_field_names( | ||
| item_cls: type, field_names: List[str] | ||
| item_cls: type, field_names: Iterable[str] | ||
| ) -> List[str]: | ||
| item_field_names = ItemAdapter.get_field_names_from_class(item_cls) | ||
| if item_field_names is None: # item_cls doesn't define field names upfront | ||
| return field_names[:] | ||
| return list(field_names) | ||
| return list(set(field_names) & set(item_field_names)) | ||
|
|
||
|
|
||
| @attrs.define | ||
| class SelectFields: | ||
| """This is used as a dependency in a page object to control which fields it | ||
| would populate the item class that it returns. | ||
| """ | ||
|
|
||
| #: Fields that the page object would use to populate its item class. It's a | ||
| #: mapping of field names to boolean values to where ``True`` would indicate | ||
| #: it being included in ``.to_item()`` calls. | ||
| fields: Optional[MutableMapping[str, bool]] = None | ||
|
|
||
| #: Controls what happens when encountering an unknown field. For example, | ||
| #: an unknown field was passed and the page object doesn't recognize it. | ||
| #: | ||
| #: Setting it to ``"raise"`` would raise an :class:`AttributeError`, | ||
| #: ``"warn"`` produces a :class:`UserWarning`, while ``"ignore"`` does nothing. | ||
| on_unknown_field: UnknownFieldActions = "raise" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hey! Could you please elaborate on the use cases for on_unknown_field? Also, what does unknown field mean? Does it mean that the field is not in the item, or does it mean that the field is not defined on the page object?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It's mostly regarding about this but because of the parity between the page object and its item class, the field should generally not be in the item as well. The unknown field can be derived by something like I'm also thinking of removing this parameter and simply raise any unknown fields by default. What do you think?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Aha, I see. It seems the case with field not in the item and a field not in the page object are very different. If a field is not in the item, we should probably raise an exception. But there is a case where skip_nonitem_fields=True is used; in this case it actually could make sense to have fields in SelectFields which are not in the item. If a field is not defined in the page object, but is present in the item, then final item.field returned by to_item is going to have a default value, None in most cases. It could also be the case that the attribute is populated in to_item method without using web-poet fields. It's not an error. It seems that we should allow to pick any fields which are present in the item, even if they're not fields of the page object. We should have a clear answer though what happens when some of the item attributes are extracted using There is also a complicated case where there is a field defined, but then the value assigned to the final item is changed in to_item method. Probably it's fine not to support this case, but we should at least document it. Overall the problem looks more complicated than I initially thought :)
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When I suggested such an option, I was thinking of a scenario like this one: B team writes But it can indeed be more complex than that. When a field is removed, B team may want a different resolution. For example, they might want a warning for new fields, but an exception for missing fields. |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should raise an error, not ignore wrong values.