Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ per-file-ignores =
web_poet/serialization/__init__.py:F401,F403
web_poet/testing/__init__.py:F401,F403
tests/po_lib_to_return/__init__.py:D102
tests/test_fields.py:D102

# the suggestion makes the code worse
tests/test_serialization.py:B028
174 changes: 173 additions & 1 deletion tests/test_fields.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import asyncio
import random
from typing import Optional
import warnings
from collections import defaultdict
from typing import DefaultDict, Optional

import attrs
import pytest
Expand All @@ -23,6 +25,7 @@
HttpResponse,
Injectable,
ItemPage,
SelectFields,
field,
item_from_fields,
item_from_fields_sync,
Expand Down Expand Up @@ -630,3 +633,172 @@ def x(self) -> int:
assert info["x"] == FieldInfo(name="x", meta=None, out=None, disabled=True)
assert info["y"] == FieldInfo(name="y", meta=None, out=None, disabled=False)
assert info["z"] == FieldInfo(name="z", meta=None, out=None, disabled=True)


@attrs.define
class BigItem:
x: int
y: Optional[int] = None
z: Optional[int] = None


@attrs.define
class BigPage(ItemPage[BigItem]):
select_fields: Optional[SelectFields] = None
call_counter: DefaultDict = attrs.field(factory=lambda: defaultdict(int))

@field
def x(self):
self.call_counter["x"] += 1
return 1

@field(disabled=False)
def y(self):
self.call_counter["y"] += 1
return 2

@field(disabled=True)
def z(self):
self.call_counter["z"] += 1
return 3


@pytest.mark.asyncio
async def test_select_fields() -> None:
# Required fields from the item cls which are not included raise an TypeError
expected_type_error_msg = (
r"__init__\(\) missing 1 required positional argument: 'x'"
)

# When SelectFields isn't set, it should simply extract the non-disabled
# fields.
page = BigPage()
item = await page.to_item()
assert item == BigItem(x=1, y=2, z=None)
assert page.fields_to_extract == ["x", "y"]
assert page.call_counter == {"x": 1, "y": 1}

# If no field selection directive is given but SelectFields is set, it would
# use the default fields that are not disabled.
page = BigPage(SelectFields(fields=None))
item = await page.to_item()
assert item == BigItem(x=1, y=2, z=None)
assert page.fields_to_extract == ["x", "y"]
assert page.call_counter == {"x": 1, "y": 1}

# Same case as above but given an empty dict.
page = BigPage(SelectFields(fields={}))
item = await page.to_item()
assert item == BigItem(x=1, y=2, z=None)
assert page.fields_to_extract == ["x", "y"]
assert page.call_counter == {"x": 1, "y": 1}

# Select all fields
page = BigPage(SelectFields(fields={"*": True}))
item = await page.to_item()
assert item == BigItem(x=1, y=2, z=3)
assert page.fields_to_extract == ["x", "y", "z"]
assert page.call_counter == {"x": 1, "y": 1, "z": 1}

# Don't select all fields
page = BigPage(SelectFields(fields={"*": False}))
with pytest.raises(TypeError, match=expected_type_error_msg):
await page.to_item()
assert page.fields_to_extract == []
assert page.call_counter == {}

# Exclude all but one
page = BigPage(SelectFields(fields={"*": False, "x": True}))
item = await page.to_item()
assert item == BigItem(x=1, y=None, z=None)
assert page.fields_to_extract == ["x"]
assert page.call_counter == {"x": 1}

# Include all fields but one
page = BigPage(SelectFields(fields={"*": True, "z": False}))
item = await page.to_item()
assert item == BigItem(x=1, y=2, z=None)
assert page.fields_to_extract == ["x", "y"]
assert page.call_counter == {"x": 1, "y": 1}

# overlapping directives on the same field should be okay
page = BigPage(SelectFields(fields={"*": True, "x": True, "y": True, "z": True}))
item = await page.to_item()
assert item == BigItem(x=1, y=2, z=3)
assert page.fields_to_extract == ["x", "y", "z"]
assert page.call_counter == {"x": 1, "y": 1, "z": 1}

# Excluding a required field throws an error
page = BigPage(SelectFields(fields={"x": False}))
with pytest.raises(TypeError, match=expected_type_error_msg):
item = await page.to_item()
assert page.fields_to_extract == ["y"]
assert page.call_counter == {"y": 1}

# boolean-like values are not supported. They are simply ignored and the
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should raise an error, not ignore wrong values.

# page would revert back to the default field directives.
page = BigPage(SelectFields(fields={"x": 0, "y": 0, "z": 1})) # type: ignore[dict-item]
item = await page.to_item()
assert item == BigItem(x=1, y=2, z=None)
assert page.fields_to_extract == ["x", "y"]
assert page.call_counter == {"x": 1, "y": 1}

# If a non-SelectFields instance was passed to the `select_fields` init
# parameter, it would simply ignore it and use the default field directives.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes more sense to me to raise an error in this case.

page = BigPage(select_fields="not the instance it's expecting") # type: ignore[arg-type]
item = await page.to_item()
assert item == BigItem(x=1, y=2, z=None)
assert page.fields_to_extract == ["x", "y"]
assert page.call_counter == {"x": 1, "y": 1}

# The remaining tests below checks the different behaviors when encountering a
# field which doesn't existing in the PO
fields = {"x": True, "not_existing": True}
expected_attribute_error_msg = (
"The {'not_existing'} fields isn't available in tests.test_fields.BigPage"
)

# Unknown field raises an AttributeError by default
page = BigPage(SelectFields(fields=fields))
with pytest.raises(AttributeError, match=expected_attribute_error_msg):
await page.to_item()
with pytest.raises(AttributeError, match=expected_attribute_error_msg):
assert page.fields_to_extract

page = BigPage(SelectFields(fields=fields, on_unknown_field="raise"))
with pytest.raises(AttributeError, match=expected_attribute_error_msg):
await page.to_item()
with pytest.raises(AttributeError, match=expected_attribute_error_msg):
assert page.fields_to_extract

# It should safely ignore it as well.
page = BigPage(SelectFields(fields=fields, on_unknown_field="ignore"))
with warnings.catch_warnings(record=True) as caught_warnings:
item = await page.to_item()
assert not caught_warnings
assert item == BigItem(x=1, y=2, z=None)
with warnings.catch_warnings(record=True) as caught_warnings:
assert page.fields_to_extract == ["x", "y"]
assert not caught_warnings
assert page.call_counter == {"x": 1, "y": 1}

# When 'warn' is used, the same msg when 'raise' is used.
page = BigPage(SelectFields(fields=fields, on_unknown_field="warn"))
with pytest.warns(UserWarning, match=expected_attribute_error_msg):
item = await page.to_item()
assert item == BigItem(x=1, y=2, z=None)
with pytest.warns(UserWarning, match=expected_attribute_error_msg):
assert page.fields_to_extract == ["x", "y"]
assert page.call_counter == {"x": 1, "y": 1}

# When SelectFields receive an invalid 'on_unknown_field' value, it should
# ignore it. Moreover, mypy should also raise an error (see
# tests_typing/test_fields)
page = BigPage(
# ignore mypy error since it's expecting a valid value inside the Literal.
SelectFields(fields=fields, on_unknown_field="bad val") # type: ignore[arg-type]
)
item = await page.to_item()
assert item == BigItem(x=1, y=2, z=None)
assert page.fields_to_extract == ["x", "y"]
assert page.call_counter == {"x": 1, "y": 1}
10 changes: 10 additions & 0 deletions tests_typing/test_fields.mypy-testing
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ from web_poet import (
field,
item_from_fields,
item_from_fields_sync,
SelectFields,
)


Expand Down Expand Up @@ -138,3 +139,12 @@ async def test_item_from_fields_default_item_cls() -> None:
page = Page()
item1 = await item_from_fields(page)
reveal_type(item1) # R: builtins.dict[Any, Any]


@pytest.mark.mypy_testing
def test_select_fields_on_unknown_field() -> None:
SelectFields(fields={}, on_unknown_field="raise")
SelectFields(fields={}, on_unknown_field="warn")
SelectFields(fields={}, on_unknown_field="ignore")

SelectFields(fields={}, on_unknown_field="bad value") # E: Argument "on_unknown_field" to "SelectFields" has incompatible type "Literal['bad value']"; expected "Literal['ignore', 'warn', 'raise']" [arg-type]
2 changes: 1 addition & 1 deletion web_poet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .fields import field, item_from_fields, item_from_fields_sync
from .fields import SelectFields, field, item_from_fields, item_from_fields_sync
from .page_inputs import (
BrowserHtml,
HttpClient,
Expand Down
60 changes: 52 additions & 8 deletions web_poet/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,17 @@
import inspect
from contextlib import suppress
from functools import update_wrapper, wraps
from typing import Callable, Dict, List, Optional, Type, TypeVar
from typing import (
Callable,
Dict,
Iterable,
List,
Literal,
MutableMapping,
Optional,
Type,
TypeVar,
)

import attrs
from itemadapter import ItemAdapter
Expand Down Expand Up @@ -172,13 +182,18 @@ def get_fields_dict(


T = TypeVar("T")
UnknownFieldActions = Literal["ignore", "warn", "raise"]


# FIXME: type is ignored as a workaround for https://github.com/python/mypy/issues/3737
# inference works properly if a non-default item_cls is passed; for dict
# it's not working (return type is Any)
async def item_from_fields(
obj, item_cls: Type[T] = dict, *, skip_nonitem_fields: bool = False # type: ignore[assignment]
obj,
item_cls: Type[T] = dict, # type: ignore[assignment]
*,
skip_nonitem_fields: bool = False,
field_names: Optional[Iterable[str]] = None,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If an unknown field has been unintentionally passed, it'd error out with:

# omitted stack trace

File ~/dev/z/web-poet/web_poet/fields.py:235, in <dictcomp>(.0)
    233     if skip_nonitem_fields:
    234         field_names = _without_unsupported_field_names(item_cls, field_names)
--> 235 return item_cls(**{name: getattr(obj, name) for name in field_names})

AttributeError: 'SomePage' object has no attribute 'unknown_field'

I think this is descriptive enough and we don't need additional handling of unknown fields here. Handling it in the .fields_to_extract property method should be enough.

) -> T:
"""Return an item of ``item_cls`` type, with its attributes populated
from the ``obj`` methods decorated with :class:`field` decorator.
Expand All @@ -190,8 +205,13 @@ async def item_from_fields(
to ``item_cls.__init__``, possibly causing exceptions if
``item_cls.__init__`` doesn't support them.
"""
item_dict = item_from_fields_sync(obj, item_cls=dict, skip_nonitem_fields=False)
field_names = list(item_dict.keys())
item_dict = item_from_fields_sync(
obj,
item_cls=dict,
skip_nonitem_fields=False,
field_names=field_names,
)
field_names = field_names or list(item_dict.keys())
if skip_nonitem_fields:
field_names = _without_unsupported_field_names(item_cls, field_names)
return item_cls(
Expand All @@ -200,19 +220,43 @@ async def item_from_fields(


def item_from_fields_sync(
obj, item_cls: Type[T] = dict, *, skip_nonitem_fields: bool = False # type: ignore[assignment]
obj,
item_cls: Type[T] = dict, # type: ignore[assignment]
*,
skip_nonitem_fields: bool = False,
field_names: Optional[Iterable[str]] = None,
) -> T:
"""Synchronous version of :func:`item_from_fields`."""
field_names = list(get_fields_dict(obj))
if field_names is None:
field_names = list(get_fields_dict(obj))
if skip_nonitem_fields:
field_names = _without_unsupported_field_names(item_cls, field_names)
return item_cls(**{name: getattr(obj, name) for name in field_names})


def _without_unsupported_field_names(
item_cls: type, field_names: List[str]
item_cls: type, field_names: Iterable[str]
) -> List[str]:
item_field_names = ItemAdapter.get_field_names_from_class(item_cls)
if item_field_names is None: # item_cls doesn't define field names upfront
return field_names[:]
return list(field_names)
return list(set(field_names) & set(item_field_names))


@attrs.define
class SelectFields:
"""This is used as a dependency in a page object to control which fields it
would populate the item class that it returns.
"""

#: Fields that the page object would use to populate its item class. It's a
#: mapping of field names to boolean values to where ``True`` would indicate
#: it being included in ``.to_item()`` calls.
fields: Optional[MutableMapping[str, bool]] = None

#: Controls what happens when encountering an unknown field. For example,
#: an unknown field was passed and the page object doesn't recognize it.
#:
#: Setting it to ``"raise"`` would raise an :class:`AttributeError`,
#: ``"warn"`` produces a :class:`UserWarning`, while ``"ignore"`` does nothing.
on_unknown_field: UnknownFieldActions = "raise"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey! Could you please elaborate on the use cases for on_unknown_field?

Also, what does unknown field mean? Does it mean that the field is not in the item, or does it mean that the field is not defined on the page object?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it mean that the field is not defined on the page object?

It's mostly regarding about this but because of the parity between the page object and its item class, the field should generally not be in the item as well.

The unknown field can be derived by something like set(fields_to_include) - set(fields_to_exclude). This means unknown fields that are to be excluded won't be considered as actual unknown fields.

I'm also thinking of removing this parameter and simply raise any unknown fields by default. What do you think?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha, I see.

It seems the case with field not in the item and a field not in the page object are very different.

If a field is not in the item, we should probably raise an exception. But there is a case where skip_nonitem_fields=True is used; in this case it actually could make sense to have fields in SelectFields which are not in the item.

If a field is not defined in the page object, but is present in the item, then final item.field returned by to_item is going to have a default value, None in most cases. It could also be the case that the attribute is populated in to_item method without using web-poet fields. It's not an error. It seems that we should allow to pick any fields which are present in the item, even if they're not fields of the page object.

We should have a clear answer though what happens when some of the item attributes are extracted using @fields, and some of them are added later, in the to_item method.

There is also a complicated case where there is a field defined, but then the value assigned to the final item is changed in to_item method. Probably it's fine not to support this case, but we should at least document it.

Overall the problem looks more complicated than I initially thought :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I suggested such an option, I was thinking of a scenario like this one:

B team writes BPage, subclassing APage from A team, and returning BItem, which does not subclass AItem. B team might not want new fields in APage to break BPage. But they might want a warning or error to look into it eventually in case we want to do something about it.

But it can indeed be more complex than that. When a field is removed, B team may want a different resolution. For example, they might want a warning for new fields, but an exception for missing fields.

Loading