11import os
22
3+ from urllib .request import Request , urlopen
4+ from urllib .parse import urlparse , urlunparse
5+ from warnings import warn
6+
37import pytest
48
59from doi import (
812)
913
1014
def simplify_url(u: str):
    """Return *u* parsed into URL components with query and fragment removed.

    The result is the named tuple produced by ``urlparse`` (not a string), so
    two simplified URLs compare equal exactly when their scheme, netloc, path
    and params match. Call ``.geturl()`` on the result to get a string back.
    """
    parts = urlparse(u)
    # Query strings and fragments are dropped so they do not take part in
    # comparisons between simplified URLs.
    return parts._replace(query='', fragment='')
17+
18+
def resolve_redirects(u: str, timeout: float = 30.0):
    """Fetch *u* over https and return the simplified final URL.

    The request is sent with a browser-like ``User-Agent`` header, and the URL
    reported by the response (after any redirects ``urlopen`` followed) is
    passed through ``simplify_url``.

    Args:
        u: URL to resolve; its scheme is replaced with ``https`` first.
        timeout: Seconds to wait on blocking network operations, so an
            unresponsive server cannot hang the test run indefinitely.

    Returns:
        Whatever ``simplify_url`` returns for the final response URL.

    Raises:
        Any error raised by ``urlopen`` (network failure, HTTP error,
        timeout) propagates to the caller.
    """
    # Unconditionally upgrade to https, since some resolvers seem to require it
    # If removed, it'd make sense to canonicalize in simplify_url instead to
    # prevent spurious test failures
    https_url = urlunparse(urlparse(u)._replace(scheme='https'))
    req = Request(https_url, headers={'User-Agent': 'Mozilla/5.0'})
    with urlopen(req, timeout=timeout) as r:
        return simplify_url(r.url)
27+
28+
def normalize_eq(u: str, v: str) -> bool:
    """Return whether URLs *u* and *v* are equal, tolerating superficial drift.

    Checks run from cheapest to most expensive:

    1. exact string equality (no warning is emitted);
    2. equality after ``simplify_url``;
    3. equality of ``resolve_redirects`` results, which performs network
       requests.

    A warning is emitted whenever the strings differ textually, even if a
    later check succeeds, so that the expected value in the test data can be
    refreshed.
    """
    if u == v:
        return True
    # stacklevel=2 attributes the warning to the calling test case, which is
    # the place the message asks the reader to update.
    warn(f"{u} textually differs from {v} , please update the relevant case.\n "
         "Attempting to recover by resolving redirects",
         stacklevel=2)
    if simplify_url(u) == simplify_url(v):
        return True
    return resolve_redirects(u) == resolve_redirects(v)
37+
38+
1139@pytest .mark .net
1240def test_validate_doi () -> None :
1341 data = [
@@ -25,7 +53,7 @@ def test_validate_doi() -> None:
2553 "https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141" ),
2654 ]
2755 for doi , url in data :
28- assert url == validate_doi (doi )
56+ assert normalize_eq ( url , validate_doi (doi ) )
2957
3058 for doi in ["" , "asdf" ]:
3159 try :
@@ -42,7 +70,7 @@ def test_get_real_url_from_doi() -> None:
4270 "article/abs/pii/S0009261497040141" ),
4371 ]
4472 for doi , url in data :
45- assert url == get_real_url_from_doi (doi )
73+ assert normalize_eq ( url , get_real_url_from_doi (doi ) )
4674
4775
4876def test_find_doi_in_line () -> None :
0 commit comments