Skip to content

Commit 2e4b622

Browse files
committed
Resolve redirects when testing URLs for equality
The URLs that DOIs resolve to can move around, with redirects pointing to the new location. To make the tests more robust, only fail if the URLs still differ after resolving redirects. See also https://www.crossref.org/blog/urls-and-dois-a-complicated-relationship/
1 parent 34ea67c commit 2e4b622

File tree

1 file changed

+30
-2
lines changed

1 file changed

+30
-2
lines changed

tests/test_doi.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import os
22

3+
from urllib.request import Request, urlopen
4+
from urllib.parse import urlparse, urlunparse
5+
from warnings import warn
6+
37
import pytest
48

59
from doi import (
@@ -8,6 +12,30 @@
812
)
913

1014

15+
def simplify_url(u):
    """Parse *u* and blank out its query string and fragment.

    Returns the ``urllib.parse.ParseResult`` for *u* with ``query`` and
    ``fragment`` replaced by empty strings; every other component is kept
    exactly as ``urlparse`` produced it.
    """
    components = urlparse(u)
    return components._replace(fragment='', query='')
17+
18+
19+
def resolve_redirects(u, timeout=30):
    """Fetch *u* and return the simplified URL the request finally lands on.

    Args:
        u: The URL to resolve. Its scheme is replaced with ``https`` before
            the request is made.
        timeout: Seconds to wait on blocking network operations before
            giving up. Without a bound, a single unresponsive host could
            stall the whole test run.

    Returns:
        The result of ``simplify_url`` applied to the URL of the response
        that ``urlopen`` returns.

    Network failures and timeouts are not caught here; whatever ``urlopen``
    raises propagates to the caller.
    """
    # Unconditionally upgrade to https, since some resolvers seem to require it
    # If removed, it'd make sense to canonicalize in simplify_url instead to
    # prevent spurious test failures
    https_url = urlunparse(urlparse(u)._replace(scheme='https'))
    # NOTE(review): presumably a browser-like User-Agent is sent because some
    # servers reject urllib's default one -- confirm.
    req = Request(https_url, headers={'User-Agent': 'Mozilla/5.0'})
    with urlopen(req, timeout=timeout) as r:
        return simplify_url(r.url)
27+
28+
29+
def normalize_eq(u, v):
    """Report whether URLs *u* and *v* should be treated as equal.

    Identical strings are equal outright. Otherwise a warning is emitted
    asking for the test case to be updated, and the URLs are compared first
    via ``simplify_url`` and, only if that still differs, via
    ``resolve_redirects`` (which performs network requests).
    """
    if u == v:
        return True

    message = (
        f"{u} textually differs from {v}, please update the relevant case.\n"
        "Attempting to recover by resolving redirects"
    )
    warn(message)

    # Cheap, offline comparison first; only hit the network when it fails.
    if simplify_url(u) == simplify_url(v):
        return True
    return resolve_redirects(u) == resolve_redirects(v)
37+
38+
1139
@pytest.mark.net
1240
def test_validate_doi() -> None:
1341
data = [
@@ -25,7 +53,7 @@ def test_validate_doi() -> None:
2553
"https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141"),
2654
]
2755
for doi, url in data:
28-
assert url == validate_doi(doi)
56+
assert normalize_eq(url, validate_doi(doi))
2957

3058
for doi in ["", "asdf"]:
3159
try:
@@ -42,7 +70,7 @@ def test_get_real_url_from_doi() -> None:
4270
"article/abs/pii/S0009261497040141"),
4371
]
4472
for doi, url in data:
45-
assert url == get_real_url_from_doi(doi)
73+
assert normalize_eq(url, get_real_url_from_doi(doi))
4674

4775

4876
def test_find_doi_in_line() -> None:

0 commit comments

Comments
 (0)