Skip to content

Commit 289b398

Browse files
committed
Resolve redirects when testing URLs for equality
The URLs that DOIs resolve to can move around, with redirects pointing to the new location. To make the tests more robust, only fail if the URLs still differ after following redirects. See also https://www.crossref.org/blog/urls-and-dois-a-complicated-relationship/
1 parent 0518560 commit 289b398

File tree

1 file changed

+21
-2
lines changed

1 file changed

+21
-2
lines changed

tests/test_doi.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,32 @@
33
import os
44
from pkg_resources import parse_version
55

6+
from urllib.error import HTTPError
7+
from urllib.request import Request, urlopen
8+
from urllib.parse import urlparse, urlunparse
9+
610
import pytest
711

812
from doi import (
913
validate_doi, find_doi_in_text, __version__, pdf_to_doi,
1014
get_real_url_from_doi
1115
)
1216

17+
def simplify_url(u):
    """Parse URL string *u* and blank out its query string and fragment.

    Returns the ``urllib.parse.ParseResult`` named tuple (not a string) with
    ``query`` and ``fragment`` set to ``''``; the remaining components are
    left exactly as ``urlparse`` produced them.
    """
    return urlparse(u)._replace(query='', fragment='')
19+
20+
def resolve_redirects(u, timeout=30):
    """Request *u* over HTTPS and return the simplified URL of the response.

    The scheme of *u* is replaced with ``https`` before the request is made,
    and the request carries a ``Mozilla/5.0`` User-Agent header. The URL
    reported by the response object is passed through ``simplify_url``.

    *timeout* is the number of seconds handed to ``urlopen``. Any exception
    raised by ``urlopen`` propagates to the caller.
    """
    u = urlunparse(urlparse(u)._replace(scheme='https'))
    req = Request(u, headers={'User-Agent': 'Mozilla/5.0'})
    # Without an explicit timeout urlopen may block indefinitely on an
    # unresponsive server, which would hang the whole test run.
    with urlopen(req, timeout=timeout) as r:
        return simplify_url(r.url)
25+
26+
def normalize_eq(u, v):
    """Return whether URLs *u* and *v* are equal, possibly after normalizing.

    The checks run in order and stop at the first one that succeeds:

    1. plain equality of the two values;
    2. equality of ``simplify_url(u)`` and ``simplify_url(v)``;
    3. equality of ``resolve_redirects(u)`` and ``resolve_redirects(v)``,
       which performs network requests and is only reached when the two
       cheaper checks fail.
    """
    return (
        u == v
        or simplify_url(u) == simplify_url(v)
        or resolve_redirects(u) == resolve_redirects(v)
    )
31+
1332

1433
def test_valid_version() -> None:
1534
"""Check that the package defines a valid __version__"""
@@ -32,7 +51,7 @@ def test_validate_doi() -> None:
3251
'https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141'),
3352
]
3453
for doi, url in data:
35-
assert url == validate_doi(doi)
54+
assert normalize_eq(url, validate_doi(doi))
3655

3756
for doi in ['', 'asdf']:
3857
try:
@@ -49,7 +68,7 @@ def test_get_real_url_from_doi() -> None:
4968
'article/abs/pii/S0009261497040141'),
5069
]
5170
for doi, url in data:
52-
assert url == get_real_url_from_doi(doi)
71+
assert normalize_eq(url, get_real_url_from_doi(doi))
5372

5473

5574
def test_find_doi_in_line() -> None:

0 commit comments

Comments
 (0)