diff --git a/pid_resolver_lib/doi_ra_handler.py b/pid_resolver_lib/doi_ra_handler.py
index 6ac31c4..c91bb8d 100644
--- a/pid_resolver_lib/doi_ra_handler.py
+++ b/pid_resolver_lib/doi_ra_handler.py
@@ -24,7 +24,7 @@
RAs: Dict[str, Dict[str, Union[str, int]]] = {
'DataCite': {'mime': 'application/ld+json', 'sleep': 120},
'Crossref': {'mime': 'application/rdf+xml', 'sleep': 0},
- 'mEDRA': {'mime': 'application/rdf+xml', 'sleep': 0}
+ 'mEDRA': {'mime': 'application/vnd.medra.onixdoi+xml', 'sleep': 0}
}
REGISTRATION_AGENCY = 'RA:'
diff --git a/pid_resolver_lib/pid_analyzer.py b/pid_resolver_lib/pid_analyzer.py
index f2805a4..31c53e5 100644
--- a/pid_resolver_lib/pid_analyzer.py
+++ b/pid_resolver_lib/pid_analyzer.py
@@ -282,8 +282,9 @@ def analyze_doi_record_crossref(cache_dir: Path, doi: str, orcid_info: Dict[str,
def analyze_author_info_medra(creator: etree.Element, namespace_map: Any, orcid_info: List[OrcidProfile]) -> Optional[AuthorInfo]:
- given_name_ele: Optional[etree.Element] = creator.find('.//foaf:givenName', namespaces=namespace_map)
- family_name_ele: Optional[etree.Element] = creator.find('.//foaf:familyName', namespaces=namespace_map)
+ given_name_ele: Optional[etree.Element] = creator.find('.//NamesBeforeKey', namespaces=namespace_map)
+ family_name_ele: Optional[etree.Element] = creator.find('.//KeyNames', namespaces=namespace_map)
+ orcid_ele: Optional[etree.Element] = creator.find('.//NameIdentifier/IDValue', namespaces=namespace_map)
orcid: Optional[str]
origin_orcid: Optional[str]
@@ -292,9 +293,12 @@ def analyze_author_info_medra(creator: etree.Element, namespace_map: Any, orcid_
given_name = given_name_ele.text.strip()
family_name = family_name_ele.text.strip()
- orcid, origin_orcid = _match_name_with_orcid_profile(orcid_info, given_name, family_name)
-
- return AuthorInfo(given_name=given_name, family_name=family_name, orcid=orcid, origin_orcid=origin_orcid, ror=None)
+ if orcid_ele is not None:
+ orcid = _get_orcid_id_from_url(orcid_ele.text)
+ return AuthorInfo(given_name=given_name, family_name=family_name, orcid=orcid, origin_orcid='doi', ror=None)
+ else:
+ orcid, origin_orcid = _match_name_with_orcid_profile(orcid_info, given_name, family_name)
+ return AuthorInfo(given_name=given_name, family_name=family_name, orcid=orcid, origin_orcid=origin_orcid, ror=None)
# return None if insufficient information is provided.
return None
@@ -304,12 +308,13 @@ def analyze_doi_record_medra(cache_dir: Path, doi: str, orcid_info: Dict[str, Li
try:
rec_str = read_from_cache(doi, cache_dir)
- root = etree.fromstring(rec_str)
+ # lxml refuses to parse a str that carries an XML encoding declaration, so pass bytes instead
+ root = etree.fromstring(rec_str.encode())
- title_ele: Optional[etree.Element] = root.find('.//bibo:Article/dc:title', namespaces=root.nsmap)
+ title_ele: List[etree.Element] = root.xpath('.//onix:Title[parent::onix:ContentItem|parent::onix:DOIMonographicProduct and onix:TitleType[contains(text(), "01")]][1]/onix:TitleText', namespaces={'onix': 'http://www.editeur.org/onix/DOIMetadata/2.0'})
- if title_ele is not None:
- title = title_ele.text.strip()
+ if len(title_ele) == 1:
+ title = title_ele[0].text.strip()
else:
title = None
@@ -318,7 +323,8 @@ def analyze_doi_record_medra(cache_dir: Path, doi: str, orcid_info: Dict[str, Li
else:
orcid_author_info = []
- creators: List[etree.Element] = root.findall('.//dc:creator/foaf:Person', namespaces=root.nsmap)
+ # bind the ONIX namespace used throughout the record to the 'onix' prefix so the XPath below can address its elements
+ creators: List[etree.Element] = root.xpath('.//onix:Contributor[onix:ContributorRole[contains(text(), "A01")]]', namespaces={'onix': 'http://www.editeur.org/onix/DOIMetadata/2.0'})
authors: List[Optional[AuthorInfo]] = list(
map(lambda creator: analyze_author_info_medra(creator, root.nsmap, orcid_author_info), creators))
diff --git a/tests/test_pid_analyzer.py b/tests/test_pid_analyzer.py
index 526dbee..7e2fc01 100644
--- a/tests/test_pid_analyzer.py
+++ b/tests/test_pid_analyzer.py
@@ -108,12 +108,13 @@ def test_analyze_doi_record_medra(self):
assert res is not None
assert res.doi == '10.26342/2020-64-4'
- assert res.title == 'Predicting the humorousness of tweets using gaussian process\n preference learning'
+ assert res.title == 'Predicting the humorousness of tweets using gaussian process preference learning'
assert len(res.authors) == 4
- assert res.authors[0].given_name == 'Edwin'
- assert res.authors[0].family_name == 'Simpson'
+ assert res.authors[0].given_name == 'Tristan'
+ assert res.authors[0].family_name == 'Miller'
+ assert res.authors[0].orcid == '0000-0001-6157-8808'
def test_get_orcids_from_resolved_dois(self):
diff --git a/tests/testdata/medra_test.xml b/tests/testdata/medra_test.xml
index 19d55f5..bba0332 100644
--- a/tests/testdata/medra_test.xml
+++ b/tests/testdata/medra_test.xml
@@ -1,81 +1,98 @@
-
-
- 44
-
- 10.26342/2020-64-4
-
- Predicting the humorousness of tweets using gaussian process
- preference learning
-
-
-
- Simpson
-
- Edwin
-
- Edwin Simpson
-
-
-
- 37
-
-
- Do Dinh
- Erik-Lân
-
- Erik-Lân Do Dinh
-
-
-
-
- 44
-
-
- 1989-7553
-
- urn:issn:1989-7553
-
- 1989-7553
-
- 1989-7553
-
- Procesamiento del Lenguaje Natural
-
-
-
-
- 2020
- 10.26342/2020-64-4
-
- info:doi/10.26342/2020-64-4
-
- 37
-
-
- Gurevych
-
- Iryna
-
- Iryna Gurevych
-
-
-
- doi:10.26342/2020-64-4
- 10.26342/2020-64-4
-
-
- Miller
-
- Tristan
-
- Tristan Miller
-
-
-
- Sociedad Española para el Procesamiento del Lenguaje Natural
-
-
\ No newline at end of file
+
+
+
+ mEDRA
+ medra@medra.org
+ PublicService
+ 20240620
+
+
+ 06
+ 10.26342/2020-64-4
+ http://journal.sepln.org/sepln/ojs/ojs/index.php/pln/article/view/6193
+ SEPLN
+ mEDRA
+
+
+
+ 01
+ Procesamiento del Lenguaje Natural
+
+
+ 01
+ Sociedad Española para el Procesamiento del Lenguaje Natural
+
+ ES
+
+
+
+ 07
+ 1989-7553
+
+ JD
+
+
+
+
+ 05
+ 2020
+
+
+
+
+
+ 37
+ 44
+
+
+
+ 01
+ Predicting the humorousness of tweets using gaussian process preference learning
+
+
+ 1
+ A01
+
+ 21
+ http://orcid.org/0000-0001-6157-8808
+
+ Tristan Miller
+ Miller, Tristan
+ Tristan
+ Miller
+
+
+ 2
+ A01
+ Erik-Lân Do Dinh
+ Do Dinh, Erik-Lân
+ Erik-Lân
+ Do Dinh
+
+
+ 3
+ A01
+ Edwin Simpson
+ Simpson, Edwin
+ Edwin
+ Simpson
+
+
+ 4
+ A01
+ Iryna Gurevych
+ Gurevych, Iryna
+ Iryna
+ Gurevych
+
+
+ 01
+ eng
+
+ 2020
+
+
+
\ No newline at end of file