Connectome-Implementation-Team · tobiasschweizer · Jun 20, 2024 · Jun 20, 2024 · Jun 21, 2024 · Jun 21, 2024
diff --git a/pid_resolver_lib/doi_ra_handler.py b/pid_resolver_lib/doi_ra_handler.py
@@ -24,7 +24,7 @@
 RAs: Dict[str, Dict[str, Union[str, int]]] = {
     'DataCite': {'mime': 'application/ld+json', 'sleep': 120},
     'Crossref': {'mime': 'application/rdf+xml', 'sleep': 0},
-    'mEDRA': {'mime': 'application/rdf+xml', 'sleep': 0}
+    'mEDRA': {'mime': 'application/vnd.medra.onixdoi+xml', 'sleep': 0}
 }
 
 REGISTRATION_AGENCY = 'RA:'

diff --git a/pid_resolver_lib/pid_analyzer.py b/pid_resolver_lib/pid_analyzer.py
@@ -282,8 +282,9 @@ def analyze_doi_record_crossref(cache_dir: Path, doi: str, orcid_info: Dict[str,
 
 
 def analyze_author_info_medra(creator: etree.Element, namespace_map: Any, orcid_info: List[OrcidProfile]) -> Optional[AuthorInfo]:
-    given_name_ele: Optional[etree.Element] = creator.find('.//foaf:givenName', namespaces=namespace_map)
-    family_name_ele: Optional[etree.Element] = creator.find('.//foaf:familyName', namespaces=namespace_map)
+    given_name_ele: Optional[etree.Element] = creator.find('.//NamesBeforeKey', namespaces=namespace_map)
+    family_name_ele: Optional[etree.Element] = creator.find('.//KeyNames', namespaces=namespace_map)
+    orcid_ele: Optional[etree.Element] = creator.find('.//NameIdentifier/IDValue', namespaces=namespace_map)
 
     orcid: Optional[str]
     origin_orcid: Optional[str]
@@ -292,9 +293,12 @@ def analyze_author_info_medra(creator: etree.Element, namespace_map: Any, orcid_
         given_name = given_name_ele.text.strip()
         family_name = family_name_ele.text.strip()
 
-        orcid, origin_orcid = _match_name_with_orcid_profile(orcid_info, given_name, family_name)
-
-        return AuthorInfo(given_name=given_name, family_name=family_name, orcid=orcid, origin_orcid=origin_orcid, ror=None)
+        if orcid_ele is not None:
+            orcid = _get_orcid_id_from_url(orcid_ele.text)
+            return AuthorInfo(given_name=given_name, family_name=family_name, orcid=orcid, origin_orcid='doi', ror=None)
+        else:
+            orcid, origin_orcid = _match_name_with_orcid_profile(orcid_info, given_name, family_name)
+            return AuthorInfo(given_name=given_name, family_name=family_name, orcid=orcid, origin_orcid=origin_orcid, ror=None)
 
     # return None if insufficient information is provided.
     return None
@@ -304,12 +308,13 @@ def analyze_doi_record_medra(cache_dir: Path, doi: str, orcid_info: Dict[str, Li
     try:
         rec_str = read_from_cache(doi, cache_dir)
 
-        root = etree.fromstring(rec_str)
+        # parse bytes instead of str: lxml raises a ValueError for Unicode strings that carry an XML encoding declaration
+        root = etree.fromstring(rec_str.encode())
 
-        title_ele: Optional[etree.Element] = root.find('.//bibo:Article/dc:title', namespaces=root.nsmap)
+        title_ele: List[etree.Element] = root.xpath('.//onix:Title[parent::onix:ContentItem|parent::onix:DOIMonographicProduct and onix:TitleType[contains(text(), "01")]][1]/onix:TitleText', namespaces={'onix': 'http://www.editeur.org/onix/DOIMetadata/2.0'})
 
-        if title_ele is not None:
-            title = title_ele.text.strip()
+        if len(title_ele) == 1:
+            title = title_ele[0].text.strip()
         else:
             title = None
 
@@ -318,7 +323,8 @@ def analyze_doi_record_medra(cache_dir: Path, doi: str, orcid_info: Dict[str, Li
         else:
             orcid_author_info = []
 
-        creators: List[etree.Element] = root.findall('.//dc:creator/foaf:Person', namespaces=root.nsmap)
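+        # the record declares the ONIX namespace as its default (unprefixed) namespace; XPath cannot address that directly, so bind it to the "onix" prefix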
+        creators: List[etree.Element] = root.xpath('.//onix:Contributor[onix:ContributorRole[contains(text(), "A01")]]', namespaces={'onix': 'http://www.editeur.org/onix/DOIMetadata/2.0'})
 
         authors: List[Optional[AuthorInfo]] = list(
             map(lambda creator: analyze_author_info_medra(creator, root.nsmap, orcid_author_info), creators))

diff --git a/tests/test_pid_analyzer.py b/tests/test_pid_analyzer.py
@@ -108,12 +108,13 @@ def test_analyze_doi_record_medra(self):
             assert res is not None
 
             assert res.doi == '10.26342/2020-64-4'
-            assert res.title == 'Predicting the humorousness of tweets using gaussian process\n            preference learning'
+            assert res.title == 'Predicting the humorousness of tweets using gaussian process preference learning'
 
             assert len(res.authors) == 4
 
-            assert res.authors[0].given_name == 'Edwin'
-            assert res.authors[0].family_name == 'Simpson'
+            assert res.authors[0].given_name == 'Tristan'
+            assert res.authors[0].family_name == 'Miller'
+            assert res.authors[0].orcid == '0000-0001-6157-8808'
 
 
     def test_get_orcids_from_resolved_dois(self):

diff --git a/tests/testdata/medra_test.xml b/tests/testdata/medra_test.xml
@@ -1,81 +1,98 @@
-<rdf:RDF xmlns:dc="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-         xmlns:prism="http://prismstandard.org/namespaces/basic/2.1/" xmlns:owl="http://www.w3.org/2002/07/owl#"
-         xmlns:bibo="http://purl.org/ontology/bibo/" xmlns:foaf="http://xmlns.com/foaf/0.1/">
-    <bibo:Article
-            rdf:about="https://doi.org/10.26342/2020-64-4">
-        <bibo:pageEnd>44</bibo:pageEnd>
-        <dc:identifier>
-            10.26342/2020-64-4
-        </dc:identifier>
-        <dc:title>Predicting the humorousness of tweets using gaussian process
-            preference learning
-        </dc:title>
-        <dc:creator>
-            <foaf:Person rdf:about="http://id.medra.org/contributor/Edwin+Simpson-10.26342%2F2020-64-4-2">
-                <foaf:familyName>Simpson</foaf:familyName>
-                <foaf:givenName>
-                    Edwin
-                </foaf:givenName>
-                <foaf:name>Edwin Simpson</foaf:name>
-            </foaf:Person>
-
-        </dc:creator>
-        <prism:startingPage>37</prism:startingPage>
-        <dc:creator>
-            <foaf:Person rdf:about="http://id.medra.org/contributor/Erik-L%C3%A2n+Do+Dinh-10.26342%2F2020-64-4-1">
-                <foaf:familyName>Do Dinh</foaf:familyName>
-                <foaf:givenName>Erik-Lân</foaf:givenName>
-                <foaf:name>
-                    Erik-Lân Do Dinh
-                </foaf:name>
-            </foaf:Person>
-
-        </dc:creator>
-        <prism:endingPage>44</prism:endingPage>
-        <dc:isPartOf>
-            <bibo:Journal rdf:about="http://id.medra.org/issn/1989-7553">
-                <dc:identifier>1989-7553</dc:identifier>
-                <owl:sameAs>
-                    urn:issn:1989-7553
-                </owl:sameAs>
-                <prism:eIssn>1989-7553</prism:eIssn>
-                <bibo:eissn>
-                    1989-7553
-                </bibo:eissn>
-                <dc:title>Procesamiento del Lenguaje Natural</dc:title>
-                <dc:hasPart
-                        rdf:resource="https://doi.org/10.26342/2020-64-4"/>
-            </bibo:Journal>
-
-        </dc:isPartOf>
-        <dc:date>2020</dc:date>
-        <bibo:doi>10.26342/2020-64-4</bibo:doi>
-        <owl:sameAs>
-            info:doi/10.26342/2020-64-4
-        </owl:sameAs>
-        <bibo:pageStart>37</bibo:pageStart>
-        <dc:creator>
-            <foaf:Person rdf:about="http://id.medra.org/contributor/Iryna+Gurevych-10.26342%2F2020-64-4-3">
-                <foaf:familyName>Gurevych</foaf:familyName>
-                <foaf:givenName>
-                    Iryna
-                </foaf:givenName>
-                <foaf:name>Iryna Gurevych</foaf:name>
-            </foaf:Person>
-
-        </dc:creator>
-        <owl:sameAs>doi:10.26342/2020-64-4</owl:sameAs>
-        <prism:doi>10.26342/2020-64-4</prism:doi>
-        <dc:creator>
-            <foaf:Person rdf:about="http://id.medra.org/contributor/Tristan+Miller-10.26342%2F2020-64-4-0">
-                <foaf:familyName>Miller</foaf:familyName>
-                <foaf:givenName>
-                    Tristan
-                </foaf:givenName>
-                <foaf:name>Tristan Miller</foaf:name>
-            </foaf:Person>
-
-        </dc:creator>
-        <dc:publisher>Sociedad Española para el Procesamiento del Lenguaje Natural</dc:publisher>
-    </bibo:Article>
-</rdf:RDF>
+<?xml version="1.0" encoding="UTF-8"?>
+<ONIXDOISerialArticleWorkRegistrationMessage xmlns="http://www.editeur.org/onix/DOIMetadata/2.0"
+                                             xmlns:cl="http://www.medra.org/DOIMetadata/2.0/Citations"
+                                             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+                                             xsi:schemaLocation="http://www.editeur.org/onix/DOIMetadata/2.0 https://www.medra.org/schema/onix/DOIMetadata/2.0/ONIX_DOIMetadata_2.0.xsd">
+   <Header>
+      <FromCompany>mEDRA</FromCompany>
+      <FromEmail>[email protected]</FromEmail>
+      <ToCompany>PublicService</ToCompany>
+      <SentDate>20240620</SentDate>
+   </Header>
+   <DOISerialArticleWork>
+      <NotificationType>06</NotificationType>
+      <DOI>10.26342/2020-64-4</DOI>
+      <DOIWebsiteLink>http://journal.sepln.org/sepln/ojs/ojs/index.php/pln/article/view/6193</DOIWebsiteLink>
+      <RegistrantName>SEPLN</RegistrantName>
+      <RegistrationAuthority>mEDRA</RegistrationAuthority>
+      <SerialPublication>
+         <SerialWork>
+            <Title>
+               <TitleType>01</TitleType>
+               <TitleText>Procesamiento del Lenguaje Natural</TitleText>
+            </Title>
+            <Publisher>
+               <PublishingRole>01</PublishingRole>
+               <PublisherName>Sociedad Española para el Procesamiento del Lenguaje Natural</PublisherName>
+            </Publisher>
+            <CountryOfPublication>ES</CountryOfPublication>
+         </SerialWork>
+         <SerialVersion>
+            <ProductIdentifier>
+               <ProductIDType>07</ProductIDType>
+               <IDValue>1989-7553</IDValue>
+            </ProductIdentifier>
+            <ProductForm>JD</ProductForm>
+         </SerialVersion>
+      </SerialPublication>
+      <JournalIssue>
+         <JournalIssueDate>
+            <DateFormat>05</DateFormat>
+            <Date>2020</Date>
+         </JournalIssueDate>
+      </JournalIssue>
+      <ContentItem>
+         <TextItem>
+            <PageRun>
+               <FirstPageNumber>37</FirstPageNumber>
+               <LastPageNumber>44</LastPageNumber>
+            </PageRun>
+         </TextItem>
+         <Title>
+            <TitleType>01</TitleType>
+            <TitleText>Predicting the humorousness of tweets using gaussian process preference learning</TitleText>
+         </Title>
+         <Contributor>
+            <SequenceNumber>1</SequenceNumber>
+            <ContributorRole>A01</ContributorRole>
+            <NameIdentifier>
+               <NameIDType>21</NameIDType>
+               <IDValue>http://orcid.org/0000-0001-6157-8808</IDValue>
+            </NameIdentifier>
+            <PersonName>Tristan Miller</PersonName>
+            <PersonNameInverted>Miller, Tristan</PersonNameInverted>
+            <NamesBeforeKey>Tristan</NamesBeforeKey>
+            <KeyNames>Miller</KeyNames>
+         </Contributor>
+         <Contributor>
+            <SequenceNumber>2</SequenceNumber>
+            <ContributorRole>A01</ContributorRole>
+            <PersonName>Erik-Lân Do Dinh</PersonName>
+            <PersonNameInverted>Do Dinh, Erik-Lân</PersonNameInverted>
+            <NamesBeforeKey>Erik-Lân</NamesBeforeKey>
+            <KeyNames>Do Dinh</KeyNames>
+         </Contributor>
+         <Contributor>
+            <SequenceNumber>3</SequenceNumber>
+            <ContributorRole>A01</ContributorRole>
+            <PersonName>Edwin Simpson</PersonName>
+            <PersonNameInverted>Simpson, Edwin</PersonNameInverted>
+            <NamesBeforeKey>Edwin</NamesBeforeKey>
+            <KeyNames>Simpson</KeyNames>
+         </Contributor>
+         <Contributor>
+            <SequenceNumber>4</SequenceNumber>
+            <ContributorRole>A01</ContributorRole>
+            <PersonName>Iryna Gurevych</PersonName>
+            <PersonNameInverted>Gurevych, Iryna</PersonNameInverted>
+            <NamesBeforeKey>Iryna</NamesBeforeKey>
+            <KeyNames>Gurevych</KeyNames>
+         </Contributor>
+         <Language>
+            <LanguageRole>01</LanguageRole>
+            <LanguageCode>eng</LanguageCode>
+         </Language>
+         <PublicationDate>2020</PublicationDate>
+      </ContentItem>
+   </DOISerialArticleWork>
+</ONIXDOISerialArticleWorkRegistrationMessage>