Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pid_resolver_lib/doi_ra_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
RAs: Dict[str, Dict[str, Union[str, int]]] = {
'DataCite': {'mime': 'application/ld+json', 'sleep': 120},
'Crossref': {'mime': 'application/rdf+xml', 'sleep': 0},
'mEDRA': {'mime': 'application/rdf+xml', 'sleep': 0}
'mEDRA': {'mime': 'application/vnd.medra.onixdoi+xml', 'sleep': 0}
}

REGISTRATION_AGENCY = 'RA:'
Expand Down
26 changes: 16 additions & 10 deletions pid_resolver_lib/pid_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,9 @@ def analyze_doi_record_crossref(cache_dir: Path, doi: str, orcid_info: Dict[str,


def analyze_author_info_medra(creator: etree.Element, namespace_map: Any, orcid_info: List[OrcidProfile]) -> Optional[AuthorInfo]:
given_name_ele: Optional[etree.Element] = creator.find('.//foaf:givenName', namespaces=namespace_map)
family_name_ele: Optional[etree.Element] = creator.find('.//foaf:familyName', namespaces=namespace_map)
given_name_ele: Optional[etree.Element] = creator.find('.//NamesBeforeKey', namespaces=namespace_map)
family_name_ele: Optional[etree.Element] = creator.find('.//KeyNames', namespaces=namespace_map)
orcid_ele: Optional[etree.Element] = creator.find('.//NameIdentifier/IDValue', namespaces=namespace_map)

orcid: Optional[str]
origin_orcid: Optional[str]
Expand All @@ -292,9 +293,12 @@ def analyze_author_info_medra(creator: etree.Element, namespace_map: Any, orcid_
given_name = given_name_ele.text.strip()
family_name = family_name_ele.text.strip()

orcid, origin_orcid = _match_name_with_orcid_profile(orcid_info, given_name, family_name)

return AuthorInfo(given_name=given_name, family_name=family_name, orcid=orcid, origin_orcid=origin_orcid, ror=None)
if orcid_ele is not None:
orcid = _get_orcid_id_from_url(orcid_ele.text)
return AuthorInfo(given_name=given_name, family_name=family_name, orcid=orcid, origin_orcid='doi', ror=None)
else:
orcid, origin_orcid = _match_name_with_orcid_profile(orcid_info, given_name, family_name)
return AuthorInfo(given_name=given_name, family_name=family_name, orcid=orcid, origin_orcid=origin_orcid, ror=None)

# return None if insufficient information is provided.
return None
Expand All @@ -304,12 +308,13 @@ def analyze_doi_record_medra(cache_dir: Path, doi: str, orcid_info: Dict[str, Li
try:
rec_str = read_from_cache(doi, cache_dir)

root = etree.fromstring(rec_str)
# Encode to bytes first: lxml refuses to parse a str that carries an XML encoding declaration.
root = etree.fromstring(rec_str.encode())

title_ele: Optional[etree.Element] = root.find('.//bibo:Article/dc:title', namespaces=root.nsmap)
title_ele: List[etree.Element] = root.xpath('.//onix:Title[parent::onix:ContentItem|parent::onix:DOIMonographicProduct and onix:TitleType[contains(text(), "01")]][1]/onix:TitleText', namespaces={'onix': 'http://www.editeur.org/onix/DOIMetadata/2.0'})

if title_ele is not None:
title = title_ele.text.strip()
if len(title_ele) == 1:
title = title_ele[0].text.strip()
else:
title = None

Expand All @@ -318,7 +323,8 @@ def analyze_doi_record_medra(cache_dir: Path, doi: str, orcid_info: Dict[str, Li
else:
orcid_author_info = []

creators: List[etree.Element] = root.findall('.//dc:creator/foaf:Person', namespaces=root.nsmap)
# Bind the 'onix' prefix to the document's default namespace; XPath needs a prefix to address namespaced elements.
creators: List[etree.Element] = root.xpath('.//onix:Contributor[onix:ContributorRole[contains(text(), "A01")]]', namespaces={'onix': 'http://www.editeur.org/onix/DOIMetadata/2.0'})

authors: List[Optional[AuthorInfo]] = list(
map(lambda creator: analyze_author_info_medra(creator, root.nsmap, orcid_author_info), creators))
Expand Down
7 changes: 4 additions & 3 deletions tests/test_pid_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,13 @@ def test_analyze_doi_record_medra(self):
assert res is not None

assert res.doi == '10.26342/2020-64-4'
assert res.title == 'Predicting the humorousness of tweets using gaussian process\n preference learning'
assert res.title == 'Predicting the humorousness of tweets using gaussian process preference learning'

assert len(res.authors) == 4

assert res.authors[0].given_name == 'Edwin'
assert res.authors[0].family_name == 'Simpson'
assert res.authors[0].given_name == 'Tristan'
assert res.authors[0].family_name == 'Miller'
assert res.authors[0].orcid == '0000-0001-6157-8808'


def test_get_orcids_from_resolved_dois(self):
Expand Down
179 changes: 98 additions & 81 deletions tests/testdata/medra_test.xml
Original file line number Diff line number Diff line change
@@ -1,81 +1,98 @@
<rdf:RDF xmlns:dc="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:prism="http://prismstandard.org/namespaces/basic/2.1/" xmlns:owl="http://www.w3.org/2002/07/owl#"
xmlns:bibo="http://purl.org/ontology/bibo/" xmlns:foaf="http://xmlns.com/foaf/0.1/">
<bibo:Article
rdf:about="https://doi.org/10.26342/2020-64-4">
<bibo:pageEnd>44</bibo:pageEnd>
<dc:identifier>
10.26342/2020-64-4
</dc:identifier>
<dc:title>Predicting the humorousness of tweets using gaussian process
preference learning
</dc:title>
<dc:creator>
<foaf:Person rdf:about="http://id.medra.org/contributor/Edwin+Simpson-10.26342%2F2020-64-4-2">
<foaf:familyName>Simpson</foaf:familyName>
<foaf:givenName>
Edwin
</foaf:givenName>
<foaf:name>Edwin Simpson</foaf:name>
</foaf:Person>

</dc:creator>
<prism:startingPage>37</prism:startingPage>
<dc:creator>
<foaf:Person rdf:about="http://id.medra.org/contributor/Erik-L%C3%A2n+Do+Dinh-10.26342%2F2020-64-4-1">
<foaf:familyName>Do Dinh</foaf:familyName>
<foaf:givenName>Erik-Lân</foaf:givenName>
<foaf:name>
Erik-Lân Do Dinh
</foaf:name>
</foaf:Person>

</dc:creator>
<prism:endingPage>44</prism:endingPage>
<dc:isPartOf>
<bibo:Journal rdf:about="http://id.medra.org/issn/1989-7553">
<dc:identifier>1989-7553</dc:identifier>
<owl:sameAs>
urn:issn:1989-7553
</owl:sameAs>
<prism:eIssn>1989-7553</prism:eIssn>
<bibo:eissn>
1989-7553
</bibo:eissn>
<dc:title>Procesamiento del Lenguaje Natural</dc:title>
<dc:hasPart
rdf:resource="https://doi.org/10.26342/2020-64-4"/>
</bibo:Journal>

</dc:isPartOf>
<dc:date>2020</dc:date>
<bibo:doi>10.26342/2020-64-4</bibo:doi>
<owl:sameAs>
info:doi/10.26342/2020-64-4
</owl:sameAs>
<bibo:pageStart>37</bibo:pageStart>
<dc:creator>
<foaf:Person rdf:about="http://id.medra.org/contributor/Iryna+Gurevych-10.26342%2F2020-64-4-3">
<foaf:familyName>Gurevych</foaf:familyName>
<foaf:givenName>
Iryna
</foaf:givenName>
<foaf:name>Iryna Gurevych</foaf:name>
</foaf:Person>

</dc:creator>
<owl:sameAs>doi:10.26342/2020-64-4</owl:sameAs>
<prism:doi>10.26342/2020-64-4</prism:doi>
<dc:creator>
<foaf:Person rdf:about="http://id.medra.org/contributor/Tristan+Miller-10.26342%2F2020-64-4-0">
<foaf:familyName>Miller</foaf:familyName>
<foaf:givenName>
Tristan
</foaf:givenName>
<foaf:name>Tristan Miller</foaf:name>
</foaf:Person>

</dc:creator>
<dc:publisher>Sociedad Española para el Procesamiento del Lenguaje Natural</dc:publisher>
</bibo:Article>
</rdf:RDF>
<?xml version="1.0" encoding="UTF-8"?>
<ONIXDOISerialArticleWorkRegistrationMessage xmlns="http://www.editeur.org/onix/DOIMetadata/2.0"
xmlns:cl="http://www.medra.org/DOIMetadata/2.0/Citations"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.editeur.org/onix/DOIMetadata/2.0 https://www.medra.org/schema/onix/DOIMetadata/2.0/ONIX_DOIMetadata_2.0.xsd">
<Header>
<FromCompany>mEDRA</FromCompany>
<FromEmail>[email protected]</FromEmail>
<ToCompany>PublicService</ToCompany>
<SentDate>20240620</SentDate>
</Header>
<DOISerialArticleWork>
<NotificationType>06</NotificationType>
<DOI>10.26342/2020-64-4</DOI>
<DOIWebsiteLink>http://journal.sepln.org/sepln/ojs/ojs/index.php/pln/article/view/6193</DOIWebsiteLink>
<RegistrantName>SEPLN</RegistrantName>
<RegistrationAuthority>mEDRA</RegistrationAuthority>
<SerialPublication>
<SerialWork>
<Title>
<TitleType>01</TitleType>
<TitleText>Procesamiento del Lenguaje Natural</TitleText>
</Title>
<Publisher>
<PublishingRole>01</PublishingRole>
<PublisherName>Sociedad Española para el Procesamiento del Lenguaje Natural</PublisherName>
</Publisher>
<CountryOfPublication>ES</CountryOfPublication>
</SerialWork>
<SerialVersion>
<ProductIdentifier>
<ProductIDType>07</ProductIDType>
<IDValue>1989-7553</IDValue>
</ProductIdentifier>
<ProductForm>JD</ProductForm>
</SerialVersion>
</SerialPublication>
<JournalIssue>
<JournalIssueDate>
<DateFormat>05</DateFormat>
<Date>2020</Date>
</JournalIssueDate>
</JournalIssue>
<ContentItem>
<TextItem>
<PageRun>
<FirstPageNumber>37</FirstPageNumber>
<LastPageNumber>44</LastPageNumber>
</PageRun>
</TextItem>
<Title>
<TitleType>01</TitleType>
<TitleText>Predicting the humorousness of tweets using gaussian process preference learning</TitleText>
</Title>
<Contributor>
<SequenceNumber>1</SequenceNumber>
<ContributorRole>A01</ContributorRole>
<NameIdentifier>
<NameIDType>21</NameIDType>
<IDValue>http://orcid.org/0000-0001-6157-8808</IDValue>
</NameIdentifier>
<PersonName>Tristan Miller</PersonName>
<PersonNameInverted>Miller, Tristan</PersonNameInverted>
<NamesBeforeKey>Tristan</NamesBeforeKey>
<KeyNames>Miller</KeyNames>
</Contributor>
<Contributor>
<SequenceNumber>2</SequenceNumber>
<ContributorRole>A01</ContributorRole>
<PersonName>Erik-Lân Do Dinh</PersonName>
<PersonNameInverted>Do Dinh, Erik-Lân</PersonNameInverted>
<NamesBeforeKey>Erik-Lân</NamesBeforeKey>
<KeyNames>Do Dinh</KeyNames>
</Contributor>
<Contributor>
<SequenceNumber>3</SequenceNumber>
<ContributorRole>A01</ContributorRole>
<PersonName>Edwin Simpson</PersonName>
<PersonNameInverted>Simpson, Edwin</PersonNameInverted>
<NamesBeforeKey>Edwin</NamesBeforeKey>
<KeyNames>Simpson</KeyNames>
</Contributor>
<Contributor>
<SequenceNumber>4</SequenceNumber>
<ContributorRole>A01</ContributorRole>
<PersonName>Iryna Gurevych</PersonName>
<PersonNameInverted>Gurevych, Iryna</PersonNameInverted>
<NamesBeforeKey>Iryna</NamesBeforeKey>
<KeyNames>Gurevych</KeyNames>
</Contributor>
<Language>
<LanguageRole>01</LanguageRole>
<LanguageCode>eng</LanguageCode>
</Language>
<PublicationDate>2020</PublicationDate>
</ContentItem>
</DOISerialArticleWork>
</ONIXDOISerialArticleWorkRegistrationMessage>