Skip to content

Commit

Permalink
handle oecd fos classification
Browse files Browse the repository at this point in the history
  • Loading branch information
mfenner committed Oct 12, 2024
1 parent 8ffa489 commit 4401e1c
Show file tree
Hide file tree
Showing 11 changed files with 273 additions and 202 deletions.
2 changes: 1 addition & 1 deletion commonmeta/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"""

__title__ = "commonmeta-py"
__version__ = "0.32"
__version__ = "0.33"
__author__ = "Martin Fenner"
__license__ = "MIT"

Expand Down
8 changes: 5 additions & 3 deletions commonmeta/crossref_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,11 @@ def insert_group_title(metadata, xml):
"""Insert group title"""
if metadata.subjects is None or len(metadata.subjects) == 0:
return xml
etree.SubElement(xml, "group_title").text = metadata.subjects[0].get(
"subject", None
)
group_title = metadata.subjects[0].get("subject", None)
# strip optional FOS (Field of Science) prefix
if group_title.startswith("FOS: "):
group_title = group_title[5:]
etree.SubElement(xml, "group_title").text = group_title
return xml


Expand Down
20 changes: 10 additions & 10 deletions commonmeta/readers/json_feed_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,10 @@ def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
if category is not None:
subjects = [name_to_fos(py_.human_case(category))]
else:
subjects = None
subjects = []
tags = wrap(py_.get(meta, "tags", None))
if tags is not None:
subjects += wrap([format_subject(i) for i in tags])
references = get_references(wrap(meta.get("reference", None)))
funding_references = get_funding_references(meta)
relations = get_relations(wrap(meta.get("relationships", None)))
Expand Down Expand Up @@ -414,13 +417,10 @@ def get_json_feed_blog_slug(id: str):
return py_.get(post, "blog.slug", None)


def get_json_feed_blog_slug(id: str):
"""get JSON Feed item by id and return blog slug"""
if id is None:
def format_subject(subject: str) -> Optional[dict]:
"""format subject"""
if subject is None or not isinstance(subject, str):
return None
url = f"https://api.rogue-scholar.org/posts/#{id}"
response = httpx.get(url, timeout=10)
if response.status_code != 200:
return None
post = response.json()
return py_.get(post, "blog.slug", None)
return {
"subject": subject,
}
98 changes: 58 additions & 40 deletions commonmeta/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,57 @@
HTTP_SCHEME = "http://"
HTTPS_SCHEME = "https://"

# Maps OECD Fields of Science and Technology (FOS) labels to an identifier
# URL. Each identifier is the OECD FOS PDF URL followed by "?<code>", where
# <code> is the numeric FOS classification code of the field (e.g. "1.2").
FOS_MAPPINGS = {
    "Natural sciences": "http://www.oecd.org/science/inno/38235147.pdf?1",
    "Mathematics": "http://www.oecd.org/science/inno/38235147.pdf?1.1",
    "Computer and information sciences": "http://www.oecd.org/science/inno/38235147.pdf?1.2",
    "Physical sciences": "http://www.oecd.org/science/inno/38235147.pdf?1.3",
    "Chemical sciences": "http://www.oecd.org/science/inno/38235147.pdf?1.4",
    "Earth and related environmental sciences": "http://www.oecd.org/science/inno/38235147.pdf?1.5",
    "Biological sciences": "http://www.oecd.org/science/inno/38235147.pdf?1.6",
    "Other natural sciences": "http://www.oecd.org/science/inno/38235147.pdf?1.7",
    "Engineering and technology": "http://www.oecd.org/science/inno/38235147.pdf?2",
    "Civil engineering": "http://www.oecd.org/science/inno/38235147.pdf?2.1",
    "Electrical engineering, electronic engineering, information engineering": "http://www.oecd.org/science/inno/38235147.pdf?2.2",
    "Mechanical engineering": "http://www.oecd.org/science/inno/38235147.pdf?2.3",
    "Chemical engineering": "http://www.oecd.org/science/inno/38235147.pdf?2.4",
    "Materials engineering": "http://www.oecd.org/science/inno/38235147.pdf?2.5",
    "Medical engineering": "http://www.oecd.org/science/inno/38235147.pdf?2.6",
    "Environmental engineering": "http://www.oecd.org/science/inno/38235147.pdf?2.7",
    "Environmental biotechnology": "http://www.oecd.org/science/inno/38235147.pdf?2.8",
    "Industrial biotechnology": "http://www.oecd.org/science/inno/38235147.pdf?2.9",
    "Nano technology": "http://www.oecd.org/science/inno/38235147.pdf?2.10",
    "Other engineering and technologies": "http://www.oecd.org/science/inno/38235147.pdf?2.11",
    "Medical and health sciences": "http://www.oecd.org/science/inno/38235147.pdf?3",
    "Basic medicine": "http://www.oecd.org/science/inno/38235147.pdf?3.1",
    "Clinical medicine": "http://www.oecd.org/science/inno/38235147.pdf?3.2",
    "Health sciences": "http://www.oecd.org/science/inno/38235147.pdf?3.3",
    "Health biotechnology": "http://www.oecd.org/science/inno/38235147.pdf?3.4",
    "Other medical sciences": "http://www.oecd.org/science/inno/38235147.pdf?3.5",
    "Agricultural sciences": "http://www.oecd.org/science/inno/38235147.pdf?4",
    "Agriculture, forestry, and fisheries": "http://www.oecd.org/science/inno/38235147.pdf?4.1",
    "Animal and dairy science": "http://www.oecd.org/science/inno/38235147.pdf?4.2",
    "Veterinary science": "http://www.oecd.org/science/inno/38235147.pdf?4.3",
    "Agricultural biotechnology": "http://www.oecd.org/science/inno/38235147.pdf?4.4",
    "Other agricultural sciences": "http://www.oecd.org/science/inno/38235147.pdf?4.5",
    "Social science": "http://www.oecd.org/science/inno/38235147.pdf?5",
    "Psychology": "http://www.oecd.org/science/inno/38235147.pdf?5.1",
    "Economics and business": "http://www.oecd.org/science/inno/38235147.pdf?5.2",
    "Educational sciences": "http://www.oecd.org/science/inno/38235147.pdf?5.3",
    "Sociology": "http://www.oecd.org/science/inno/38235147.pdf?5.4",
    "Law": "http://www.oecd.org/science/inno/38235147.pdf?5.5",
    "Political science": "http://www.oecd.org/science/inno/38235147.pdf?5.6",
    "Social and economic geography": "http://www.oecd.org/science/inno/38235147.pdf?5.7",
    "Media and communications": "http://www.oecd.org/science/inno/38235147.pdf?5.8",
    "Other social sciences": "http://www.oecd.org/science/inno/38235147.pdf?5.9",
    "Humanities": "http://www.oecd.org/science/inno/38235147.pdf?6",
    "History and archaeology": "http://www.oecd.org/science/inno/38235147.pdf?6.1",
    "Languages and literature": "http://www.oecd.org/science/inno/38235147.pdf?6.2",
    "Philosophy, ethics and religion": "http://www.oecd.org/science/inno/38235147.pdf?6.3",
    "Arts (arts, history of arts, performing arts, music)": "http://www.oecd.org/science/inno/38235147.pdf?6.4",
    "Other humanities": "http://www.oecd.org/science/inno/38235147.pdf?6.5",
}


def normalize_id(pid: Optional[str], **kwargs) -> Optional[str]:
"""Check for valid DOI or HTTP(S) URL"""
Expand Down Expand Up @@ -982,45 +1033,12 @@ def subjects_as_string(subjects):

def name_to_fos(name: str) -> Optional[dict]:
    """Convert name to Fields of Science (OECD) subject.

    The name is stripped of surrounding whitespace and looked up in
    FOS_MAPPINGS. A label found there is returned with the "FOS: " prefix;
    any other name is returned as-is (apart from the stripping).

    Returns a dict with a single "subject" key.
    """
    subject = name.strip()
    # Look up the stripped label rather than the raw input, so that a name
    # with leading/trailing whitespace still matches its FOS_MAPPINGS key.
    if subject in FOS_MAPPINGS:
        return {"subject": f"FOS: {subject}"}
    return {"subject": subject}


def encode_doi(prefix):
Expand Down Expand Up @@ -1112,10 +1130,10 @@ def id_from_url(url: Optional[str]) -> Optional[str]:
"""Return a ID from a URL"""
if url is None:
return None

f = furl(url)
# check for allowed scheme if string is a URL
if f.host is not None and f.scheme not in ["http", "https", "ftp"]:
return None

return str(f.path).strip("/")
24 changes: 17 additions & 7 deletions commonmeta/writers/inveniordm_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
import orjson as json
from typing import Optional

from ..base_utils import compact, wrap, parse_attributes
from ..base_utils import compact, wrap, parse_attributes, presence
from ..date_utils import get_iso8601_date
from ..doi_utils import doi_from_url
from ..constants import CM_TO_INVENIORDM_TRANSLATIONS, INVENIORDM_IDENTIFIER_TYPES
from ..utils import get_language, validate_orcid, id_from_url
from ..utils import get_language, validate_orcid, id_from_url, FOS_MAPPINGS


def write_inveniordm(metadata):
Expand Down Expand Up @@ -73,7 +73,7 @@ def write_inveniordm(metadata):
"type": {"id": "updated"},
}
],
"subjects": subjects,
"subjects": presence(subjects),
"description": parse_attributes(
metadata.descriptions, content="description", first=True
),
Expand Down Expand Up @@ -133,12 +133,22 @@ def format_identifier(id):
)


def to_inveniordm_subject(subject: dict) -> dict:
"""Convert subjects to inveniordm subjects"""
def to_inveniordm_subject(sub: dict) -> Optional[dict]:
"""Convert subject to inveniordm subject"""
if sub.get("subject", None) is None:
return None
if sub.get("subject").startswith("FOS: "):
subject = sub.get("subject")[5:]
id_ = FOS_MAPPINGS.get(subject, None)
return compact(
{
"id": id_,
"subject": subject,
}
)
return compact(
{
"id": subject.get("id", None),
"subject": subject.get("subject", None),
"subject": sub.get("subject"),
}
)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name="commonmeta-py"
version="0.32"
version="0.33"
repository = "https://github.com/front-matter/commonmeta-py"
homepage = "https://python.commonmeta.org"
documentation = "https://python.commonmeta.org"
Expand Down
Loading

0 comments on commit 4401e1c

Please sign in to comment.