Skip to content

Commit

Permalink
add ris reader
Browse files Browse the repository at this point in the history
  • Loading branch information
mfenner committed Jan 14, 2024
1 parent e987283 commit a610c1c
Show file tree
Hide file tree
Showing 37 changed files with 17,082 additions and 58 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ Commonmeta-py reads and/or writes these metadata formats:
| [JATS](https://jats.nlm.nih.gov/) | jats | application/vnd.jats+xml | later | later |
| [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) | csv | text/csv | no | later |
| [BibTex](http://en.wikipedia.org/wiki/BibTeX) | bibtex | application/x-bibtex | later | yes |
| [RIS](http://en.wikipedia.org/wiki/RIS_(file_format)) | ris | application/x-research-info-systems | later | yes |
| [RIS](http://en.wikipedia.org/wiki/RIS_(file_format)) | ris | application/x-research-info-systems | yes | yes |
| [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json | later | yes |
| [JSON Feed](https://www.jsonfeed.org/) | json_feed_item | application/feed+json | yes | later |

Expand Down
3 changes: 3 additions & 0 deletions commonmeta/metadata/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
read_inveniordm,
read_kbase,
read_commonmeta,
read_ris,
)
from ..writers import (
write_datacite,
Expand Down Expand Up @@ -116,6 +117,8 @@ def __init__(self, string: Optional[str], **kwargs):
elif via == "kbase":
data = json.loads(string)
meta = read_kbase(data)
elif via == "ris":
meta = read_ris(string)
# elif via == "bibtex":
# data = yaml.safe_load(string)
# meta = read_bibtex(data)
Expand Down
1 change: 1 addition & 0 deletions commonmeta/readers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
from .inveniordm_reader import get_inveniordm, read_inveniordm
from .kbase_reader import read_kbase
from .commonmeta_reader import read_commonmeta
from .ris_reader import read_ris
92 changes: 37 additions & 55 deletions commonmeta/readers/ris_reader.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,36 @@
"""RIS reader for commonmeta-py"""
from typing import Optional

from ..utils import compact, normalize_url
from ..utils import compact, normalize_url, wrap
from ..author_utils import get_authors
from ..date_utils import get_date_from_parts
from ..doi_utils import normalize_doi, doi_from_url
from ..constants import (
RIS_TO_CM_TRANSLATIONS,
Commonmeta
)
from ..constants import RIS_TO_CM_TRANSLATIONS, Commonmeta


def read_ris(data: Optional[str], **kwargs) -> Commonmeta:
"""read_ris"""

# read_options = ActiveSupport::HashWithIndifferentAccess.
# new(options.except(:doi, :id, :url,
# :sandbox, :validate, :ra))

meta = ris_meta(data=data)

read_options = kwargs or {}

print(meta)
if not isinstance(meta, dict):
return {"state": "not_found"}

id_ = read_options.get('doi', None) or normalize_doi(meta.get("DO", None))
id_ = read_options.get("doi", None) or normalize_doi(meta.get("DO", None))
type_ = RIS_TO_CM_TRANSLATIONS.get(meta.get("TY", None), "Other")
container_type = "Journal" if type_ == "JournalArticle" else None

# author = wrap(meta.get('AU', None)).map {| a | { 'creatorName' = > a } }
def get_author(author):
"""get_author"""
return {"creatorName": author}

date_parts = str(meta.get("PY", None)).split("/")
created_date_parts = str(meta.get("Y1", None)).split("/")
dates = []
authors = [get_author(i) for i in wrap(meta.get("AU", None))]
date = {}
if meta.get("PY", None) is not None:
dates.append({"date": get_date_from_parts(
*date_parts), "dateType": "Issued"})
date["published"] = get_date_from_parts(*str(meta.get("PY", None)).split("/"))
if meta.get("Y1", None) is not None:
dates.append(
{"date": get_date_from_parts(
*created_date_parts), "dateType": "Created"}
)
# publication_year = get_date_from_parts(*date_parts).to_s[0..3]

date["created"] = get_date_from_parts(*str(meta.get("Y1", None)).split("/"))
# related_identifiers = if meta.fetch('T2', nil).present? & & meta.fetch('SN', nil).present?
# [{'type' = > 'Periodical',
# 'id'= > meta.fetch('SN', nil),
Expand All @@ -51,12 +40,14 @@ def read_ris(data: Optional[str], **kwargs) -> Commonmeta:
# else
# []
# end
descriptions = None
if meta.get("AB", None) is not None:
descriptions = [{"description": meta.get("AB"), "descriptionType": "Abstract"}]
if meta.get("T2", None) is not None:
container = compact(
{
"type": "Journal",
"type": container_type,
"title": meta.get("T2", None),
"identifier": meta.get("SN", None),
"volume": meta.get("VL", None),
"issue": meta.get("IS", None),
"firstPage": meta.get("SP", None),
Expand All @@ -65,34 +56,25 @@ def read_ris(data: Optional[str], **kwargs) -> Commonmeta:
)
else:
container = None

subjects = wrap(meta.get("KW", None))
state = "findable" if meta.get("DO", None) or read_options else "not_found"
# subjects = Array.wrap(meta.fetch('KW', nil)).reduce([]) do |sum, subject|
# sum += name_to_fos(subject)

# sum
# end

return {
"id": id_,
"type": type_,
"doi": doi_from_url(id_),
"doi": doi_from_url(id_) if id_ else None,
"url": normalize_url(meta.get("UR", None)),
'titles': None, # meta.get('T1', nil).present? ? [{ 'title': meta.fetch('T1', nil) }] : nil,
'creators': None, # get_authors(author),
"titles": [{"title": meta.get("T1", None)}],
"descriptions": descriptions,
"contributors": get_authors(authors),
"publisher": meta.get("PB", "(:unav)"),
'publication_year': None, # publication_year,
"container": container,
# 'related_identifiers': related_identifiers,
"dates": dates,
# 'descriptions': if meta.fetch('AB', nil).present?
# [{ 'description': sanitize(meta.fetch('AB')),
# 'descriptionType': 'Abstract' }]
# end,
# 'subjects': subjects,
"date": date,
"subjects": subjects,
"language": meta.get("LA", None),
"state": state,
} # .merge(read_options)
} | read_options


def ris_meta(data):
Expand All @@ -101,15 +83,15 @@ def ris_meta(data):
if data is None:
return meta
for line in data.split("\n"):
key, value = line.split("-", 1)
key = key.strip()
value = value.strip()
meta[key] = value
return compact(meta)


# h = Hash.new { |h, k| h[k] = [] }
# string.split("\n").each_with_object(h) do |line, _sum|
# k, v = line.split('-', 2)
# h[k.strip] << v.to_s.strip
# end.transform_values(&:unwrap).compact
values = line.split("-", 2)
key = values[0].strip()
if len(values) == 1:
continue
if meta.get(key, None) is None:
meta[key] = values[1].strip()
elif isinstance(meta[key], str):
meta[key] = [meta[key]]
elif isinstance(meta[key], list):
meta[key].append(values[1].strip())

return meta
2 changes: 1 addition & 1 deletion commonmeta/writers/bibtex_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def write_bibtex(metadata: Commonmeta) -> str:
journal = (
container.get("title", None)
if type_ not in ["inbook", "inproceedings"]
and container.get("type") in ["Journal", "Blog"]
and container.get("type") in ["Journal", "Periodical"]
else None
)
booktitle = (
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name="commonmeta-py"
version="0.9.3"
version="0.9.4"
description="Library for conversions to/from the Commonmeta scholarly metadata format"
authors=["Martin Fenner <[email protected]>"]
readme = "README.md"
Expand Down
Loading

0 comments on commit a610c1c

Please sign in to comment.