add ris reader

front-matter · Jan 14, 2024 · a610c1c · a610c1c
1 parent e987283
commit a610c1c
Show file tree

Hide file tree

Showing 37 changed files with 17,082 additions and 58 deletions.
diff --git a/README.md b/README.md
@@ -42,7 +42,7 @@ Commonmeta-py reads and/or writes these metadata formats:
 | [JATS](https://jats.nlm.nih.gov/) | jats | application/vnd.jats+xml | later | later |
 | [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) | csv | text/csv | no | later |
 | [BibTex](http://en.wikipedia.org/wiki/BibTeX) | bibtex | application/x-bibtex | later | yes |
-| [RIS](http://en.wikipedia.org/wiki/RIS_(file_format)) | ris | application/x-research-info-systems | later | yes |
+| [RIS](http://en.wikipedia.org/wiki/RIS_(file_format)) | ris | application/x-research-info-systems | yes  | yes |
 | [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json | later | yes |
 | [JSON Feed](https://www.jsonfeed.org/) | json_feed_item | application/feed+json | yes | later |
 

diff --git a/commonmeta/metadata/metadata.py b/commonmeta/metadata/metadata.py
@@ -28,6 +28,7 @@
  read_inveniordm,
  read_kbase,
  read_commonmeta,
+ read_ris,
 )
 from ..writers import (
  write_datacite,
@@ -116,6 +117,8 @@ def __init__(self, string: Optional[str], **kwargs):
  elif via == "kbase":
  data = json.loads(string)
  meta = read_kbase(data)
+ elif via == "ris":
+ meta = read_ris(string)
  # elif via == "bibtex":
  # data = yaml.safe_load(string)
  # meta = read_bibtex(data)

diff --git a/commonmeta/readers/__init__.py b/commonmeta/readers/__init__.py
@@ -11,3 +11,4 @@
 from .inveniordm_reader import get_inveniordm, read_inveniordm
 from .kbase_reader import read_kbase
 from .commonmeta_reader import read_commonmeta
+from .ris_reader import read_ris
diff --git a/commonmeta/readers/ris_reader.py b/commonmeta/readers/ris_reader.py
@@ -1,47 +1,36 @@
 """RIS reader for commonmeta-py"""
 from typing import Optional
 
-from ..utils import compact, normalize_url
+from ..utils import compact, normalize_url, wrap
+from ..author_utils import get_authors
 from ..date_utils import get_date_from_parts
 from ..doi_utils import normalize_doi, doi_from_url
-from ..constants import (
- RIS_TO_CM_TRANSLATIONS,
- Commonmeta
-)
+from ..constants import RIS_TO_CM_TRANSLATIONS, Commonmeta
 
 
 def read_ris(data: Optional[str], **kwargs) -> Commonmeta:
  """read_ris"""
 
- # read_options = ActiveSupport::HashWithIndifferentAccess.
- # new(options.except(:doi, :id, :url,
- # :sandbox, :validate, :ra))
-
  meta = ris_meta(data=data)
-
  read_options = kwargs or {}
-
+ print(meta)
  if not isinstance(meta, dict):
  return {"state": "not_found"}
 
- id_ = read_options.get('doi', None) or normalize_doi(meta.get("DO", None))
+ id_ = read_options.get("doi", None) or normalize_doi(meta.get("DO", None))
  type_ = RIS_TO_CM_TRANSLATIONS.get(meta.get("TY", None), "Other")
+ container_type = "Journal" if type_ == "JournalArticle" else None
 
- # author = wrap(meta.get('AU', None)).map {| a | { 'creatorName' = > a } }
+ def get_author(author):
+ """get_author"""
+ return {"creatorName": author}
 
- date_parts = str(meta.get("PY", None)).split("/")
- created_date_parts = str(meta.get("Y1", None)).split("/")
- dates = []
+ authors = [get_author(i) for i in wrap(meta.get("AU", None))]
+ date = {}
  if meta.get("PY", None) is not None:
- dates.append({"date": get_date_from_parts(
- *date_parts), "dateType": "Issued"})
+ date["published"] = get_date_from_parts(*str(meta.get("PY", None)).split("/"))
  if meta.get("Y1", None) is not None:
- dates.append(
- {"date": get_date_from_parts(
- *created_date_parts), "dateType": "Created"}
- )
- # publication_year = get_date_from_parts(*date_parts).to_s[0..3]
-
+ date["created"] = get_date_from_parts(*str(meta.get("Y1", None)).split("/"))
  # related_identifiers = if meta.fetch('T2', nil).present? & & meta.fetch('SN', nil).present?
  # [{'type' = > 'Periodical',
  # 'id'= > meta.fetch('SN', nil),
@@ -51,12 +40,14 @@ def read_ris(data: Optional[str], **kwargs) -> Commonmeta:
  # else
  # []
  # end
+ descriptions = None
+ if meta.get("AB", None) is not None:
+ descriptions = [{"description": meta.get("AB"), "descriptionType": "Abstract"}]
  if meta.get("T2", None) is not None:
  container = compact(
  {
- "type": "Journal",
+ "type": container_type,
  "title": meta.get("T2", None),
- "identifier": meta.get("SN", None),
  "volume": meta.get("VL", None),
  "issue": meta.get("IS", None),
  "firstPage": meta.get("SP", None),
@@ -65,34 +56,25 @@ def read_ris(data: Optional[str], **kwargs) -> Commonmeta:
  )
  else:
  container = None
-
+ subjects = wrap(meta.get("KW", None))
  state = "findable" if meta.get("DO", None) or read_options else "not_found"
- # subjects = Array.wrap(meta.fetch('KW', nil)).reduce([]) do |sum, subject|
- # sum += name_to_fos(subject)
-
- # sum
- # end
 
  return {
  "id": id_,
  "type": type_,
- "doi": doi_from_url(id_),
+ "doi": doi_from_url(id_) if id_ else None,
  "url": normalize_url(meta.get("UR", None)),
- 'titles': None, # meta.get('T1', nil).present? ? [{ 'title': meta.fetch('T1', nil) }] : nil,
- 'creators': None, # get_authors(author),
+ "titles": [{"title": meta.get("T1", None)}],
+ "descriptions": descriptions,
+ "contributors": get_authors(authors),
  "publisher": meta.get("PB", "(:unav)"),
- 'publication_year': None, # publication_year,
  "container": container,
  # 'related_identifiers': related_identifiers,
- "dates": dates,
- # 'descriptions': if meta.fetch('AB', nil).present?
- # [{ 'description': sanitize(meta.fetch('AB')),
- # 'descriptionType': 'Abstract' }]
- # end,
- # 'subjects': subjects,
+ "date": date,
+ "subjects": subjects,
  "language": meta.get("LA", None),
  "state": state,
- }  # .merge(read_options)
+ } | read_options
 
 
 def ris_meta(data):
@@ -101,15 +83,15 @@ def ris_meta(data):
  if data is None:
  return meta
  for line in data.split("\n"):
- key, value = line.split("-", 1)
- key = key.strip()
- value = value.strip()
- meta[key] = value
- return compact(meta)
-
-
-# h = Hash.new { |h, k| h[k] = [] }
-#  string.split("\n").each_with_object(h) do |line, _sum|
-# k, v = line.split('-', 2)
-# h[k.strip] << v.to_s.strip
-#  end.transform_values(&:unwrap).compact
+ values = line.split("-", 2)
+ key = values[0].strip()
+ if len(values) == 1:
+  continue
+  if meta.get(key, None) is None:
+ meta[key] = values[1].strip()
+ elif isinstance(meta[key], str):
+  meta[key] = [meta[key]]
+ elif isinstance(meta[key], list):
+  meta[key].append(values[1].strip())
+
+ return meta
diff --git a/commonmeta/writers/bibtex_writer.py b/commonmeta/writers/bibtex_writer.py
@@ -52,7 +52,7 @@ def write_bibtex(metadata: Commonmeta) -> str:
  journal = (
  container.get("title", None)
  if type_ not in ["inbook", "inproceedings"]
- and container.get("type") in ["Journal", "Blog"]
+ and container.get("type") in ["Journal", "Periodical"]
  else None
  )
  booktitle = (

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name="commonmeta-py"
-version="0.9.3"
+version="0.9.4"
 description="Library for conversions to/from the Commonmeta scholarly metadata format"
 authors=["Martin Fenner <[email protected]>"]
 readme = "README.md"