Skip to content

Commit

Permalink
use unidecode for clinvar
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed May 2, 2016
1 parent c5053d5 commit 646109c
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 3 deletions.
1 change: 1 addition & 0 deletions docs/content/history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Release History
#. Use SQLAlchemy for table definitions to support different RDBMS backends.
#. Several optimizations to loading.
#. X-linked recessive and dominant and de novo tools.
#. Raise Exceptions rather than sys.exit() to facilitate use as a library. (thanks @brainstorm).

0.18.3
======
Expand Down
4 changes: 2 additions & 2 deletions gemini/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ def get_clinvar_info(var):

raw_dbsource = info_map['CLNSRC'] or None
#interpret 8-bit strings and convert to plain text
clinvar.clinvar_dbsource = raw_dbsource.decode('utf8', 'ignore').encode('ascii', 'ignore')
clinvar.clinvar_dbsource = unidecode(raw_dbsource.decode('utf8'))
clinvar.clinvar_dbsource_id = info_map['CLNSRCID'] or None
clinvar.clinvar_origin = \
clinvar.lookup_clinvar_origin(info_map['CLNORIGIN'])
Expand All @@ -614,7 +614,7 @@ def get_clinvar_info(var):
clinvar.clinvar_dsdbid = info_map['CLNDSDBID'] or None
# Remap all unicode characters into plain text string replacements
raw_disease_name = info_map['CLNDBN'] or None
clinvar.clinvar_disease_name = raw_disease_name.decode('utf8', 'ignore').encode('ascii', 'ignore')
clinvar.clinvar_disease_name = unidecode(raw_disease_name.decode('utf8')).decode('string_escape')
# Clinvar represents commas as \x2c. Make them commas.

clinvar.clinvar_disease_acc = info_map['CLNACC'] or None
Expand Down
2 changes: 1 addition & 1 deletion master-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ fi
echo "Using gemini found at: $SCRIPT_PATH" 1>&2

cd test
#rm ./*.db
rm ./*.db

# setup the testing databases from the testing VCF files
set -e
Expand Down

0 comments on commit 646109c

Please sign in to comment.