Skip to content

Commit

Permalink
Merge pull request #4 from audricschiltknecht/release-1.7
Browse files Browse the repository at this point in the history
Release 1.7
  • Loading branch information
kjd authored Aug 16, 2017
2 parents cb1441c + 4956200 commit 4fc02db
Show file tree
Hide file tree
Showing 39 changed files with 1,987 additions and 495 deletions.
32 changes: 32 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Byte-compiled / optimized
__pycache__/
*.py[cod]

# Distribution
build/
dist/
*.egg-info/
PKG-INFO

# Vim's swap files
*.swp

# MacOSX's stuff
.DS_Store

# Virtual env
venv/

# Cover testing
.coverage
cover/

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# IDE
.idea

# Documentation
doc/_build/
151 changes: 0 additions & 151 deletions PKG-INFO

This file was deleted.

5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ This version conforms to the specification [draft-davies-idntables-09](https://t

## Acknowledgment

This toolset was implemented by Viagenie (Audric Schiltknecht, David
Drouin and Marc Blanchet) and Wil Tan on an ICANN contract.
This toolset was implemented by Viagenie (Audric Schiltknecht, Julien Bernard,
David Drouin and Marc Blanchet) and Wil Tan on an ICANN contract.

## License

Expand Down Expand Up @@ -101,6 +101,7 @@ Other tools are available to manipulate LGR files and labels:
* `lgr_compare.py` is used to compare 2 LGR (output textual diff, merge, intersection).
* `lgr_diff_collision.py` takes 2 LGR and one set of labels,
and test for collisions between labels and generated variants from the 2 LGR.
* `lgr_merge_set.py` takes a set of LGRs and creates a merged LGR from the provided set.

### LGR Diff and Collision

Expand Down
13 changes: 12 additions & 1 deletion doc/algo.rst
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,17 @@ Given 2 LGR and a label list, the tool will:
* Generate the index for all labels in the list against the second LGR (same method as collision).
* Compare the generated labels (and variants) for the 2 LGRs.

As generating the labels' variants is a very expensive process, the tool is asynchronous: a notification will be sent by email when the processing is done
As generating the labels' variants is a very expensive process, the tool is asynchronous: a notification will be sent by email when the processing is done

Cross-script variants
---------------------

Given an LGR set and a label list, the tool will iterate through the label list and for each label:

* Check that the label is eligible in the merged LGR.
* Iterate through the element LGRs composing the set:
* Generate the variants of the label

As generating the labels' variants is a very expensive process, the tool is asynchronous: a notification will be sent by email when the processing is done.

.. _`section 8.5 of RFC 7940`: https://tools.ietf.org/html/rfc7940#section-8.5
39 changes: 39 additions & 0 deletions lgr/char.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,45 @@ def get_variants(self, cp_or_sequence):
char = self.get_char(cp_or_sequence)
return char.get_variants()

def get_variant_sets(self):
    """
    Return the list of variant sets contained in the repertoire.

    For every char of the repertoire, a set is built from the char's own
    code point plus every code point reachable by following variants
    (each variant is looked up in the repertoire to continue the walk).
    The result only describes real variant sets if the repertoire is
    symmetric and transitive.

    Note: every char is walked independently, so this function is NOT
    optimised in complexity nor memory consumption.

    :returns: Sorted list of variant sets without duplicates, with a
              variant set being a sorted tuple of the code points included
              in the set. Sets containing a single code point are omitted.
    """
    def reachable(start):
        """
        Collect the code points reachable from `start` through variants.

        Depth-first walk using an explicit stack rather than recursion, so
        that a long chain of variants cannot hit the recursion limit.
        """
        visited = set([start.cp])
        pending = [start]
        while pending:
            char = pending.pop()
            for variant in char.get_variants():
                if variant.cp in visited:
                    continue
                try:
                    reverse_char = self.get_char(variant.cp)
                except NotInLGR:
                    # Variant is not in the repertoire (invalid LGR): ignore it
                    continue
                # Mark when queued so a code point is never queued twice
                visited.add(reverse_char.cp)
                pending.append(reverse_char)
        return visited

    variant_sets = set()
    for index in sorted(self._chardict):
        for char in self._chardict[index]:
            # Tuples are hashable, so identical sets found from different
            # starting chars collapse into a single entry.
            members = tuple(sorted(reachable(char)))
            if len(members) > 1:
                variant_sets.add(members)

    # Each tuple is sorted, so ordering the tuples orders the sets by their
    # lowest code point first.
    return sorted(variant_sets)

def del_reference(self, ref_id):
"""
Iterate through the repertoire to remove the reference ref_id
Expand Down
2 changes: 1 addition & 1 deletion lgr/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def __init__(self, name=None,
"""
self.name = name
self.comment = comment
self.ref = ref
self.ref = ref or []
self.from_tag = from_tag
self.unicode_property = unicode_property
self.codepoints = set(codepoints or [])
Expand Down
36 changes: 18 additions & 18 deletions lgr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import collections
import math
from cStringIO import StringIO
from collections import OrderedDict

from lgr.metadata import ReferenceManager, Metadata
from lgr.char import Repertoire, CharSequence
Expand Down Expand Up @@ -35,31 +36,31 @@

# Default disposition used in
# 7.3. Determining a Disposition for a Label or Variant Label, step 3
DEFAULT_DISPOSITION = "allocate"
DEFAULT_DISPOSITION = "allocatable"

# Invalid disposition which cuases the label to be removed
# 7.2. Determining Variants for a Label, step 5
# Invalid disposition which causes the label to be removed
# 8.2. Determining Variants for a Label, step 5
INVALID_DISPOSITION = "invalid"

# Default actions
# 6.6. Default Actions
# 7.6. Default Actions
DEFAULT_ACTIONS = (
Action(disp='invalid', comment="Default action for invalid",
any_variant=['invalid']),
Action(disp='block', comment="Default action for block",
any_variant=['block']),
Action(disp='allocate', comment="Default action for allocate",
any_variant=['allocate']),
Action(disp='activate', comment="Default action for activate",
all_variants=['activate']),
Action(disp='allocate', comment="Default catch-all")
Action(disp='blocked', comment="Default action for blocked",
any_variant=['blocked']),
Action(disp='allocatable', comment="Default action for allocatable",
any_variant=['allocatable']),
Action(disp='activated', comment="Default action for activated",
all_variants=['activated']),
Action(disp='valid', comment="Default catch-all")
)
DEFAULT_ACTIONS_XML = (
'<action disp="invalid" comment="Default action for invalid" any-variant="invalid"/>',
'<action disp="block" comment="Default action for block" any-variant="block"/>',
'<action disp="allocate" comment="Default action for allocate" any-variant="allocate"/>',
'<action disp="activate" comment="Default action for activate" all-variants="activate"/>',
'<action disp="allocate" comment="Default catch-all" />'
'<action disp="blocked" comment="Default action for blocked" any-variant="blocked"/>',
'<action disp="allocatable" comment="Default action for allocatable" any-variant="allocatable"/>',
'<action disp="activated" comment="Default action for activated" all-variants="activated"/>',
'<action disp="valid" comment="Default catch-all" />'
)

# Maximum number of variants to generate
Expand Down Expand Up @@ -109,15 +110,15 @@ def __init__(self,
# - store its name in self.rules array (ordered structure).
# - store the rule in the self.rules_lookup dict (indexed by its name).
self.rules = []
self.rules_lookup = {}
self.rules_lookup = OrderedDict()
# Until we know how to edit rules, keep the XML text here
self.rules_xml = []

# Classes are ordered, so when adding a class:
# - store its name in self.classes array (ordered structure).
# - store the rule in the self.classes_lookup dict (indexed by its name).
self.classes = []
self.classes_lookup = {}
self.classes_lookup = OrderedDict()
# Until we know how to edit classes, keep the XML text here
self.classes_xml = []

Expand Down Expand Up @@ -1008,7 +1009,6 @@ def test_label_eligible(self, label, collect_log=True):
ch.setLevel(logging.INFO)
rule_logger.addHandler(ch)


# Start by testing presence of code points in LGR
(valid, label_part, not_in_lgr) = self._test_preliminary_eligibility(label)
if not valid:
Expand Down
18 changes: 18 additions & 0 deletions lgr/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,21 @@ def __init__(self, rule_name, message):

def __unicode__(self):
return '<rule %s>: %s' % (self.rule_name, self.message)


class LGRInvalidLabelException(LGRException):
    """
    Raised when a label is invalid in an LGR.

    Both constructor arguments are kept as attributes of the same name
    (`label` and `message`) so that handlers can inspect them.
    """

    def __init__(self, label, message):
        super(LGRInvalidLabelException, self).__init__()
        self.label, self.message = label, message


class LGRLabelCollisionException(LGRException):
    """
    Raised when a label collides in an LGR set.

    Carries no extra data: the constructor takes no argument.
    """

    def __init__(self):
        super(LGRLabelCollisionException, self).__init__()
2 changes: 1 addition & 1 deletion lgr/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def __init__(self, value, scope_type=None):
self.scope_type = scope_type

def __unicode__(self):
return self.value
return '{}: {}'.format(self.scope_type, self.value)

def __eq__(self, other):
return (self.value == other.value) \
Expand Down
Loading

0 comments on commit 4fc02db

Please sign in to comment.