Merge pull request #4 from audricschiltknecht/release-1.7

Release 1.7
icann · Aug 16, 2017 · 4fc02db · 4fc02db
2 parents cb1441c + 4956200
commit 4fc02db
Show file tree

Hide file tree

Showing 39 changed files with 1,987 additions and 495 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,32 @@
+# Byte-compiled / optimized
+__pycache__/
+*.py[cod]
+
+# Distribution
+build/
+dist/
+*.egg-info/
+PKG-INFO
+
+# Vim's swap files
+*.swp
+
+# MacOSX's stuff
+.DS_Store
+
+# Virtual env
+venv/
+
+# Cover testing
+.coverage
+cover/
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# IDE
+.idea
+
+# Documentation
+doc/_build/
diff --git a/PKG-INFO b/PKG-INFO
diff --git a/README.md b/README.md
@@ -6,8 +6,8 @@ This version conforms to the specification [draft-davies-idntables-09](https://t
 
 ## Acknowledgment
 
-This toolset was implemented by Viagenie (Audric Schiltknecht, David
-Drouin and Marc Blanchet) and Wil Tan on an ICANN contract.
+This toolset was implemented by Viagenie (Audric Schiltknecht, Julien Bernard,
+David Drouin and Marc Blanchet) and Wil Tan on an ICANN contract.
 
 ## License
 
@@ -101,6 +101,7 @@ Other tools are available to manipulate LGR files and labels:
 * `lgr_compare.py` is used to compare 2 LGR (output textual diff, merge, intersection).
 * `lgr_diff_collision.py` takes 2 LGRs and one set of labels,
  and tests for collisions between labels and generated variants from the 2 LGRs.
+* `lgr_merge_set.py` takes some LGRs and creates a merged LGR from the provided set.
 
 ### LGR Diff and Collision
 

diff --git a/doc/algo.rst b/doc/algo.rst
@@ -138,6 +138,17 @@ Given 2 LGR and a label list, the tool will:
 * Generate the index for all labels in the list against the second LGR (same method as collision).
 * Compare the generated labels (and variants) for the 2 LGRs.
 
-As generating the labels' variants is a very expensive process, the tool is asynchronous: a notification will be sent by email when the processing is done 
+As generating the labels' variants is a very expensive process, the tool is asynchronous: a notification will be sent by email when the processing is done.
+
+Cross-script variants
+---------------------
+
+Given an LGR set and a label list, the tool will iterate through the label list and for each label:
+
+* Check that the label is eligible in the merged LGR.
+* Iterate through each element LGR composing the set:
+ * Generate the variants of the label
+
+As generating the labels' variants is a very expensive process, the tool is asynchronous: a notification will be sent by email when the processing is done.
 
 .. _`section 8.5 of RFC 7940`: https://tools.ietf.org/html/rfc7940#section-8.5
diff --git a/lgr/char.py b/lgr/char.py
@@ -713,6 +713,45 @@ def get_variants(self, cp_or_sequence):
  char = self.get_char(cp_or_sequence)
  return char.get_variants()
 
+ def get_variant_sets(self):
+ """
+ Return the list of variant sets contained in the repertoire.
+
+ This function assumes the repertoire is symmetric and transitive.
+
+ Note: This function is naive and is NOT optimised for complexity
+ or memory consumption.
+
+ :returns: List of variant sets, with each variant set being
+ a sorted tuple of the code points included in the set.
+ """
+ def dfs(char, visited=None):
+ """ Utility function to iterate in a char/variants (Depth-First Search)."""
+ if visited is None:
+ visited = set()
+ visited.add(char.cp)
+ for variant in char.get_variants():
+ if variant.cp in visited:
+ continue
+ try:
+ reverse_char = self.get_char(variant.cp)
+ except NotInLGR:
+ # Ignore invalid LGR
+ continue
+ dfs(reverse_char, visited)
+ return visited
+
+ variant_sets = set()
+ for index in sorted(self._chardict.keys()):
+ for char in self._chardict[index]:
+ # XXX: Convert to tuple here so it is hashable
+ variant_set = tuple(sorted(dfs(char)))
+ if len(variant_set) > 1:
+ lowest = min(variant_set)
+ variant_sets.add((lowest, variant_set))
+
+ return [variants for _, variants in sorted(variant_sets)]
+
  def del_reference(self, ref_id):
  """
  Iterate through the repertoire to remove the reference ref_id

diff --git a/lgr/classes.py b/lgr/classes.py
@@ -63,7 +63,7 @@ def __init__(self, name=None,
  """
  self.name = name
  self.comment = comment
- self.ref = ref
+ self.ref = ref or []
  self.from_tag = from_tag
  self.unicode_property = unicode_property
  self.codepoints = set(codepoints or [])

diff --git a/lgr/core.py b/lgr/core.py
@@ -8,6 +8,7 @@
 import collections
 import math
 from cStringIO import StringIO
+from collections import OrderedDict
 
 from lgr.metadata import ReferenceManager, Metadata
 from lgr.char import Repertoire, CharSequence
@@ -35,31 +36,31 @@
 
 # Default disposition used in
 # 8.3. Determining a Disposition for a Label or Variant Label, step 3
-DEFAULT_DISPOSITION = "allocate"
+DEFAULT_DISPOSITION = "allocatable"
 
-# Invalid disposition which cuases the label to be removed
-# 7.2. Determining Variants for a Label, step 5
+# Invalid disposition which causes the label to be removed
+# 8.2. Determining Variants for a Label, step 5
 INVALID_DISPOSITION = "invalid"
 
 # Default actions
-# 6.6. Default Actions
+# 7.6. Default Actions
 DEFAULT_ACTIONS = (
  Action(disp='invalid', comment="Default action for invalid",
  any_variant=['invalid']),
- Action(disp='block', comment="Default action for block",
- any_variant=['block']),
- Action(disp='allocate', comment="Default action for allocate",
- any_variant=['allocate']),
- Action(disp='activate', comment="Default action for activate",
- all_variants=['activate']),
- Action(disp='allocate', comment="Default catch-all")
+ Action(disp='blocked', comment="Default action for blocked",
+ any_variant=['blocked']),
+ Action(disp='allocatable', comment="Default action for allocatable",
+ any_variant=['allocatable']),
+ Action(disp='activated', comment="Default action for activated",
+ all_variants=['activated']),
+ Action(disp='valid', comment="Default catch-all")
 )
 DEFAULT_ACTIONS_XML = (
  '<action disp="invalid" comment="Default action for invalid" any-variant="invalid"/>',
- '<action disp="block" comment="Default action for block" any-variant="block"/>',
- '<action disp="allocate" comment="Default action for allocate" any-variant="allocate"/>',
- '<action disp="activate" comment="Default action for activate" all-variants="activate"/>',
- '<action disp="allocate" comment="Default catch-all" />'
+ '<action disp="blocked" comment="Default action for blocked" any-variant="blocked"/>',
+ '<action disp="allocatable" comment="Default action for allocatable" any-variant="allocatable"/>',
+ '<action disp="activated" comment="Default action for activated" all-variants="activated"/>',
+ '<action disp="valid" comment="Default catch-all" />'
 )
 
 # Maximum number of variants to generate
@@ -109,15 +110,15 @@ def __init__(self,
  # - store its name in self.rules array (ordered structure).
  # - store the rule in the self.rules_lookup dict (indexed by its name).
  self.rules = []
- self.rules_lookup = {}
+ self.rules_lookup = OrderedDict()
  # Until we know how to edit rules, keep the XML text here
  self.rules_xml = []
 
  # Classes are ordered, so when adding a class:
  # - store its name in self.classes array (ordered structure).
  # - store the rule in the self.classes_lookup dict (indexed by its name).
  self.classes = []
- self.classes_lookup = {}
+ self.classes_lookup = OrderedDict()
  # Until we know how to edit classes, keep the XML text here
  self.classes_xml = []
 
@@ -1008,7 +1009,6 @@ def test_label_eligible(self, label, collect_log=True):
  ch.setLevel(logging.INFO)
  rule_logger.addHandler(ch)
 
-
  # Start by testing presence of code points in LGR
  (valid, label_part, not_in_lgr) = self._test_preliminary_eligibility(label)
  if not valid:

diff --git a/lgr/exceptions.py b/lgr/exceptions.py
@@ -238,3 +238,21 @@ def __init__(self, rule_name, message):
 
  def __unicode__(self):
  return '<rule %s>: %s' % (self.rule_name, self.message)
+
+
+class LGRInvalidLabelException(LGRException):
+ """
+ Raised when a label is invalid in an LGR
+ """
+ def __init__(self, label, message):
+ super(LGRInvalidLabelException, self).__init__()
+ self.label = label
+ self.message = message
+
+
+class LGRLabelCollisionException(LGRException):
+ """
+ Raised when a label collides in an LGR set
+ """
+ def __init__(self):
+ super(LGRLabelCollisionException, self).__init__()
diff --git a/lgr/metadata.py b/lgr/metadata.py
@@ -99,7 +99,7 @@ def __init__(self, value, scope_type=None):
  self.scope_type = scope_type
 
  def __unicode__(self):
- return self.value
+ return '{}: {}'.format(self.scope_type, self.value)
 
  def __eq__(self, other):
  return (self.value == other.value) \