Skip to content

Commit

Permalink
Use tuple IDs for graph nodes in processor
Browse files Browse the repository at this point in the history
Joining the parts of node identifiers is unnecessary and loses
information by concealing the boundaries of rule names, label names,
and label indices. The patch employs tuples instead of concatenated strings.
  • Loading branch information
renatahodovan committed May 20, 2024
1 parent 5e8d0c4 commit edd51a4
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 29 deletions.
42 changes: 23 additions & 19 deletions grammarinator/tool/processor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2023 Renata Hodovan, Akos Kiss.
# Copyright (c) 2017-2024 Renata Hodovan, Akos Kiss.
# Copyright (c) 2020 Sebastian Kimberk.
#
# Licensed under the BSD 3-Clause License
Expand Down Expand Up @@ -43,9 +43,9 @@ class Node:

def __init__(self, id=None):
if id is None:
id = Node._cnt
id = (Node._cnt, )
Node._cnt += 1
self.id = id
self.id = id if isinstance(id, tuple) else (id, )
self.out_edges = []

@property
Expand Down Expand Up @@ -87,9 +87,11 @@ def __repr__(self):

class RuleNode(Node):

def __init__(self, name, label, label_idx, type):
super().__init__(name if label is None else f'{name}_{label}' if label_idx is None else f'{name}_{label}_{label_idx}')
self.name = name if label is None else f'{name}_{label}'
def __init__(self, name, type):
name = name if isinstance(name, tuple) else (name,)
super().__init__(name)
# Keep rule and label name, but exclude label index if exists.
self.name = '_'.join(part for part in name[:2])
self.type = type
self.min_size = None

Expand All @@ -110,14 +112,14 @@ def __init__(self, name=None):
if not name:
name = f'T__{UnlexerRuleNode._lit_cnt}'
UnlexerRuleNode._lit_cnt += 1
super().__init__(name, None, None, 'UnlexerRule')
super().__init__(name, 'UnlexerRule')
self.start_ranges = None


class UnparserRuleNode(RuleNode):

def __init__(self, name, label=None, label_idx=None):
super().__init__(name, label, label_idx, 'UnparserRule')
def __init__(self, name):
super().__init__(name, 'UnparserRule')


class ImagRuleNode(Node):
Expand Down Expand Up @@ -157,7 +159,7 @@ def __init__(self):
class AlternationNode(Node):

def __init__(self, rule_id, idx, conditions):
super().__init__(f'{rule_id}_alt{idx}')
super().__init__((rule_id, 'alt', idx))
self.rule_id = rule_id # Identifier of the container rule.
self.idx = idx # Index of the alternation in the container rule.
self.conditions = conditions
Expand Down Expand Up @@ -198,7 +200,7 @@ def __str__(self):
class AlternativeNode(Node):

def __init__(self, rule_id, alt_idx, idx):
super().__init__(f'{rule_id}_alt{alt_idx}_{idx}')
super().__init__((rule_id, 'alt', alt_idx, idx))
self.rule_id = rule_id # Identifier of the container rule.
self.alt_idx = alt_idx # Index of the container alternation inside the container rule.
self.idx = idx # Index of the alternative in the container alternation.
Expand All @@ -214,7 +216,7 @@ def __str__(self):
class QuantifierNode(Node):

def __init__(self, rule_id, idx, start, stop):
super().__init__(f'{rule_id}_quant{idx}')
super().__init__((rule_id, 'quant', idx))
self.rule_id = rule_id # Identifier of the container rule.
self.idx = idx # Index of the quantifier in the container rule.
self.start = start
Expand Down Expand Up @@ -336,6 +338,8 @@ def add_node(self, node):
return node.id

def add_edge(self, frm, to, args=None):
frm = frm if isinstance(frm, tuple) else (frm,)
to = to if isinstance(to, tuple) else (to,)
assert frm in self.vertices, f'{frm} not in vertices.'
assert to in self.vertices, f'{to} not in vertices.'
self.vertices[frm].out_edges.append(Edge(dst=self.vertices[to], args=args))
Expand Down Expand Up @@ -900,7 +904,7 @@ def build_expr(node, parent_id):
if labels:
# Add label index to rules to distinguish the alternatives with recurring labels.
label_idx = labels[:i + 1].count(labels[i]) - 1 if labels[i] in recurring_labels else None
rule_node_id = graph.add_node(UnparserRuleNode(name=rule.name, label=labels[i], label_idx=label_idx))
rule_node_id = graph.add_node(UnparserRuleNode(name=(rule.name, labels[i], label_idx) if label_idx is not None else (rule.name, labels[i])))
graph.add_edge(frm=alternative_id, to=rule_node_id)
build_rule(graph.vertices[rule_node_id], child)
else:
Expand All @@ -913,7 +917,7 @@ def build_expr(node, parent_id):
for label in recurring_labels:
# Mask conditions to enable only the alternatives with the common label.
new_conditions = [cond if labels[ci] == label else '0' for ci, cond in enumerate(conditions)]
recurring_rule_id = graph.add_node(UnparserRuleNode(name=rule.name, label=label))
recurring_rule_id = graph.add_node(UnparserRuleNode(name=(rule.name, label)))
labeled_alt_id = graph.add_node(AlternationNode(idx=0,
conditions=append_unique(graph.alt_conds, new_conditions) if all(isfloat(cond) for cond in new_conditions) else new_conditions,
rule_id=recurring_rule_id))
Expand All @@ -923,7 +927,7 @@ def build_expr(node, parent_id):
labeled_alternative_id = graph.add_node(AlternativeNode(rule_id=f'{rule.id}_{label}', alt_idx=0, idx=i))
graph.add_edge(frm=labeled_alt_id, to=labeled_alternative_id)
if labels[i] == label:
graph.add_edge(frm=labeled_alternative_id, to=f'{rule.name}_{label}_{recurring_idx}')
graph.add_edge(frm=labeled_alternative_id, to=(rule.name, label, recurring_idx))
recurring_idx += 1
else:
graph.add_edge(frm=labeled_alternative_id, to=lambda_id)
Expand Down Expand Up @@ -988,7 +992,7 @@ def build_expr(node, parent_id):
else:
if '_dot' not in graph.vertices:
# Create an artificial `_dot` rule with an alternation of all the lexer rules.
parser_dot_id = graph.add_node(UnparserRuleNode(name='_dot', label=None))
parser_dot_id = graph.add_node(UnparserRuleNode(name='_dot'))
unlexer_ids = [v.name for vid, v in graph.vertices.items() if isinstance(v, UnlexerRuleNode)]
alt_id = graph.add_node(AlternationNode(rule_id=parser_dot_id, idx=0, conditions=[1] * len(unlexer_ids)))
graph.add_edge(frm=parser_dot_id, to=alt_id)
Expand Down Expand Up @@ -1124,7 +1128,7 @@ def build_rules(node):
duplicate_rules.append(rule_node.id)

if duplicate_rules:
raise ValueError(f'Rule redefinition(s): {", ".join(duplicate_rules)}')
raise ValueError(f'Rule redefinition(s): {", ".join(["_".join(id) for id in duplicate_rules])}')

# Ensure to process lexer rules first to lookup table from literal constants.
for rule_args in sorted(generator_rules, key=lambda r: int(isinstance(r[0], UnparserRuleNode))):
Expand Down Expand Up @@ -1163,9 +1167,9 @@ def build_rules(node):
def _analyze_graph(graph, root=None):
root = root or graph.default_rule
min_distances = defaultdict(lambda: inf)
min_distances[root] = 0
min_distances[(root,)] = 0

work_list = [root]
work_list = [(root,)]
while work_list:
v = work_list.pop(0)
for out_v in graph.vertices[v].out_neighbours:
Expand Down
20 changes: 10 additions & 10 deletions grammarinator/tool/resources/codegen/GeneratorTemplate.py.jinja
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{#
# Copyright (c) 2020-2023 Renata Hodovan, Akos Kiss.
# Copyright (c) 2020-2024 Renata Hodovan, Akos Kiss.
#
# Licensed under the BSD 3-Clause License
# <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
Expand Down Expand Up @@ -30,9 +30,9 @@ pass

{% macro processRuleNode(node, inedge) %}
{% if inedge.reserve != 0 %}
self._reserve({{ inedge.reserve }}, self.{{ node.id }}, {% if inedge.args %}{% for _, k, v in inedge.args %}{% if k %}{{ k }}={% endif %}{{ resolveVarRefs(v) }}, {% endfor %}{% endif %}parent=current)
self._reserve({{ inedge.reserve }}, self.{{ node.id | join('_') }}, {% if inedge.args %}{% for _, k, v in inedge.args %}{% if k %}{{ k }}={% endif %}{{ resolveVarRefs(v) }}, {% endfor %}{% endif %}parent=current)
{% else %}
self.{{ node.id }}({% if inedge.args %}{% for _, k, v in inedge.args %}{% if k %}{{ k }}={% endif %}{{ resolveVarRefs(v) }}, {% endfor %}{% endif %}parent=current)
self.{{ node.id | join('_') }}({% if inedge.args %}{% for _, k, v in inedge.args %}{% if k %}{{ k }}={% endif %}{{ resolveVarRefs(v) }}, {% endfor %}{% endif %}parent=current)
{% endif %}
{% endmacro %}

Expand Down Expand Up @@ -67,15 +67,15 @@ with AlternationContext(rule, {{ node.idx }}, {{ graph.name }}._alt_sizes[{{ nod
{% if simple_lits and simple_rules %}
choice{{ node.idx }} = alt{{ node.idx }}()
alt_src = [{% for lit in simple_lits %}{% if lit is not none %}'{{ lit | escape_string }}'{% else %}None{% endif %}{% if not loop.last %}, {% endif %}{% endfor %}][choice{{ node.idx }}]
alt_rule = [{% for rule in simple_rules %}{% if rule is not none %}self.{{ rule }}{% else %}None{% endif %}{% if not loop.last %}, {% endif %}{% endfor %}][choice{{ node.idx }}]
alt_rule = [{% for rule in simple_rules %}{% if rule is not none %}self.{{ rule | join('_') }}{% else %}None{% endif %}{% if not loop.last %}, {% endif %}{% endfor %}][choice{{ node.idx }}]
if alt_src is not None:
current.src += alt_src
else:
alt_rule(parent=current)
{% elif simple_lits %}
current.src += [{% for lit in simple_lits %}'{{ lit | escape_string }}'{% if not loop.last %}, {% endif %}{% endfor %}][alt{{ node.idx }}()]
{% elif simple_rules %}
[{% for rule in simple_rules %}self.{{ rule }}{% if not loop.last %}, {% endif %}{% endfor %}][alt{{ node.idx }}()](parent=current)
[{% for rule in simple_rules %}self.{{ rule | join('_') }}{% if not loop.last %}, {% endif %}{% endfor %}][alt{{ node.idx }}()](parent=current)
{% else %}
choice{{ node.idx }} = alt{{ node.idx }}()
{% for edge in node.out_edges if not edge.dst.is_lambda_alternative %}
Expand Down Expand Up @@ -135,8 +135,8 @@ else:
class {{ graph.name }}({{ graph.superclass }}):

{% for rule in graph.imag_rules %}
def {{ rule.id }}(self, parent=None):
with UnlexerRuleContext(self, '{{ rule.id }}', parent) as current:
def {{ rule.id | join('_') }}(self, parent=None):
with UnlexerRuleContext(self, '{{ rule.id | join('_') }}', parent) as current:
return current
{% endfor %}

Expand All @@ -145,7 +145,7 @@ class {{ graph.name }}({{ graph.superclass }}):
{% endif %}

{% for rule in graph.rules %}
def {{ rule.id }}(self, {% for t, k, v in rule.args %}{{ k }}{% if t %}:{{ t }}{% endif %}{% if v %}={{ resolveVarRefs(v) }}{% endif %}, {% endfor %}parent=None):
def {{ rule.id | join('_') }}(self, {% for t, k, v in rule.args %}{{ k }}{% if t %}:{{ t }}{% endif %}{% if v %}={{ resolveVarRefs(v) }}{% endif %}, {% endfor %}parent=None):
{% if rule.labels or rule.args or rule.locals or rule.returns %}
local_ctx = {
{%- for _, k, _ in rule.args -%}
Expand Down Expand Up @@ -173,11 +173,11 @@ class {{ graph.name }}({{ graph.superclass }}):

_default_rule = {{ graph.default_rule }}

_immutable_rules = ({% for name in graph.immutables %}'{{ name }}'{% if not loop.last or loop.length == 1 %}, {% endif%}{% endfor %})
_immutable_rules = ({% for name in graph.immutables %}'{{ name | join('_') }}'{% if not loop.last or loop.length == 1 %}, {% endif%}{% endfor %})

_rule_sizes = {
{% for rule in graph.rules %}
'{{ rule.id }}': RuleSize({{ rule.min_size.depth }}, {{ rule.min_size.tokens }}),
'{{ rule.id | join('_') }}': RuleSize({{ rule.min_size.depth }}, {{ rule.min_size.tokens }}),
{% endfor %}
}

Expand Down

0 comments on commit edd51a4

Please sign in to comment.