Skip to content

Commit

Permalink
Ensure that every inline parser literal has its corresponding lexer rule
Browse files Browse the repository at this point in the history
Following ANTLR's practice, every inline parser rule literal is converted
into a lexer rule, either by finding an appropriate existing lexer rule or
by artificially creating one and assigning its reference to the literal usage.
  • Loading branch information
renatahodovan committed Sep 25, 2023
1 parent e376875 commit 1980d9e
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 4 deletions.
28 changes: 24 additions & 4 deletions grammarinator/tool/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,12 @@ def __str__(self):

class UnlexerRuleNode(RuleNode):

def __init__(self, name):
_lit_cnt = 0

def __init__(self, name=None):
if not name:
name = f'T__{UnlexerRuleNode._lit_cnt}'
UnlexerRuleNode._lit_cnt += 1
super().__init__(name, None, 'UnlexerRule')
self.start_ranges = None

Expand Down Expand Up @@ -790,8 +795,16 @@ def build_expr(node, parent_id):

if lexer_rule:
rule.start_ranges.append((ord(src[0]), ord(src[0]) + 1))

graph.add_edge(frm=parent_id, to=graph.add_node(LiteralNode(src=src)))
graph.add_edge(frm=parent_id, to=graph.add_node(LiteralNode(src=src)))
else:
# Ensure that every inline literal in parser rules has its lexer rule
# found or implicitly created.
lit_id = literal_lookup.get(src)
if not lit_id:
lit_id = graph.add_node(UnlexerRuleNode())
literal_lookup[src] = lit_id
graph.add_edge(frm=lit_id, to=graph.add_node(LiteralNode(src=src)))
graph.add_edge(frm=parent_id, to=lit_id)

elif isinstance(node, ParserRuleContext) and node.getChildCount():
for child in node.children:
Expand All @@ -802,6 +815,10 @@ def build_expr(node, parent_id):

build_expr(node, rule.id)

# Save lexer rules with constant literals to enable resolving them in parser rules.
if lexer_rule and len(rule.out_edges) == 1 and isinstance(rule.out_edges[0].dst, LiteralNode):
literal_lookup[rule.out_edges[0].dst.src] = rule.id

def build_prequel(node):
assert isinstance(node, ANTLRv4Parser.GrammarSpecContext)

Expand Down Expand Up @@ -863,7 +880,8 @@ def build_rules(node):
if duplicate_rules:
raise ValueError(f'Rule redefinition(s): {", ".join(duplicate_rules)}')

for rule_args in generator_rules:
# Process lexer rules first so that the literal lookup table is populated before parser rules are built.
for rule_args in sorted(generator_rules, key=lambda r: int(isinstance(r[0], UnparserRuleNode))):
build_rule(*rule_args)

if default_rule:
Expand All @@ -883,6 +901,8 @@ def build_rules(node):
dot_charset = Charset(Charset.dot[graph.dot])
graph.charsets.append(dot_charset)

literal_lookup = {}

for root in [lexer_root, parser_root]:
if root:
build_rules(root)
Expand Down
17 changes: 17 additions & 0 deletions tests/grammars/CustomImplicitLiteralGenerator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright (c) 2023 Renata Hodovan, Akos Kiss.
#
# Licensed under the BSD 3-Clause License
# <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
# This file may not be copied, modified, or distributed except
# according to those terms.

# This custom unparser is used by ImplicitLiteral.g4

from ImplicitLiteralGenerator import ImplicitLiteralGenerator


class CustomImplicitLiteralGenerator(ImplicitLiteralGenerator):
    """Generator subclass that records whether the ``HELLO`` rule was invoked.

    Used by the ImplicitLiteral.g4 test grammar, whose ``start`` rule asserts
    on ``self.hello_called`` after its inline ``'hello'`` literal.
    """

    def HELLO(self, parent=None):
        """Delegate to the inherited ``HELLO`` rule and flag that it ran.

        :param parent: forwarded unchanged to the inherited ``HELLO``.
        :return: whatever the inherited ``HELLO`` returns, so that overriding
            the rule does not hide its result from callers.
        """
        result = super().HELLO(parent=parent)
        # Set only after the inherited rule has completed, as before.
        self.hello_called = True
        return result
28 changes: 28 additions & 0 deletions tests/grammars/ImplicitLiteral.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright (c) 2023 Renata Hodovan, Akos Kiss.
*
* Licensed under the BSD 3-Clause License
* <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
* This file may not be copied, modified, or distributed except
* according to those terms.
*/

/*
* Tests handling of implicit literals in parser rules.
*/

// TEST-PROCESS: {grammar}.g4 -o {tmpdir}
// TEST-GENERATE: Custom{grammar}Generator.Custom{grammar}Generator -r start -j 1 -o {tmpdir}/{grammar}%d.txt


grammar ImplicitLiteral;

@members {
def __init__(self, *, model=None, listeners=None, max_depth=inf):
super().__init__(model=model, listeners=listeners, max_depth=max_depth)
self.hello_called = False
}


start : 'hello' {assert self.hello_called, "Implicit lexer rule was not called."};
HELLO : 'hello' ;

0 comments on commit 1980d9e

Please sign in to comment.