From ead7a0447378cb7c10b213f770761c791d8dec92 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 15 Jul 2021 18:24:53 +0300 Subject: [PATCH 01/11] Small fix --- lark/exceptions.py | 2 +- lark/lexer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 0dfb659c..6f7d32da 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -12,7 +12,7 @@ class ConfigurationError(LarkError, ValueError): pass -def assert_config(value, options, msg='Got %r, expected one of %s'): +def assert_config(value, options: list, msg='Got %r, expected one of %s'): if value not in options: raise ConfigurationError(msg % (value, options)) diff --git a/lark/lexer.py b/lark/lexer.py index a0170016..36ed6bc8 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -130,7 +130,7 @@ def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line= inst = super(Token, cls).__new__(cls, value) inst.type = type_ - inst.start_pos = start_pos if start_pos is not None else pos_in_stream + inst.start_pos = start_pos inst.value = value inst.line = line inst.column = column @@ -287,7 +287,7 @@ def match(self, text, pos): return m.group(0), type_from_index[m.lastindex] -def _regexp_has_newline(r): +def _regexp_has_newline(r: str): r"""Expressions that may indicate newlines in a regexp: - newlines (\n) - escaped newline (\\n) From d0f25985afe7eeedfce36cd2bd9d586c2c279f87 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 15 Jul 2021 22:18:26 +0300 Subject: [PATCH 02/11] Fixup: change typehint to Sequence --- lark/exceptions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 6f7d32da..76db0fca 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -1,3 +1,5 @@ +from collections.abc import Sequence + from .utils import logger, NO_VALUE @@ -12,7 +14,7 @@ class ConfigurationError(LarkError, ValueError): pass -def assert_config(value, options: list, msg='Got %r, expected 
one of %s'): +def assert_config(value, options: Sequence, msg='Got %r, expected one of %s'): if value not in options: raise ConfigurationError(msg % (value, options)) From ee75166376c22f061a7363ffd5776484861ef5f3 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 15 Jul 2021 22:20:18 +0300 Subject: [PATCH 03/11] Remove tests for versions below 3.6 --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 792c6bd4..5784ae8d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,6 @@ dist: xenial language: python python: - - "3.4" - - "3.5" - "3.6" - "3.7" - "3.8" From 8aa4bfbd95a6f70ddc5eb551ad6d5e427029345d Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 15 Jul 2021 22:25:43 +0300 Subject: [PATCH 04/11] Fixup: moved import to standalone. --- lark/exceptions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 76db0fca..3fbd807c 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -1,9 +1,8 @@ -from collections.abc import Sequence - from .utils import logger, NO_VALUE ###{standalone +from collections.abc import Sequence class LarkError(Exception): From 3e9e5d84497acae2941c4a4f4d5692ca4dcd5f8e Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 19 Jul 2021 17:35:56 +0300 Subject: [PATCH 05/11] Refactor: Split class to move code out of standalone section --- lark/parser_frontends.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 0e53dd58..47f291ad 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -32,20 +32,13 @@ def __init__(self, parser_type, lexer_type): self.parser_type = parser_type self.lexer_type = lexer_type - def __call__(self, lexer_conf, parser_conf, options): - assert isinstance(lexer_conf, LexerConf) - assert isinstance(parser_conf, ParserConf) - parser_conf.parser_type = self.parser_type - lexer_conf.lexer_type = self.lexer_type 
- return ParsingFrontend(lexer_conf, parser_conf, options) - def deserialize(self, data, memo, lexer_conf, callbacks, options): parser_conf = ParserConf.deserialize(data['parser_conf'], memo) parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug) parser_conf.callbacks = callbacks return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser) - + # ... Continued later in the module class ParsingFrontend(Serialize): @@ -237,3 +230,12 @@ def _transform(self, tree): def _apply_callback(self, tree): return self.callbacks[tree.rule](tree.children) + + +class MakeParsingFrontend(MakeParsingFrontend): + def __call__(self, lexer_conf, parser_conf, options): + assert isinstance(lexer_conf, LexerConf) + assert isinstance(parser_conf, ParserConf) + parser_conf.parser_type = self.parser_type + lexer_conf.lexer_type = self.lexer_type + return ParsingFrontend(lexer_conf, parser_conf, options) \ No newline at end of file From 3f507fc073b938464a6c2266525b8f5d2679fab9 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Fri, 30 Jul 2021 00:17:17 +0300 Subject: [PATCH 06/11] A few more type annotations, reduce use of inline flags --- lark/grammars/python.lark | 4 ++-- lark/indenter.py | 4 ++++ lark/parse_tree_builder.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lark/grammars/python.lark b/lark/grammars/python.lark index 684193d9..e73362dd 100644 --- a/lark/grammars/python.lark +++ b/lark/grammars/python.lark @@ -10,8 +10,8 @@ DEC_NUMBER: /0|[1-9][\d_]*/i HEX_NUMBER.2: /0x[\da-f]*/i OCT_NUMBER.2: /0o[0-7]*/i BIN_NUMBER.2 : /0b[0-1]*/i -FLOAT_NUMBER.2: /((\d+\.[\d_]*|\.[\d_]+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i -IMAG_NUMBER.2: /\d+j/i | FLOAT_NUMBER "j"i +FLOAT_NUMBER.2: /((\d+\.[\d_]*|\.[\d_]+)([Ee][-+]?\d+)?|\d+([Ee][-+]?\d+))/ +IMAG_NUMBER.2: /\d+[Jj]/ | FLOAT_NUMBER /[Jj]/ // Comma-separated list (with an optional trailing comma) diff --git a/lark/indenter.py b/lark/indenter.py index 7e1263dd..f5a1895f 100644 --- a/lark/indenter.py 
+++ b/lark/indenter.py @@ -9,6 +9,10 @@ class DedentError(LarkError): pass class Indenter(PostLex): + OPEN_PAREN_types: list + CLOSE_PAREN_types: list + DEDENT_type: str + def __init__(self): self.paren_level = None self.indent_level = None diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index e95003aa..fa418a98 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -151,7 +151,7 @@ def _should_expand(sym): return not sym.is_term and sym.name.startswith('_') -def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices): +def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: list): # Prepare empty_indices as: How many Nones to insert at each index? if _empty_indices: assert _empty_indices.count(False) == len(expansion) From a8473e7e5d60542f4a68c88ee9fa775ebdb9ffe2 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 4 Aug 2021 16:38:13 +0300 Subject: [PATCH 07/11] A tiny bit more typing info --- lark/lexer.py | 2 +- lark/parse_tree_builder.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lark/lexer.py b/lark/lexer.py index 36ed6bc8..fc50b57b 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -184,7 +184,7 @@ def __eq__(self, other): return self.char_pos == other.char_pos and self.newline_char == other.newline_char - def feed(self, token, test_newline=True): + def feed(self, token: Token, test_newline=True): """Consume a token and calculate the new line & column. As an optional optimization, set test_newline=False if token doesn't contain a newline. 
diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index fa418a98..4342b244 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -1,3 +1,5 @@ +from typing import List + from .exceptions import GrammarError, ConfigurationError from .lexer import Token from .tree import Tree @@ -151,7 +153,7 @@ def _should_expand(sym): return not sym.is_term and sym.name.startswith('_') -def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: list): +def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]): # Prepare empty_indices as: How many Nones to insert at each index? if _empty_indices: assert _empty_indices.count(False) == len(expansion) From 1457e01e7e088b1984825bad531b110fead80ea2 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 17 Aug 2021 11:01:40 +0300 Subject: [PATCH 08/11] Fixes to typing and tests --- lark/common.py | 8 ++++---- lark/exceptions.py | 8 +++----- lark/grammar.py | 3 +-- lark/indenter.py | 3 +-- lark/lark.py | 24 ++++++++++++------------ lark/lexer.py | 15 +++++++-------- lark/tools/standalone.py | 7 +++++++ lark/tree.py | 9 +++++---- lark/visitors.py | 2 +- tests/test_tools.py | 2 +- 10 files changed, 42 insertions(+), 39 deletions(-) diff --git a/lark/common.py b/lark/common.py index 12149b64..6c3962e8 100644 --- a/lark/common.py +++ b/lark/common.py @@ -1,14 +1,14 @@ from copy import deepcopy from types import ModuleType +from typing import Callable, Collection, Dict, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from .lark import PostLex from .utils import Serialize from .lexer import TerminalDef, Token ###{standalone -from typing import Any, Callable, Collection, Dict, Optional, TYPE_CHECKING - -if TYPE_CHECKING: - from .lark import PostLex _Callback = Callable[[Token], Token] diff --git a/lark/exceptions.py b/lark/exceptions.py index 797d5cb6..55e9a3a8 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -1,16 +1,14 @@ from .utils 
import logger, NO_VALUE - - -###{standalone -from collections.abc import Sequence - from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, TYPE_CHECKING +from collections.abc import Sequence if TYPE_CHECKING: from .lexer import Token from .parsers.lalr_interactive_parser import InteractiveParser from .tree import Tree +###{standalone + class LarkError(Exception): pass diff --git a/lark/grammar.py b/lark/grammar.py index 25aec17b..3d6f0ff5 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -1,10 +1,9 @@ +from typing import Optional, Tuple, ClassVar from .utils import Serialize ###{standalone -from typing import Optional, Tuple, ClassVar - class Symbol(Serialize): __slots__ = ('name',) diff --git a/lark/indenter.py b/lark/indenter.py index b7b3369f..0a18347f 100644 --- a/lark/indenter.py +++ b/lark/indenter.py @@ -1,19 +1,18 @@ "Provides Indentation services for languages with indentation similar to Python" from abc import ABC, abstractmethod +from typing import List, Iterator from .exceptions import LarkError from .lark import PostLex from .lexer import Token ###{standalone -from typing import List, Iterator class DedentError(LarkError): pass class Indenter(PostLex, ABC): - paren_level: int indent_level: List[int] diff --git a/lark/lark.py b/lark/lark.py index 8b8af4e6..aed3346b 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -1,7 +1,18 @@ from abc import ABC, abstractmethod import sys, os, pickle, hashlib import tempfile - +from typing import ( + TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional, + Tuple, Iterable, IO, Any, TYPE_CHECKING +) +if TYPE_CHECKING: + from .parsers.lalr_interactive_parser import InteractiveParser + from .visitors import Transformer + if sys.version_info >= (3, 8): + from typing import Literal + else: + from typing_extensions import Literal + from .exceptions import ConfigurationError, assert_config, UnexpectedInput from .utils import Serialize, SerializeMemoizer, FS, isascii, logger 
from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource @@ -21,18 +32,7 @@ ###{standalone -from typing import ( - TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional, - Tuple, Iterable, IO, Any, TYPE_CHECKING -) -if TYPE_CHECKING: - from .parsers.lalr_interactive_parser import InteractiveParser - from .visitors import Transformer - if sys.version_info >= (3, 8): - from typing import Literal - else: - from typing_extensions import Literal class PostLex(ABC): @abstractmethod diff --git a/lark/lexer.py b/lark/lexer.py index f826e06d..173b3f53 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -3,6 +3,13 @@ from abc import abstractmethod, ABC import re from contextlib import suppress +from typing import ( + TypeVar, Type, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, + Pattern as REPattern, ClassVar, TYPE_CHECKING +) +from types import ModuleType +if TYPE_CHECKING: + from .common import LexerConf from .utils import classify, get_regexp_width, Serialize from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken @@ -10,14 +17,6 @@ ###{standalone from copy import copy -from types import ModuleType -from typing import ( - TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, - Pattern as REPattern, ClassVar, TYPE_CHECKING -) - -if TYPE_CHECKING: - from .common import LexerConf class Pattern(Serialize, ABC): diff --git a/lark/tools/standalone.py b/lark/tools/standalone.py index 1cc8f814..7282699a 100644 --- a/lark/tools/standalone.py +++ b/lark/tools/standalone.py @@ -25,6 +25,13 @@ # from abc import ABC, abstractmethod +from collections.abc import Sequence +from types import ModuleType +from typing import ( + TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, + Union, Iterable, IO, TYPE_CHECKING, + Pattern as REPattern, ClassVar, Set, +) ###} import sys diff --git a/lark/tree.py b/lark/tree.py 
index 90ec0fe9..1ca0c629 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -1,3 +1,4 @@ + try: from future_builtins import filter # type: ignore except ImportError: @@ -6,10 +7,7 @@ import sys from copy import deepcopy - -###{standalone -from collections import OrderedDict -from typing import List, Callable, Iterator, Union, Optional, Any, TYPE_CHECKING +from typing import List, Callable, Iterator, Union, Optional, TYPE_CHECKING if TYPE_CHECKING: from .lexer import TerminalDef @@ -18,6 +16,9 @@ else: from typing_extensions import Literal +###{standalone +from collections import OrderedDict + class Meta: empty: bool diff --git a/lark/visitors.py b/lark/visitors.py index 2c7309f5..954886a7 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -1,3 +1,4 @@ +from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional from abc import ABC from functools import wraps @@ -8,7 +9,6 @@ ###{standalone from inspect import getmembers, getmro -from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional _T = TypeVar('_T') _R = TypeVar('_R') diff --git a/tests/test_tools.py b/tests/test_tools.py index 7a732d13..fd42b1c5 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -24,7 +24,7 @@ def _create_standalone(self, grammar, compress=False): standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf, compress=compress) code = code_buf.getvalue() - context = {'__doc__': None} + context = {'__doc__': None, '__name__': 'test_standalone'} exec(code, context) return context From 0fddb7fef6de455bca7110266633df8341e90f21 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 17 Aug 2021 11:14:39 +0300 Subject: [PATCH 09/11] Replace '...' 
with 'NotImplemented' --- lark/indenter.py | 12 ++++++------ lark/lexer.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lark/indenter.py b/lark/indenter.py index 0a18347f..a4bbb246 100644 --- a/lark/indenter.py +++ b/lark/indenter.py @@ -74,31 +74,31 @@ def always_accept(self): @property @abstractmethod def NL_type(self) -> str: - ... + return NotImplemented @property @abstractmethod def OPEN_PAREN_types(self) -> List[str]: - ... + return NotImplemented @property @abstractmethod def CLOSE_PAREN_types(self) -> List[str]: - ... + return NotImplemented @property @abstractmethod def INDENT_type(self) -> str: - ... + return NotImplemented @property @abstractmethod def DEDENT_type(self) -> str: - ... + return NotImplemented @property @abstractmethod def tab_len(self) -> int: - ... + return NotImplemented ###} diff --git a/lark/lexer.py b/lark/lexer.py index 173b3f53..512e8ffe 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -359,7 +359,7 @@ class Lexer(ABC): """ @abstractmethod def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]: - ... 
+ return NotImplemented def make_lexer_state(self, text): line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n') From a8900c13b71f44ac27f7441203ce96461a7c553f Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 17 Aug 2021 14:59:30 +0300 Subject: [PATCH 10/11] Changed sequence to collection --- lark/exceptions.py | 5 ++--- lark/utils.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 7a331ad5..2660e352 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -1,6 +1,5 @@ from .utils import logger, NO_VALUE -from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, TYPE_CHECKING -from collections.abc import Sequence +from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, TYPE_CHECKING if TYPE_CHECKING: from .lexer import Token @@ -17,7 +16,7 @@ class ConfigurationError(LarkError, ValueError): pass -def assert_config(value, options: Sequence, msg='Got %r, expected one of %s'): +def assert_config(value, options: Collection, msg='Got %r, expected one of %s'): if value not in options: raise ConfigurationError(msg % (value, options)) diff --git a/lark/utils.py b/lark/utils.py index 2b6e3b68..ffaa593d 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -241,7 +241,7 @@ def combine_alternatives(lists): try: import atomicwrites except ImportError: - atomicwrites = None + atomicwrites = None # type: ignore class FS: exists = os.path.exists From 343c22e21802fa19d44cc0fff1a0d8c6f1b07244 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 17 Aug 2021 15:00:38 +0300 Subject: [PATCH 11/11] NotImplemented -> NotImplementedError --- lark/indenter.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lark/indenter.py b/lark/indenter.py index a4bbb246..1a9e5871 100644 --- a/lark/indenter.py +++ b/lark/indenter.py @@ -74,31 +74,31 @@ def always_accept(self): @property @abstractmethod def 
NL_type(self) -> str: - return NotImplemented + raise NotImplementedError() @property @abstractmethod def OPEN_PAREN_types(self) -> List[str]: - return NotImplemented + raise NotImplementedError() @property @abstractmethod def CLOSE_PAREN_types(self) -> List[str]: - return NotImplemented + raise NotImplementedError() @property @abstractmethod def INDENT_type(self) -> str: - return NotImplemented + raise NotImplementedError() @property @abstractmethod def DEDENT_type(self) -> str: - return NotImplemented + raise NotImplementedError() @property @abstractmethod def tab_len(self) -> int: - return NotImplemented + raise NotImplementedError() ###}