Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More updates to v1.0 #964

Merged
merged 15 commits into from
Aug 21, 2021
10 changes: 5 additions & 5 deletions lark/common.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from types import ModuleType
from copy import deepcopy
from types import ModuleType
from typing import Callable, Collection, Dict, Optional, TYPE_CHECKING

if TYPE_CHECKING:
from .lark import PostLex

from .utils import Serialize
from .lexer import TerminalDef, Token

###{standalone
from typing import Any, Callable, Collection, Dict, Optional, TYPE_CHECKING

if TYPE_CHECKING:
from .lark import PostLex

_Callback = Callable[[Token], Token]

Expand Down
9 changes: 4 additions & 5 deletions lark/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from .utils import logger, NO_VALUE


###{standalone

from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, TYPE_CHECKING
from collections.abc import Sequence

if TYPE_CHECKING:
from .lexer import Token
from .parsers.lalr_interactive_parser import InteractiveParser
from .tree import Tree

###{standalone

class LarkError(Exception):
pass

Expand All @@ -18,7 +17,7 @@ class ConfigurationError(LarkError, ValueError):
pass


def assert_config(value, options, msg='Got %r, expected one of %s'):
def assert_config(value, options: Sequence, msg='Got %r, expected one of %s'):
erezsh marked this conversation as resolved.
Show resolved Hide resolved
if value not in options:
raise ConfigurationError(msg % (value, options))

Expand Down
3 changes: 1 addition & 2 deletions lark/grammar.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from typing import Optional, Tuple, ClassVar

from .utils import Serialize

###{standalone

from typing import Optional, Tuple, ClassVar

class Symbol(Serialize):
__slots__ = ('name',)

Expand Down
4 changes: 2 additions & 2 deletions lark/grammars/python.lark
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ DEC_NUMBER: /0|[1-9][\d_]*/i
HEX_NUMBER.2: /0x[\da-f]*/i
OCT_NUMBER.2: /0o[0-7]*/i
BIN_NUMBER.2 : /0b[0-1]*/i
FLOAT_NUMBER.2: /((\d+\.[\d_]*|\.[\d_]+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i
IMAG_NUMBER.2: /\d+j/i | FLOAT_NUMBER "j"i
FLOAT_NUMBER.2: /((\d+\.[\d_]*|\.[\d_]+)([Ee][-+]?\d+)?|\d+([Ee][-+]?\d+))/
IMAG_NUMBER.2: /\d+[Jj]/ | FLOAT_NUMBER /[Jj]/


// Comma-separated list (with an optional trailing comma)
Expand Down
15 changes: 7 additions & 8 deletions lark/indenter.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
"Provides Indentation services for languages with indentation similar to Python"

from abc import ABC, abstractmethod
from typing import List, Iterator

from .exceptions import LarkError
from .lark import PostLex
from .lexer import Token

###{standalone
from typing import Tuple, List, Iterator, Optional

class DedentError(LarkError):
pass

class Indenter(PostLex, ABC):

paren_level: int
indent_level: List[int]

Expand Down Expand Up @@ -75,31 +74,31 @@ def always_accept(self):
@property
@abstractmethod
def NL_type(self) -> str:
...
return NotImplemented
erezsh marked this conversation as resolved.
Show resolved Hide resolved

@property
@abstractmethod
def OPEN_PAREN_types(self) -> List[str]:
...
return NotImplemented

@property
@abstractmethod
def CLOSE_PAREN_types(self) -> List[str]:
...
return NotImplemented

@property
@abstractmethod
def INDENT_type(self) -> str:
...
return NotImplemented

@property
@abstractmethod
def DEDENT_type(self) -> str:
...
return NotImplemented

@property
@abstractmethod
def tab_len(self) -> int:
...
return NotImplemented

###}
24 changes: 12 additions & 12 deletions lark/lark.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
from abc import ABC, abstractmethod
import sys, os, pickle, hashlib
import tempfile

from typing import (
TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional,
Tuple, Iterable, IO, Any, TYPE_CHECKING
)
if TYPE_CHECKING:
from .parsers.lalr_interactive_parser import InteractiveParser
from .visitors import Transformer
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal

from .exceptions import ConfigurationError, assert_config, UnexpectedInput
from .utils import Serialize, SerializeMemoizer, FS, isascii, logger
from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource
Expand All @@ -21,18 +32,7 @@


###{standalone
from typing import (
TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional,
Tuple, Iterable, IO, Any, TYPE_CHECKING
)

if TYPE_CHECKING:
from .parsers.lalr_interactive_parser import InteractiveParser
from .visitors import Transformer
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal

class PostLex(ABC):
@abstractmethod
Expand Down
21 changes: 10 additions & 11 deletions lark/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,20 @@
from abc import abstractmethod, ABC
import re
from contextlib import suppress
from typing import (
TypeVar, Type, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
Pattern as REPattern, ClassVar, TYPE_CHECKING
)
from types import ModuleType
if TYPE_CHECKING:
from .common import LexerConf

from .utils import classify, get_regexp_width, Serialize
from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken

###{standalone
from copy import copy

from types import ModuleType
from typing import (
TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
Pattern as REPattern, ClassVar, TYPE_CHECKING
)

if TYPE_CHECKING:
from .common import LexerConf

class Pattern(Serialize, ABC):

Expand Down Expand Up @@ -218,7 +217,7 @@ def __eq__(self, other):

return self.char_pos == other.char_pos and self.newline_char == other.newline_char

def feed(self, token, test_newline=True):
def feed(self, token: Token, test_newline=True):
"""Consume a token and calculate the new line & column.

As an optional optimization, set test_newline=False if token doesn't contain a newline.
Expand Down Expand Up @@ -320,7 +319,7 @@ def match(self, text, pos):
return m.group(0), type_from_index[m.lastindex]


def _regexp_has_newline(r):
def _regexp_has_newline(r: str):
r"""Expressions that may indicate newlines in a regexp:
- newlines (\n)
- escaped newline (\\n)
Expand Down Expand Up @@ -359,7 +358,7 @@ class Lexer(ABC):
"""
@abstractmethod
def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
...
return NotImplemented

def make_lexer_state(self, text):
line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n')
Expand Down
4 changes: 3 additions & 1 deletion lark/parse_tree_builder.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import List

from .exceptions import GrammarError, ConfigurationError
from .lexer import Token
from .tree import Tree
Expand Down Expand Up @@ -151,7 +153,7 @@ def _should_expand(sym):
return not sym.is_term and sym.name.startswith('_')


def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]):
# Prepare empty_indices as: How many Nones to insert at each index?
if _empty_indices:
assert _empty_indices.count(False) == len(expansion)
Expand Down
18 changes: 10 additions & 8 deletions lark/parser_frontends.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,13 @@ def __init__(self, parser_type, lexer_type):
self.parser_type = parser_type
self.lexer_type = lexer_type

def __call__(self, lexer_conf, parser_conf, options):
assert isinstance(lexer_conf, LexerConf)
assert isinstance(parser_conf, ParserConf)
parser_conf.parser_type = self.parser_type
lexer_conf.lexer_type = self.lexer_type
return ParsingFrontend(lexer_conf, parser_conf, options)

def deserialize(self, data, memo, lexer_conf, callbacks, options):
parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug)
parser_conf.callbacks = callbacks
return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)


# ... Continued later in the module
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So it won't be part of the standalone code.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest not splitting up the class. Instead move the complete class further down.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's also a bit strange to split the class in the middle. It breaks the indentation, and obfuscates the end of the standalone directive.

The only other place in Lark where that happens is tree.Tree, and I'm not sure it should stay that way.

But my mind isn't made up. If you think splitting the class using inheritance is too weird, I'll accept it.



class ParsingFrontend(Serialize):
Expand Down Expand Up @@ -237,3 +230,12 @@ def _transform(self, tree):

def _apply_callback(self, tree):
return self.callbacks[tree.rule](tree.children)


class MakeParsingFrontend(MakeParsingFrontend):
def __call__(self, lexer_conf, parser_conf, options):
assert isinstance(lexer_conf, LexerConf)
assert isinstance(parser_conf, ParserConf)
parser_conf.parser_type = self.parser_type
lexer_conf.lexer_type = self.lexer_type
return ParsingFrontend(lexer_conf, parser_conf, options)
7 changes: 7 additions & 0 deletions lark/tools/standalone.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
#

from abc import ABC, abstractmethod
from collections.abc import Sequence
from types import ModuleType
from typing import (
TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
Union, Iterable, IO, TYPE_CHECKING,
Pattern as REPattern, ClassVar, Set,
)
###}

import sys
Expand Down
9 changes: 5 additions & 4 deletions lark/tree.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

try:
from future_builtins import filter # type: ignore
except ImportError:
Expand All @@ -6,10 +7,7 @@
import sys
from copy import deepcopy


###{standalone
from collections import OrderedDict
from typing import List, Callable, Iterator, Union, Optional, Any, TYPE_CHECKING
from typing import List, Callable, Iterator, Union, Optional, TYPE_CHECKING

if TYPE_CHECKING:
from .lexer import TerminalDef
Expand All @@ -18,6 +16,9 @@
else:
from typing_extensions import Literal

###{standalone
from collections import OrderedDict
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wasn't the goal to move all imports into standalone.py?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not exactly, I was mostly concerned about repetitive imports.

OrderedDict is only used once (and it's a little meaningless in Python 3; I just didn't think removing it fit into this PR).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. (Also, OrderedDict is still needed for Python 3.6 on non-CPython implementations.)


class Meta:

empty: bool
Expand Down
2 changes: 1 addition & 1 deletion lark/visitors.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional
from abc import ABC
from functools import wraps

Expand All @@ -8,7 +9,6 @@

###{standalone
from inspect import getmembers, getmro
from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional

_T = TypeVar('_T')
_R = TypeVar('_R')
Expand Down
2 changes: 1 addition & 1 deletion tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def _create_standalone(self, grammar, compress=False):
standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf, compress=compress)
code = code_buf.getvalue()

context = {'__doc__': None}
context = {'__doc__': None, '__name__': 'test_standalone'}
exec(code, context)
return context

Expand Down