Skip to content

Commit

Permalink
Accelerate block tag iteration (#205)
Browse files Browse the repository at this point in the history
* Accelerate block tag iteration.

* Add changelog entry

* Fix formatting.
  • Loading branch information
peterallenwebb authored Oct 15, 2024
1 parent ed11c6c commit 1bad6b7
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 2 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20241015-174841.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Accelerate block tag iteration.
time: 2024-10-15T17:48:41.299686-04:00
custom:
Author: peterallenwebb
Issue: "205"
43 changes: 41 additions & 2 deletions dbt_common/clients/_jinja_blocks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import dataclasses
import re
from collections import namedtuple
from typing import Iterator, List, Optional, Set, Union
from typing import Dict, Iterator, List, Optional, Set, Union

from dbt_common.exceptions import (
BlockDefinitionNotAtTopError,
Expand Down Expand Up @@ -104,11 +105,25 @@ def end_pat(self) -> re.Pattern:
QUOTE_START_PATTERN = regex(r"""(?P<quote>(['"]))""")


@dataclasses.dataclass
class PositionedMatch:
"""This class is used to cache search information, accelerating TagIterator.
It records the result of searching a string from the start_pos and also
the position of the first match, or None if there is no match."""

start_pos: int
match: Optional[re.Match]


class TagIterator:
def __init__(self, text: str) -> None:
self.text: str = text
self.pos: int = 0

# A cache of the most recent matches seen for each pattern, maintained
# in order to avoid slowly re-searching long inputs many times.
self._past_matches: Dict[re.Pattern, PositionedMatch] = {}

def linepos(self, end: Optional[int] = None) -> str:
"""Return relative position in line.
Expand All @@ -130,7 +145,31 @@ def rewind(self, amount: int = 1) -> None:
self.pos -= amount

def _search(self, pattern: re.Pattern) -> Optional[re.Match]:
return pattern.search(self.text, self.pos)
# Check to see if we have cached a search for this pattern already.
positioned_match = self._past_matches.get(pattern)

if positioned_match is None or positioned_match.start_pos > self.pos:
# We did not have a cached search, or we did, but it was done at a location
# further along in the string and can't be used. Do a search and cache it.
match = pattern.search(self.text, self.pos)
self._past_matches[pattern] = PositionedMatch(self.pos, match)
else:
# We have a cached search and its start position falls before (or at) the
# current search position...
if positioned_match.match is None:
# ...but there is no match in the rest of the text.
match = None
elif positioned_match.match.start() >= self.pos:
# ...and there is a match we can reuse, because we have not yet passed
# the start position of the match. It's still the next match.
match = positioned_match.match
else:
# ...but we have passed the start of the cached match, and need to do a
# new search from our current position and cache it.
match = pattern.search(self.text, self.pos)
self._past_matches[pattern] = PositionedMatch(self.pos, match)

return match

def _match(self, pattern: re.Pattern) -> Optional[re.Match]:
return pattern.match(self.text, self.pos)
Expand Down

0 comments on commit 1bad6b7

Please sign in to comment.