Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accelerate block tag iteration #205

Merged
merged 3 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20241015-174841.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Accelerate block tag iteration.
time: 2024-10-15T17:48:41.299686-04:00
custom:
Author: peterallenwebb
Issue: "205"
43 changes: 41 additions & 2 deletions dbt_common/clients/_jinja_blocks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import dataclasses
import re
from collections import namedtuple
from typing import Iterator, List, Optional, Set, Union
from typing import Dict, Iterator, List, Optional, Set, Union

from dbt_common.exceptions import (
BlockDefinitionNotAtTopError,
Expand Down Expand Up @@ -104,11 +105,25 @@ def end_pat(self) -> re.Pattern:
QUOTE_START_PATTERN = regex(r"""(?P<quote>(['"]))""")


@dataclasses.dataclass
class PositionedMatch:
    """Cached outcome of one regex search, used to accelerate TagIterator.

    Records the position a search was started from together with the first
    match found from that position, or None if the search found no match.
    """

    # Index in the searched text at which the search began.
    start_pos: int
    # First match found when searching from start_pos; None if nothing matched.
    match: Optional[re.Match]


class TagIterator:
    def __init__(self, text: str) -> None:
        """Store ``text`` and start with the current position at index 0."""
        self.text: str = text
        self.pos: int = 0

        # A cache of the most recent search result seen for each pattern,
        # maintained in order to avoid slowly re-searching long inputs many
        # times. Each instance gets its own, initially empty, cache.
        self._past_matches: Dict[re.Pattern, PositionedMatch] = {}

def linepos(self, end: Optional[int] = None) -> str:
"""Return relative position in line.

Expand All @@ -130,7 +145,31 @@ def rewind(self, amount: int = 1) -> None:
self.pos -= amount

def _search(self, pattern: re.Pattern) -> Optional[re.Match]:
    """Return the first match of ``pattern`` at or after ``self.pos``.

    The most recent search result for each pattern is kept in
    ``self._past_matches`` so that repeated searches over a long input do
    not rescan the same text. Returns None when there is no match.
    """
    cached = self._past_matches.get(pattern)

    # A cached result can be reused only if that search started at or before
    # the current position, and it either found nothing (so nothing lies
    # ahead of us either) or found a match whose start we have not yet
    # passed, which means it is still the next match.
    if cached is not None and cached.start_pos <= self.pos:
        if cached.match is None or cached.match.start() >= self.pos:
            return cached.match

    # No usable cache entry: it is missing, was recorded from a position
    # further along in the text, or its match now lies behind us. Search
    # from the current position and remember the result.
    match = pattern.search(self.text, self.pos)
    self._past_matches[pattern] = PositionedMatch(self.pos, match)
    return match

def _match(self, pattern: re.Pattern) -> Optional[re.Match]:
    """Match ``pattern`` against ``self.text`` anchored at ``self.pos``.

    Unlike a search, this only succeeds if the pattern matches starting
    exactly at the current position; otherwise None is returned.
    """
    return pattern.match(self.text, self.pos)
Expand Down
Loading