nsz Git - libfirm/blob - scripts/jinja2/lexer.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.lexer
   4     ~~~~~~~~~~~~
   5
   6     This module implements a Jinja / Python combination lexer. The
   7     `Lexer` class provided by this module is used to do some preprocessing
   8     for Jinja.
   9
  10     On the one hand it filters out invalid operators like the bitshift
  11     operators we don't allow in templates. On the other hand it separates
  12     template code and python code in expressions.
  13
  14     :copyright: (c) 2010 by the Jinja Team.
  15     :license: BSD, see LICENSE for more details.
  16 """
  17 import re
  18 from operator import itemgetter
  19 from collections import deque
  20 from jinja2.exceptions import TemplateSyntaxError
  21 from jinja2.utils import LRUCache, next
  22
  23
# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer.  `get_lexer` below is the only place
# in this module that reads from and writes to it.
_lexer_cache = LRUCache(50)

# static regular expressions (shared by the tag rules of every lexer)
# one or more whitespace characters
whitespace_re = re.compile(r'\s+', re.U)
# a single- or double-quoted string literal; a backslash escapes the
# following character (including a quote or, because of re.S, a newline)
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
# a run of decimal digits
integer_re = re.compile(r'\d+')

# we use the unicode identifier rule if this python version is able
# to handle unicode identifiers, otherwise the standard ASCII one.
# The probe below simply tries to compile a non-ASCII identifier.
try:
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
else:
    from jinja2 import _stringdefs
    name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start,
                                         _stringdefs.xid_continue))

# digits, a dot and more digits; the lookbehind rejects a match that is
# directly preceded by another dot
float_re = re.compile(r'(?<!\.)\d+\.\d+')
# any of the three newline conventions, "\r\n" being matched as one unit
newline_re = re.compile(r'(\r\n|\r|\n)')
  47
# intern the token type names and keep references to them.  Token types
# are compared by identity in this module (e.g. ``token.type is TOKEN_EOF``),
# which relies on the names being interned here and in `Token.__new__`.
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')
TOKEN_DATA = intern('data')
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')
  98
  99 # bind operators to token types
 100 operators = {
 101     '+':            TOKEN_ADD,
 102     '-':            TOKEN_SUB,
 103     '/':            TOKEN_DIV,
 104     '//':           TOKEN_FLOORDIV,
 105     '*':            TOKEN_MUL,
 106     '%':            TOKEN_MOD,
 107     '**':           TOKEN_POW,
 108     '~':            TOKEN_TILDE,
 109     '[':            TOKEN_LBRACKET,
 110     ']':            TOKEN_RBRACKET,
 111     '(':            TOKEN_LPAREN,
 112     ')':            TOKEN_RPAREN,
 113     '{':            TOKEN_LBRACE,
 114     '}':            TOKEN_RBRACE,
 115     '==':           TOKEN_EQ,
 116     '!=':           TOKEN_NE,
 117     '>':            TOKEN_GT,
 118     '>=':           TOKEN_GTEQ,
 119     '<':            TOKEN_LT,
 120     '<=':           TOKEN_LTEQ,
 121     '=':            TOKEN_ASSIGN,
 122     '.':            TOKEN_DOT,
 123     ':':            TOKEN_COLON,
 124     '|':            TOKEN_PIPE,
 125     ',':            TOKEN_COMMA,
 126     ';':            TOKEN_SEMICOLON
 127 }
 128
 129 reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
 130 assert len(operators) == len(reverse_operators), 'operators dropped'
 131 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
 132                          sorted(operators, key=lambda x: -len(x))))
 133
 134 ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT,
 135                             TOKEN_COMMENT_END, TOKEN_WHITESPACE,
 136                             TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN,
 137                             TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT])
 138 ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA,
 139                              TOKEN_COMMENT, TOKEN_LINECOMMENT])
 140
 141
 142 def _describe_token_type(token_type):
 143     if token_type in reverse_operators:
 144         return reverse_operators[token_type]
 145     return {
 146         TOKEN_COMMENT_BEGIN:        'begin of comment',
 147         TOKEN_COMMENT_END:          'end of comment',
 148         TOKEN_COMMENT:              'comment',
 149         TOKEN_LINECOMMENT:          'comment',
 150         TOKEN_BLOCK_BEGIN:          'begin of statement block',
 151         TOKEN_BLOCK_END:            'end of statement block',
 152         TOKEN_VARIABLE_BEGIN:       'begin of print statement',
 153         TOKEN_VARIABLE_END:         'end of print statement',
 154         TOKEN_LINESTATEMENT_BEGIN:  'begin of line statement',
 155         TOKEN_LINESTATEMENT_END:    'end of line statement',
 156         TOKEN_DATA:                 'template data / text',
 157         TOKEN_EOF:                  'end of template'
 158     }.get(token_type, token_type)
 159
 160
 161 def describe_token(token):
 162     """Returns a description of the token."""
 163     if token.type == 'name':
 164         return token.value
 165     return _describe_token_type(token.type)
 166
 167
 168 def describe_token_expr(expr):
 169     """Like `describe_token` but for token expressions."""
 170     if ':' in expr:
 171         type, value = expr.split(':', 1)
 172         if type == 'name':
 173             return value
 174     else:
 175         type = expr
 176     return _describe_token_type(type)
 177
 178
 179 def count_newlines(value):
 180     """Count the number of newline characters in the string.  This is
 181     useful for extensions that filter a stream.
 182     """
 183     return len(newline_re.findall(value))
 184
 185
 186 def compile_rules(environment):
 187     """Compiles all the rules from the environment into a list of rules."""
 188     e = re.escape
 189     rules = [
 190         (len(environment.comment_start_string), 'comment',
 191          e(environment.comment_start_string)),
 192         (len(environment.block_start_string), 'block',
 193          e(environment.block_start_string)),
 194         (len(environment.variable_start_string), 'variable',
 195          e(environment.variable_start_string))
 196     ]
 197
 198     if environment.line_statement_prefix is not None:
 199         rules.append((len(environment.line_statement_prefix), 'linestatement',
 200                       r'^\s*' + e(environment.line_statement_prefix)))
 201     if environment.line_comment_prefix is not None:
 202         rules.append((len(environment.line_comment_prefix), 'linecomment',
 203                       r'(?:^|(?<=\S))[^\S\r\n]*' +
 204                       e(environment.line_comment_prefix)))
 205
 206     return [x[1:] for x in sorted(rules, reverse=True)]
 207
 208
 209 class Failure(object):
 210     """Class that raises a `TemplateSyntaxError` if called.
 211     Used by the `Lexer` to specify known errors.
 212     """
 213
 214     def __init__(self, message, cls=TemplateSyntaxError):
 215         self.message = message
 216         self.error_class = cls
 217
 218     def __call__(self, lineno, filename):
 219         raise self.error_class(self.message, lineno, filename)
 220
 221
 222 class Token(tuple):
 223     """Token class."""
 224     __slots__ = ()
 225     lineno, type, value = (property(itemgetter(x)) for x in range(3))
 226
 227     def __new__(cls, lineno, type, value):
 228         return tuple.__new__(cls, (lineno, intern(str(type)), value))
 229
 230     def __str__(self):
 231         if self.type in reverse_operators:
 232             return reverse_operators[self.type]
 233         elif self.type == 'name':
 234             return self.value
 235         return self.type
 236
 237     def test(self, expr):
 238         """Test a token against a token expression.  This can either be a
 239         token type or ``'token_type:token_value'``.  This can only test
 240         against string values and types.
 241         """
 242         # here we do a regular string equality check as test_any is usually
 243         # passed an iterable of not interned strings.
 244         if self.type == expr:
 245             return True
 246         elif ':' in expr:
 247             return expr.split(':', 1) == [self.type, self.value]
 248         return False
 249
 250     def test_any(self, *iterable):
 251         """Test against multiple token expressions."""
 252         for expr in iterable:
 253             if self.test(expr):
 254                 return True
 255         return False
 256
 257     def __repr__(self):
 258         return 'Token(%r, %r, %r)' % (
 259             self.lineno,
 260             self.type,
 261             self.value
 262         )
 263
 264
 265 class TokenStreamIterator(object):
 266     """The iterator for tokenstreams.  Iterate over the stream
 267     until the eof token is reached.
 268     """
 269
 270     def __init__(self, stream):
 271         self.stream = stream
 272
 273     def __iter__(self):
 274         return self
 275
 276     def next(self):
 277         token = self.stream.current
 278         if token.type is TOKEN_EOF:
 279             self.stream.close()
 280             raise StopIteration()
 281         next(self.stream)
 282         return token
 283
 284
 285 class TokenStream(object):
 286     """A token stream is an iterable that yields :class:`Token`\s.  The
 287     parser however does not iterate over it but calls :meth:`next` to go
 288     one token ahead.  The current active token is stored as :attr:`current`.
 289     """
 290
 291     def __init__(self, generator, name, filename):
 292         self._next = iter(generator).next
 293         self._pushed = deque()
 294         self.name = name
 295         self.filename = filename
 296         self.closed = False
 297         self.current = Token(1, TOKEN_INITIAL, '')
 298         next(self)
 299
 300     def __iter__(self):
 301         return TokenStreamIterator(self)
 302
 303     def __nonzero__(self):
 304         return bool(self._pushed) or self.current.type is not TOKEN_EOF
 305
 306     eos = property(lambda x: not x, doc="Are we at the end of the stream?")
 307
 308     def push(self, token):
 309         """Push a token back to the stream."""
 310         self._pushed.append(token)
 311
 312     def look(self):
 313         """Look at the next token."""
 314         old_token = next(self)
 315         result = self.current
 316         self.push(result)
 317         self.current = old_token
 318         return result
 319
 320     def skip(self, n=1):
 321         """Got n tokens ahead."""
 322         for x in xrange(n):
 323             next(self)
 324
 325     def next_if(self, expr):
 326         """Perform the token test and return the token if it matched.
 327         Otherwise the return value is `None`.
 328         """
 329         if self.current.test(expr):
 330             return next(self)
 331
 332     def skip_if(self, expr):
 333         """Like :meth:`next_if` but only returns `True` or `False`."""
 334         return self.next_if(expr) is not None
 335
 336     def next(self):
 337         """Go one token ahead and return the old one"""
 338         rv = self.current
 339         if self._pushed:
 340             self.current = self._pushed.popleft()
 341         elif self.current.type is not TOKEN_EOF:
 342             try:
 343                 self.current = self._next()
 344             except StopIteration:
 345                 self.close()
 346         return rv
 347
 348     def close(self):
 349         """Close the stream."""
 350         self.current = Token(self.current.lineno, TOKEN_EOF, '')
 351         self._next = None
 352         self.closed = True
 353
 354     def expect(self, expr):
 355         """Expect a given token type and return it.  This accepts the same
 356         argument as :meth:`jinja2.lexer.Token.test`.
 357         """
 358         if not self.current.test(expr):
 359             expr = describe_token_expr(expr)
 360             if self.current.type is TOKEN_EOF:
 361                 raise TemplateSyntaxError('unexpected end of template, '
 362                                           'expected %r.' % expr,
 363                                           self.current.lineno,
 364                                           self.name, self.filename)
 365             raise TemplateSyntaxError("expected token %r, got %r" %
 366                                       (expr, describe_token(self.current)),
 367                                       self.current.lineno,
 368                                       self.name, self.filename)
 369         try:
 370             return self.current
 371         finally:
 372             next(self)
 373
 374
 375 def get_lexer(environment):
 376     """Return a lexer which is probably cached."""
 377     key = (environment.block_start_string,
 378            environment.block_end_string,
 379            environment.variable_start_string,
 380            environment.variable_end_string,
 381            environment.comment_start_string,
 382            environment.comment_end_string,
 383            environment.line_statement_prefix,
 384            environment.line_comment_prefix,
 385            environment.trim_blocks,
 386            environment.newline_sequence)
 387     lexer = _lexer_cache.get(key)
 388     if lexer is None:
 389         lexer = Lexer(environment)
 390         _lexer_cache[key] = lexer
 391     return lexer
 392
 393
class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.

    The lexer is a small state machine.  `self.rules` maps a state name to
    a list of ``(regex, tokens, new_state)`` triples; `tokeniter` tries the
    rules of the state on top of its stack in order and applies the first
    one that matches at the current position.
    """

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags (shared by blocks, variables and line
        # statements).  Order matters: floats are tried before integers.
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None)
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = compile_rules(environment)

        # block suffix if trimming is enabled: an optional newline that is
        # consumed together with the closing delimiter
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        self.newline_sequence = environment.newline_sequence

        # global lexing rules
        self.rules = {
            'root': [
                # directives: template data up to the next opening tag.
                # The named group that matched (``<name>_begin``) is both
                # the token that is yielded and the state that is entered.
                (c('(.*?)(?:%s)' % '|'.join(
                    [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string)
                    )] + [
                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                        for n, r in root_tag_rules
                    ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
                # data: everything that is left when no tag follows
                (c('.+'), TOKEN_DATA, None)
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
                # reached only if no end of comment follows
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), TOKEN_BLOCK_END, '#pop'),
            ] + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), TOKEN_VARIABLE_END, '#pop')
            ] + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
                # reached only if no endraw tag follows
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
            ] + tag_rules,
            # line comments: the comment text up to (not including) the
            # end of the line; the end token itself is an empty group
            TOKEN_LINECOMMENT_BEGIN: [
                (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
                 TOKEN_LINECOMMENT_END), '#pop')
            ]
        }

    def _normalize_newlines(self, value):
        """Replace every newline sequence in `value` with the configured
        `newline_sequence`.  Called for string literals and template data.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Calls `tokeniter` + `wrap` and returns the result as a
        :class:`TokenStream`.
        """
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and
        wraps every token in a :class:`Token` and converts the value.

        Ignored tokens and the raw begin / end markers are dropped, line
        statements are reported as blocks, and literals are converted to
        Python values.  Raises `TemplateSyntaxError` if a string literal
        cannot be unescaped.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == 'linestatement_begin':
                token = 'block_begin'
            elif token == 'linestatement_end':
                token = 'block_end'
            # we are not interested in those tokens in the parser
            elif token in ('raw_begin', 'raw_end'):
                continue
            elif token == 'data':
                value = self._normalize_newlines(value)
            # NOTE(review): no rule in `self.rules` emits 'keyword';
            # presumably kept for streams produced elsewhere -- confirm.
            elif token == 'keyword':
                token = value
            elif token == 'name':
                value = str(value)
            elif token == 'string':
                # try to unescape string (the surrounding quotes are
                # stripped first)
                try:
                    value = self._normalize_newlines(value[1:-1]) \
                        .encode('ascii', 'backslashreplace') \
                        .decode('unicode-escape')
                except Exception, e:
                    # only the text after the last colon of the decoder
                    # error is used as message
                    msg = str(e).split(':')[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
                # if we can express it as bytestring (ascii only)
                # we do that for support of semi broken APIs
                # as datetime.datetime.strftime.  On python 3 this
                # call becomes a noop thanks to 2to3
                try:
                    value = str(value)
                except UnicodeError:
                    pass
            elif token == 'integer':
                value = int(value)
            elif token == 'float':
                value = float(value)
            elif token == 'operator':
                # replace the generic operator token by the specific type
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.

        Yields ``(lineno, token, value)`` tuples.  `state` may be
        ``'variable'`` or ``'block'`` to start lexing inside such a tag
        instead of in template data.  Raises `TemplateSyntaxError` for
        unbalanced brackets, unterminated comments / raw blocks and
        characters no rule matches.
        """
        # newlines are unified to "\n"; note that splitlines() also drops
        # a trailing newline of the source
        source = '\n'.join(unicode(source).splitlines())
        pos = 0
        lineno = 1
        stack = ['root']
        if state is not None and state != 'root':
            assert state in ('variable', 'block'), 'invalid state'
            stack.append(state + '_begin')
        else:
            # NOTE(review): `state` is not read again below
            state = 'root'
        statetokens = self.rules[stack[-1]]
        source_length = len(source)

        # closing brackets that are still expected, innermost last
        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options: one entry per regex group
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            # calling the Failure raises the error itself
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in m.groupdict().iteritems():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token are just yielded as is.
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected \'%s\'' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected \'%s\', '
                                                          'expected \'%s\'' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in m.groupdict().iteritems():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)