nsz Git - libfirm/blob - scripts/jinja2/ext.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.ext
   4     ~~~~~~~~~~
   5
   6     Jinja extensions allow to add custom tags similar to the way django custom
   7     tags work.  By default two example extensions exist: an i18n and a cache
   8     extension.
   9
  10     :copyright: (c) 2010 by the Jinja Team.
  11     :license: BSD.
  12 """
  13 from collections import deque
  14 from jinja2 import nodes
  15 from jinja2.defaults import *
  16 from jinja2.environment import get_spontaneous_environment
  17 from jinja2.runtime import Undefined, concat
  18 from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError
  19 from jinja2.utils import contextfunction, import_string, Markup, next
  20
  21
# The only really useful gettext functions for a Jinja template; this tuple
# is the default set of function names searched for when extracting
# translatable strings.  Note that ugettext must be assigned to gettext as
# Jinja doesn't support non unicode strings.
GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext')
  26
  27
  28 class ExtensionRegistry(type):
  29     """Gives the extension an unique identifier."""
  30
  31     def __new__(cls, name, bases, d):
  32         rv = type.__new__(cls, name, bases, d)
  33         rv.identifier = rv.__module__ + '.' + rv.__name__
  34         return rv
  35
  36
  37 class Extension(object):
  38     """Extensions can be used to add extra functionality to the Jinja template
  39     system at the parser level.  Custom extensions are bound to an environment
  40     but may not store environment specific data on `self`.  The reason for
  41     this is that an extension can be bound to another environment (for
  42     overlays) by creating a copy and reassigning the `environment` attribute.
  43
  44     As extensions are created by the environment they cannot accept any
  45     arguments for configuration.  One may want to work around that by using
  46     a factory function, but that is not possible as extensions are identified
  47     by their import name.  The correct way to configure the extension is
  48     storing the configuration values on the environment.  Because this way the
  49     environment ends up acting as central configuration storage the
  50     attributes may clash which is why extensions have to ensure that the names
  51     they choose for configuration are not too generic.  ``prefix`` for example
  52     is a terrible name, ``fragment_cache_prefix`` on the other hand is a good
  53     name as includes the name of the extension (fragment cache).
  54     """
  55     __metaclass__ = ExtensionRegistry
  56
  57     #: if this extension parses this is the list of tags it's listening to.
  58     tags = set()
  59
  60     def __init__(self, environment):
  61         self.environment = environment
  62
  63     def bind(self, environment):
  64         """Create a copy of this extension bound to another environment."""
  65         rv = object.__new__(self.__class__)
  66         rv.__dict__.update(self.__dict__)
  67         rv.environment = environment
  68         return rv
  69
  70     def preprocess(self, source, name, filename=None):
  71         """This method is called before the actual lexing and can be used to
  72         preprocess the source.  The `filename` is optional.  The return value
  73         must be the preprocessed source.
  74         """
  75         return source
  76
  77     def filter_stream(self, stream):
  78         """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
  79         to filter tokens returned.  This method has to return an iterable of
  80         :class:`~jinja2.lexer.Token`\s, but it doesn't have to return a
  81         :class:`~jinja2.lexer.TokenStream`.
  82
  83         In the `ext` folder of the Jinja2 source distribution there is a file
  84         called `inlinegettext.py` which implements a filter that utilizes this
  85         method.
  86         """
  87         return stream
  88
  89     def parse(self, parser):
  90         """If any of the :attr:`tags` matched this method is called with the
  91         parser as first argument.  The token the parser stream is pointing at
  92         is the name token that matched.  This method has to return one or a
  93         list of multiple nodes.
  94         """
  95         raise NotImplementedError()
  96
  97     def attr(self, name, lineno=None):
  98         """Return an attribute node for the current extension.  This is useful
  99         to pass constants on extensions to generated template code::
 100
 101             self.attr('_my_attribute', lineno=lineno)
 102         """
 103         return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)
 104
 105     def call_method(self, name, args=None, kwargs=None, dyn_args=None,
 106                     dyn_kwargs=None, lineno=None):
 107         """Call a method of the extension.  This is a shortcut for
 108         :meth:`attr` + :class:`jinja2.nodes.Call`.
 109         """
 110         if args is None:
 111             args = []
 112         if kwargs is None:
 113             kwargs = []
 114         return nodes.Call(self.attr(name, lineno=lineno), args, kwargs,
 115                           dyn_args, dyn_kwargs, lineno=lineno)
 116
 117
 118 @contextfunction
 119 def _gettext_alias(context, string):
 120     return context.resolve('gettext')(string)
 121
 122
class InternationalizationExtension(Extension):
    """This extension adds gettext support to Jinja2.

    It handles the ``trans`` tag.  On construction it registers ``_`` as an
    environment global and passes four helpers to ``environment.extend``:
    ``install_gettext_translations``, ``install_null_translations``,
    ``uninstall_gettext_translations`` and ``extract_translations``.
    """
    tags = set(['trans'])

    # TODO: the i18n extension is currently reevaluating values in a few
    # situations.  Take this example:
    #   {% trans count=something() %}{{ count }} foo{% pluralize
    #     %}{{ count }} fooss{% endtrans %}
    # something is called twice here.  One time for the gettext value and
    # the other time for the n-parameter of the ngettext function.

    def __init__(self, environment):
        """Bind to `environment`, register the ``_`` global and hand the
        install/uninstall/extract helpers to ``environment.extend``.
        """
        Extension.__init__(self, environment)
        # ``_`` looks up ``gettext`` in the context at call time
        environment.globals['_'] = _gettext_alias
        environment.extend(
            install_gettext_translations=self._install,
            install_null_translations=self._install_null,
            uninstall_gettext_translations=self._uninstall,
            extract_translations=self._extract
        )

    def _install(self, translations):
        """Install the translation callables of `translations` as the
        ``gettext`` and ``ngettext`` environment globals.

        ``ugettext`` / ``ungettext`` are preferred when the object has them;
        otherwise ``gettext`` / ``ngettext`` are used.
        """
        gettext = getattr(translations, 'ugettext', None)
        if gettext is None:
            gettext = translations.gettext
        ngettext = getattr(translations, 'ungettext', None)
        if ngettext is None:
            ngettext = translations.ngettext
        self.environment.globals.update(gettext=gettext, ngettext=ngettext)

    def _install_null(self):
        """Install pass-through translation globals: ``gettext`` returns its
        argument and ``ngettext`` returns the singular for ``n == 1`` and the
        plural otherwise.
        """
        self.environment.globals.update(
            gettext=lambda x: x,
            # the operands are wrapped in one-element tuples so that the
            # and/or idiom also works when the plural is an empty string
            ngettext=lambda s, p, n: (n != 1 and (p,) or (s,))[0]
        )

    def _uninstall(self, translations):
        """Remove the ``gettext`` and ``ngettext`` globals if they are set.
        The `translations` argument is not used.
        """
        for key in 'gettext', 'ngettext':
            self.environment.globals.pop(key, None)

    def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
        """Return the result of :func:`extract_from_ast` for `source`.

        A string `source` is parsed with the environment first; anything
        else is passed to :func:`extract_from_ast` as is.
        """
        if isinstance(source, basestring):
            source = self.environment.parse(source)
        return extract_from_ast(source, gettext_functions)

    def parse(self, parser):
        """Parse a translatable tag.

        Reads the variable list of the ``trans`` tag, the singular body and
        an optional ``pluralize`` body, then returns the node built by
        :meth:`_make_node` with its line number set to the line of the tag.
        """
        # consume the tag name token and remember its line
        lineno = next(parser.stream).lineno

        # find all the variables referenced.  Additionally a variable can be
        # defined in the body of the trans block too, but this is checked at
        # a later state.
        plural_expr = None
        variables = {}
        while parser.stream.current.type != 'block_end':
            # every variable after the first has to be preceded by a comma
            if variables:
                parser.stream.expect('comma')

            # skip colon for python compatibility
            if parser.stream.skip_if('colon'):
                break

            name = parser.stream.expect('name')
            if name.value in variables:
                parser.fail('translatable variable %r defined twice.' %
                            name.value, name.lineno,
                            exc=TemplateAssertionError)

            # expressions
            if parser.stream.current.type == 'assign':
                next(parser.stream)
                variables[name.value] = var = parser.parse_expression()
            else:
                # a bare name is loaded from the context under the same name
                variables[name.value] = var = nodes.Name(name.value, 'load')
            # the first variable of the tag is the default plural expression
            if plural_expr is None:
                plural_expr = var

        parser.stream.expect('block_end')

        plural = plural_names = None
        have_plural = False
        referenced = set()

        # now parse until endtrans or pluralize
        singular_names, singular = self._parse_block(parser, True)
        if singular_names:
            referenced.update(singular_names)
            # without tag variables the first name used in the body becomes
            # the plural expression
            if plural_expr is None:
                plural_expr = nodes.Name(singular_names[0], 'load')

        # if we have a pluralize block, we parse that too
        if parser.stream.current.test('name:pluralize'):
            have_plural = True
            next(parser.stream)
            # ``pluralize`` may name the variable to use for the count; it
            # has to be one of the variables declared on the tag
            if parser.stream.current.type != 'block_end':
                name = parser.stream.expect('name')
                if name.value not in variables:
                    parser.fail('unknown variable %r for pluralization' %
                                name.value, name.lineno,
                                exc=TemplateAssertionError)
                plural_expr = variables[name.value]
            parser.stream.expect('block_end')
            plural_names, plural = self._parse_block(parser, False)
            # step over the name token the block parser stopped at
            next(parser.stream)
            referenced.update(plural_names)
        else:
            # step over the name token the block parser stopped at
            next(parser.stream)

        # register free names as simple name expressions
        for var in referenced:
            if var not in variables:
                variables[var] = nodes.Name(var, 'load')

        # no variables referenced?  no need to escape
        # (undoes the ``%`` -> ``%%`` doubling done by _parse_block)
        if not referenced:
            singular = singular.replace('%%', '%')
            if plural:
                plural = plural.replace('%%', '%')

        # the plural expression is only kept when a pluralize block exists
        if not have_plural:
            plural_expr = None
        elif plural_expr is None:
            parser.fail('pluralize without variables', lineno)

        # turn the name -> expression mapping into a dict node
        if variables:
            variables = nodes.Dict([nodes.Pair(nodes.Const(x, lineno=lineno), y)
                                    for x, y in variables.items()])
        else:
            variables = None

        node = self._make_node(singular, plural, variables, plural_expr)
        node.set_lineno(lineno)
        return node

    def _parse_block(self, parser, allow_pluralize):
        """Parse until the next block tag with a given name.

        Stops at ``endtrans`` or, if `allow_pluralize` is true, at
        ``pluralize``; the stream is left pointing at that name token.
        Returns a ``(referenced, text)`` tuple: the list of variable names
        used in the block and the result of ``concat`` over the collected
        pieces, in which variables appear as ``%(name)s`` placeholders.
        """
        referenced = []
        buf = []
        while 1:
            if parser.stream.current.type == 'data':
                # literal percent signs are doubled so they survive the
                # later ``%`` formatting
                buf.append(parser.stream.current.value.replace('%', '%%'))
                next(parser.stream)
            elif parser.stream.current.type == 'variable_begin':
                # only a plain name is accepted between the variable
                # delimiters
                next(parser.stream)
                name = parser.stream.expect('name').value
                referenced.append(name)
                buf.append('%%(%s)s' % name)
                parser.stream.expect('variable_end')
            elif parser.stream.current.type == 'block_begin':
                next(parser.stream)
                if parser.stream.current.test('name:endtrans'):
                    break
                elif parser.stream.current.test('name:pluralize'):
                    if allow_pluralize:
                        break
                    parser.fail('a translatable section can have only one '
                                'pluralize section')
                # any other block tag is rejected
                parser.fail('control structures in translatable sections are '
                            'not allowed')
            elif parser.stream.eos:
                parser.fail('unclosed translation block')
            else:
                assert False, 'internal parser error'

        return referenced, concat(buf)

    def _make_node(self, singular, plural, variables, plural_expr):
        """Generates a useful node from the data provided.

        Builds a call to ``gettext`` (when `plural_expr` is ``None``) or to
        ``ngettext``, optionally wraps it in ``MarkSafe`` and in a ``Mod``
        node that applies `variables`, and returns it inside an ``Output``
        node.
        """
        # singular only:
        if plural_expr is None:
            gettext = nodes.Name('gettext', 'load')
            node = nodes.Call(gettext, [nodes.Const(singular)],
                              [], None, None)

        # singular and plural
        else:
            ngettext = nodes.Name('ngettext', 'load')
            node = nodes.Call(ngettext, [
                nodes.Const(singular),
                nodes.Const(plural),
                plural_expr
            ], [], None, None)

        # mark the return value as safe if we are in an
        # environment with autoescaping turned on
        if self.environment.autoescape:
            node = nodes.MarkSafe(node)

        # the placeholders are filled in after the call (and after the
        # optional MarkSafe wrapping)
        if variables:
            node = nodes.Mod(node, variables)
        return nodes.Output([node])
 314
 315
 316 class ExprStmtExtension(Extension):
 317     """Adds a `do` tag to Jinja2 that works like the print statement just
 318     that it doesn't print the return value.
 319     """
 320     tags = set(['do'])
 321
 322     def parse(self, parser):
 323         node = nodes.ExprStmt(lineno=next(parser.stream).lineno)
 324         node.node = parser.parse_tuple()
 325         return node
 326
 327
 328 class LoopControlExtension(Extension):
 329     """Adds break and continue to the template engine."""
 330     tags = set(['break', 'continue'])
 331
 332     def parse(self, parser):
 333         token = next(parser.stream)
 334         if token.value == 'break':
 335             return nodes.Break(lineno=token.lineno)
 336         return nodes.Continue(lineno=token.lineno)
 337
 338
 339 class WithExtension(Extension):
 340     """Adds support for a django-like with block."""
 341     tags = set(['with'])
 342
 343     def parse(self, parser):
 344         node = nodes.Scope(lineno=next(parser.stream).lineno)
 345         assignments = []
 346         while parser.stream.current.type != 'block_end':
 347             lineno = parser.stream.current.lineno
 348             if assignments:
 349                 parser.stream.expect('comma')
 350             target = parser.parse_assign_target()
 351             parser.stream.expect('assign')
 352             expr = parser.parse_expression()
 353             assignments.append(nodes.Assign(target, expr, lineno=lineno))
 354         node.body = assignments + \
 355             list(parser.parse_statements(('name:endwith',),
 356                                          drop_needle=True))
 357         return node
 358
 359
 360 def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS,
 361                      babel_style=True):
 362     """Extract localizable strings from the given template node.  Per
 363     default this function returns matches in babel style that means non string
 364     parameters as well as keyword arguments are returned as `None`.  This
 365     allows Babel to figure out what you really meant if you are using
 366     gettext functions that allow keyword arguments for placeholder expansion.
 367     If you don't want that behavior set the `babel_style` parameter to `False`
 368     which causes only strings to be returned and parameters are always stored
 369     in tuples.  As a consequence invalid gettext calls (calls without a single
 370     string parameter or string parameters after non-string parameters) are
 371     skipped.
 372
 373     This example explains the behavior:
 374
 375     >>> from jinja2 import Environment
 376     >>> env = Environment()
 377     >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
 378     >>> list(extract_from_ast(node))
 379     [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
 380     >>> list(extract_from_ast(node, babel_style=False))
 381     [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]
 382
 383     For every string found this function yields a ``(lineno, function,
 384     message)`` tuple, where:
 385
 386     * ``lineno`` is the number of the line on which the string was found,
 387     * ``function`` is the name of the ``gettext`` function used (if the
 388       string was extracted from embedded Python code), and
 389     *  ``message`` is the string itself (a ``unicode`` object, or a tuple
 390        of ``unicode`` objects for functions with multiple string arguments).
 391
 392     This extraction function operates on the AST and is because of that unable
 393     to extract any comments.  For comment support you have to use the babel
 394     extraction interface or extract comments yourself.
 395     """
 396     for node in node.find_all(nodes.Call):
 397         if not isinstance(node.node, nodes.Name) or \
 398            node.node.name not in gettext_functions:
 399             continue
 400
 401         strings = []
 402         for arg in node.args:
 403             if isinstance(arg, nodes.Const) and \
 404                isinstance(arg.value, basestring):
 405                 strings.append(arg.value)
 406             else:
 407                 strings.append(None)
 408
 409         for arg in node.kwargs:
 410             strings.append(None)
 411         if node.dyn_args is not None:
 412             strings.append(None)
 413         if node.dyn_kwargs is not None:
 414             strings.append(None)
 415
 416         if not babel_style:
 417             strings = tuple(x for x in strings if x is not None)
 418             if not strings:
 419                 continue
 420         else:
 421             if len(strings) == 1:
 422                 strings = strings[0]
 423             else:
 424                 strings = tuple(strings)
 425         yield node.lineno, node.node.name, strings
 426
 427
 428 class _CommentFinder(object):
 429     """Helper class to find comments in a token stream.  Can only
 430     find comments for gettext calls forwards.  Once the comment
 431     from line 4 is found, a comment for line 1 will not return a
 432     usable value.
 433     """
 434
 435     def __init__(self, tokens, comment_tags):
 436         self.tokens = tokens
 437         self.comment_tags = comment_tags
 438         self.offset = 0
 439         self.last_lineno = 0
 440
 441     def find_backwards(self, offset):
 442         try:
 443             for _, token_type, token_value in \
 444                     reversed(self.tokens[self.offset:offset]):
 445                 if token_type in ('comment', 'linecomment'):
 446                     try:
 447                         prefix, comment = token_value.split(None, 1)
 448                     except ValueError:
 449                         continue
 450                     if prefix in self.comment_tags:
 451                         return [comment.rstrip()]
 452             return []
 453         finally:
 454             self.offset = offset
 455
 456     def find_comments(self, lineno):
 457         if not self.comment_tags or self.last_lineno > lineno:
 458             return []
 459         for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset:]):
 460             if token_lineno > lineno:
 461                 return self.find_backwards(self.offset + idx)
 462         return self.find_backwards(len(self.tokens))
 463
 464
 465 def babel_extract(fileobj, keywords, comment_tags, options):
 466     """Babel extraction method for Jinja templates.
 467
 468     .. versionchanged:: 2.3
 469        Basic support for translation comments was added.  If `comment_tags`
 470        is now set to a list of keywords for extraction, the extractor will
 471        try to find the best preceeding comment that begins with one of the
 472        keywords.  For best results, make sure to not have more than one
 473        gettext call in one line of code and the matching comment in the
 474        same line or the line before.
 475
 476     :param fileobj: the file-like object the messages should be extracted from
 477     :param keywords: a list of keywords (i.e. function names) that should be
 478                      recognized as translation functions
 479     :param comment_tags: a list of translator tags to search for and include
 480                          in the results.
 481     :param options: a dictionary of additional options (optional)
 482     :return: an iterator over ``(lineno, funcname, message, comments)`` tuples.
 483              (comments will be empty currently)
 484     """
 485     extensions = set()
 486     for extension in options.get('extensions', '').split(','):
 487         extension = extension.strip()
 488         if not extension:
 489             continue
 490         extensions.add(import_string(extension))
 491     if InternationalizationExtension not in extensions:
 492         extensions.add(InternationalizationExtension)
 493
 494     environment = get_spontaneous_environment(
 495         options.get('block_start_string', BLOCK_START_STRING),
 496         options.get('block_end_string', BLOCK_END_STRING),
 497         options.get('variable_start_string', VARIABLE_START_STRING),
 498         options.get('variable_end_string', VARIABLE_END_STRING),
 499         options.get('comment_start_string', COMMENT_START_STRING),
 500         options.get('comment_end_string', COMMENT_END_STRING),
 501         options.get('line_statement_prefix') or LINE_STATEMENT_PREFIX,
 502         options.get('line_comment_prefix') or LINE_COMMENT_PREFIX,
 503         str(options.get('trim_blocks', TRIM_BLOCKS)).lower() in \
 504             ('1', 'on', 'yes', 'true'),
 505         NEWLINE_SEQUENCE, frozenset(extensions),
 506         # fill with defaults so that environments are shared
 507         # with other spontaneus environments.  The rest of the
 508         # arguments are optimizer, undefined, finalize, autoescape,
 509         # loader, cache size, auto reloading setting and the
 510         # bytecode cache
 511         True, Undefined, None, False, None, 0, False, None
 512     )
 513
 514     source = fileobj.read().decode(options.get('encoding', 'utf-8'))
 515     try:
 516         node = environment.parse(source)
 517         tokens = list(environment.lex(environment.preprocess(source)))
 518     except TemplateSyntaxError, e:
 519         # skip templates with syntax errors
 520         return
 521
 522     finder = _CommentFinder(tokens, comment_tags)
 523     for lineno, func, message in extract_from_ast(node, keywords):
 524         yield lineno, func, message, finder.find_comments(lineno)
 525
 526
#: nicer import names: short aliases for the extension classes of this module
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension