1 # -*- coding: utf-8 -*-
6 Implements a Markup string.
8 :copyright: (c) 2010 by Armin Ronacher.
9 :license: BSD, see LICENSE for more details.
12 from itertools import imap
# Public names of this module (what ``from <module> import *`` exports).
__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
# Matches an HTML comment (non-greedy) or a single tag; used by
# ``Markup.striptags`` to remove markup.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
# Matches ``&...;`` and captures the text between ``&`` and ``;``; used by
# ``Markup.unescape`` to resolve entities.
_entity_re = re.compile(r'&([^;]+);')
class Markup(unicode):
    r"""Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.  This implements the `__html__` interface a couple
    of frameworks and web applications use.  :class:`Markup` is a direct
    subclass of `unicode` and provides all the methods of `unicode` just that
    it escapes arguments passed and always returns `Markup`.

    The `escape` function returns markup objects so that double escaping can't
    happen.

    The constructor of the :class:`Markup` class can be used for three
    different things:  When passed an unicode object it's assumed to be safe,
    when passed an object with an HTML representation (has an `__html__`
    method) that representation is used, otherwise the object passed is
    converted into a unicode string and then assumed to be safe:

    >>> Markup("Hello <em>World</em>!")
    Markup(u'Hello <em>World</em>!')
    >>> class Foo(object):
    ...     def __html__(self):
    ...         return '<a href="#">foo</a>'
    ...
    >>> Markup(Foo())
    Markup(u'<a href="#">foo</a>')

    If you want object passed being always treated as unsafe you can use the
    :meth:`escape` classmethod to create a :class:`Markup` object:

    >>> Markup.escape("Hello <em>World</em>!")
    Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')

    Operations on a markup string are markup aware which means that all
    arguments are passed through the :func:`escape` function:

    >>> em = Markup("<em>%s</em>")
    >>> em % "foo & bar"
    Markup(u'<em>foo &amp; bar</em>')
    >>> strong = Markup("<strong>%(text)s</strong>")
    >>> strong % {'text': '<blink>hacker here</blink>'}
    Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
    >>> Markup("<em>Hello</em> ") + "<foo>"
    Markup(u'<em>Hello</em> &lt;foo&gt;')
    """
    __slots__ = ()

    def __new__(cls, base=u'', encoding=None, errors='strict'):
        """Create a markup string from `base`.

        If `base` has an `__html__` method its result is used.  `encoding`
        and `errors` are only forwarded to `unicode` when an encoding is
        given, because `unicode` refuses to "decode" a unicode object.
        """
        if hasattr(base, '__html__'):
            base = base.__html__()
        if encoding is None:
            return unicode.__new__(cls, base)
        return unicode.__new__(cls, base, encoding, errors)

    def __html__(self):
        """Return the HTML representation, which is the string itself."""
        return self

    def __add__(self, other):
        """Concatenate, escaping `other` unless it provides `__html__`."""
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(self) + unicode(escape(other)))
        # let Python try the reflected operation / raise TypeError
        return NotImplemented

    def __radd__(self, other):
        """Reflected concatenation; `other` is escaped like in `__add__`."""
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(escape(other)) + unicode(self))
        return NotImplemented

    def __mul__(self, num):
        """Repeat the markup `num` times and keep the markup type."""
        if isinstance(num, (int, long)):
            return self.__class__(unicode.__mul__(self, num))
        return NotImplemented
    __rmul__ = __mul__

    def __mod__(self, arg):
        """``%`` formatting where every argument is escaped on conversion."""
        # each argument is wrapped so that escaping happens when the
        # format operation converts it; a tuple is wrapped item by item
        # so that it still counts as a tuple of positional arguments.
        if isinstance(arg, tuple):
            arg = tuple(imap(_MarkupEscapeHelper, arg))
        else:
            arg = _MarkupEscapeHelper(arg)
        return self.__class__(unicode.__mod__(self, arg))

    def __repr__(self):
        return '%s(%s)' % (
            self.__class__.__name__,
            unicode.__repr__(self)
        )

    # the docstrings of the following methods are copied from `unicode`.

    def join(self, seq):
        return self.__class__(unicode.join(self, imap(escape, seq)))
    join.__doc__ = unicode.join.__doc__

    def split(self, *args, **kwargs):
        return [self.__class__(part)
                for part in unicode.split(self, *args, **kwargs)]
    split.__doc__ = unicode.split.__doc__

    def rsplit(self, *args, **kwargs):
        return [self.__class__(part)
                for part in unicode.rsplit(self, *args, **kwargs)]
    rsplit.__doc__ = unicode.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        return [self.__class__(line)
                for line in unicode.splitlines(self, *args, **kwargs)]
    splitlines.__doc__ = unicode.splitlines.__doc__

    def unescape(self):
        r"""Unescape markup again into an unicode string.  This also resolves
        known HTML4 and XHTML entities:

        >>> Markup("Main &raquo; <em>About</em>").unescape()
        u'Main \xbb <em>About</em>'
        """
        from jinja2._markupsafe._constants import HTML_ENTITIES

        def handle_match(m):
            name = m.group(1)
            if name in HTML_ENTITIES:
                return unichr(HTML_ENTITIES[name])
            try:
                if name[:2] in ('#x', '#X'):
                    return unichr(int(name[2:], 16))
                elif name.startswith('#'):
                    return unichr(int(name[1:]))
            except ValueError:
                # malformed numeric reference, handled like an unknown name
                pass
            return u''
        return _entity_re.sub(handle_match, unicode(self))

    def striptags(self):
        r"""Unescape markup into an unicode string and strip all tags.  This
        also resolves known HTML4 and XHTML entities.  Whitespace is
        normalized to one:

        >>> Markup("Main &raquo;  <em>About</em>").striptags()
        u'Main \xbb About'
        """
        stripped = u' '.join(_striptags_re.sub('', self).split())
        return Markup(stripped).unescape()

    @classmethod
    def escape(cls, s):
        """Escape the string.  Works like :func:`escape` with the difference
        that for subclasses of :class:`Markup` this function would return the
        correct subclass.
        """
        rv = escape(s)
        if rv.__class__ is not cls:
            return cls(rv)
        return rv

    def make_wrapper(name):
        """Build a method that escapes its arguments, calls the `unicode`
        method called `name` and converts the result back to markup.
        """
        orig = getattr(unicode, name)

        def func(self, *args, **kwargs):
            # escape in place so this does not depend on the helper's
            # return value.
            args = list(args)
            _escape_argspec(args, enumerate(args))
            _escape_argspec(kwargs, kwargs.iteritems())
            return self.__class__(orig(self, *args, **kwargs))
        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    for method in '__getitem__', 'capitalize', \
                  'title', 'lower', 'upper', 'replace', 'ljust', \
                  'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
                  'translate', 'expandtabs', 'swapcase', 'zfill':
        locals()[method] = make_wrapper(method)

    # new in python 2.5
    # These return 3-tuples, so the generic wrapper (which passes the whole
    # result to the class constructor) cannot be used for them.
    if hasattr(unicode, 'partition'):
        def partition(self, sep):
            if hasattr(sep, '__html__') or isinstance(sep, basestring):
                sep = escape(sep)
            return tuple(map(self.__class__, unicode.partition(self, sep)))
        partition.__doc__ = unicode.partition.__doc__

        def rpartition(self, sep):
            if hasattr(sep, '__html__') or isinstance(sep, basestring):
                sep = escape(sep)
            return tuple(map(self.__class__, unicode.rpartition(self, sep)))
        rpartition.__doc__ = unicode.rpartition.__doc__

    # new in python 2.6
    if hasattr(unicode, 'format'):
        format = make_wrapper('format')

    # not in python 3
    if hasattr(unicode, '__getslice__'):
        __getslice__ = make_wrapper('__getslice__')

    # keep the class namespace free of the construction helpers
    del method, make_wrapper
198 def _escape_argspec(obj, iterable):
199 """Helper for various string-wrapped functions."""
200 for key, value in iterable:
201 if hasattr(value, '__html__') or isinstance(value, basestring):
202 obj[key] = escape(value)
206 class _MarkupEscapeHelper(object):
207 """Helper for Markup.__mod__"""
209 def __init__(self, obj):
212 __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
213 __str__ = lambda s: str(escape(s.obj))
214 __unicode__ = lambda s: unicode(escape(s.obj))
215 __repr__ = lambda s: str(escape(repr(s.obj)))
216 __int__ = lambda s: int(s.obj)
217 __float__ = lambda s: float(s.obj)
# we have to import it down here as the speedups and native
# modules import the markup type, which is defined above.
223 from jinja2._markupsafe._speedups import escape, escape_silent, soft_unicode
225 from jinja2._markupsafe._native import escape, escape_silent, soft_unicode