"""
Text Helpers

Provides methods for filtering, formatting and transforming strings.
"""
# Last synced with Rails copy at Revision 6096 on Feb 8th, 2007.
# Purposely left out sanitize and strip_tags, should be included at some
# point likely using BeautifulSoup.

import itertools
import re
import textwrap
import warnings

import webhelpers.textile as textile
import webhelpers.markdown as _markdown
from routes import request_config
from webhelpers.rails.tags import content_tag, tag_options

# Matches a URL together with the text immediately around it.
# Group 1: leading context, group 2: protocol or ``www.``,
# group 3: host/port/path/query/anchor, group 4: trailing context.
AUTO_LINK_RE = re.compile(r"""
                        (                          # leading text
                          <\w+.*?>|                # leading HTML tag, or
                          [^=!:'"/]|               # leading punctuation, or
                          ^                        # beginning of line
                        )
                        (
                          (?:https?://)|           # protocol spec, or
                          (?:www\.)                # www.*
                        )
                        (
                          [-\w]+                   # subdomain or domain
                          (?:\.[-\w]+)*            # remaining subdomains or domain
                          (?::\d+)?                # port
                          (?:/(?:(?:[~\w\+%-]|(?:[,.;:][^\s$]))+)?)* # path
                          (?:\?[\w\+%&=.;-]+)?     # query string
                          (?:\#[\w\-]*)?           # trailing anchor
                        )
                        ([\.,"'?!;:]|\s|<|$)       # trailing text
                           """, re.X)


def iterdict(items):
    """Return the per-cycle state: the ``items`` and an endless iterator over them."""
    return dict(items=items, iter=itertools.cycle(items))


def cycle(*args, **kargs):
    """
    Return the next cycle of the given list.

    Everytime ``cycle`` is called, the value returned will be the next item
    in the list passed to it. This list is reset on every request, but can
    also be reset by calling ``reset_cycle()``.

    You may specify the list as either arguments, or as a single list
    argument. An optional ``name`` keyword argument (default ``'default'``)
    selects which named cycle to advance.

    This can be used to alternate classes for table rows::

        # In Myghty...
        % for item in items:
        <tr class="<% cycle("even", "odd") %>">
            ... use item ...
        </tr>
        % #endfor

    You can use named cycles to prevent clashes in nested loops. You'll
    have to reset the inner cycle, manually::

        % for item in items:
        <tr class="<% cycle("even", "odd", name="row_class") %>">
            <td>
        % for value in item.values:
                <span style="color:'<% cycle("red", "green", "blue", name="colors") %>'">
                            item
                </span>
        % #endfor
            <% reset_cycle("colors") %>
            </td>
        </tr>
        % #endfor
    """
    if len(args) > 1:
        items = args
    else:
        items = args[0]
    name = kargs.get('name', 'default')
    cycles = request_config().environ.setdefault('railshelpers.cycles', {})
    state = cycles.get(name)
    # Start over when the cycle is new or is being called with different items.
    if state is None or state.get('items') != items:
        state = cycles[name] = iterdict(items)
    # The next() builtin works on both Python 2.6+ and Python 3, unlike
    # the iterator's ``.next()`` method.
    return next(state['iter'])


def reset_cycle(name='default'):
    """
    Reset a cycle.

    Resets the cycle so that it starts from the first element in the array
    the next time it is used. Resetting a cycle that does not exist is a
    no-op.
    """
    try:
        del request_config().environ['railshelpers.cycles'][name]
    except KeyError:
        # Nothing stored under that name (or no cycles at all): already reset.
        pass


def counter(name='default', start=1, step=1):
    """Return the next cardinal in a sequence.

    Every time ``counter`` is called, the value returned will be the next
    counting number in that sequence. This is reset to ``start`` on every
    request, but can also be reset by calling ``reset_counter()``.

    You can optionally specify the number you want to start at by passing
    in the ``start`` argument (defaults to 1).

    You can also optionally specify the step size you want by passing in
    the ``step`` argument (defaults to 1).

    ``start`` and ``step`` only take effect on the call that creates the
    named counter; later calls continue the existing sequence.

    Sequences will increase monotonically by ``step`` each time it is
    called, until the heat death of the universe or python explodes.

    This can be used to count rows in a table::

        # In Myghty
        % for item in items:
        <tr>
            <td><% h.counter() %></td>
        </tr>
        % #endfor

    You can use named counters to prevent clashes in nested loops.
    You'll have to reset the inner cycle manually though. See the
    documentation for ``webhelpers.text.cycle()`` for a similar example.
    """
    counters = request_config().environ.setdefault('railshelpers.counters', {})

    # ripped off of itertools.count
    def do_counter(start, step):
        while True:
            yield start
            start += step

    sequence = counters.get(name)
    if sequence is None:
        sequence = counters[name] = do_counter(start, step)
    # The next() builtin works on both Python 2.6+ and Python 3.
    return next(sequence)


def reset_counter(name='default'):
    """Reset a counter.

    Resets the counter so that it starts from the ``start`` cardinal in
    the sequence next time it is used. Resetting a counter that does not
    exist is a no-op.
    """
    try:
        del request_config().environ['railshelpers.counters'][name]
    except KeyError:
        # Nothing stored under that name (or no counters at all).
        pass


def truncate(text, length=30, truncate_string='...'):
    """
    Truncate ``text`` with replacement characters.

    ``length``
        The maximum length of ``text`` before replacement
    ``truncate_string``
        If ``text`` exceeds the ``length``, this string will replace
        the end of the string

    Returns ``''`` when ``text`` is empty or ``None``.

    Example::

        >>> truncate('Once upon a time in a world far far away', 14)
        'Once upon a...'
    """
    if not text:
        return ''
    if len(text) <= length:
        return text
    # Clamp at zero: a negative slice index would keep almost all of the
    # text and make the "truncated" result longer than the input.
    keep = max(length - len(truncate_string), 0)
    return text[:keep] + truncate_string


def highlight(text, phrase, highlighter='<strong class="highlight">\\1</strong>',
              hilighter=None):
    """
    Highlight the ``phrase`` where it is found in the ``text``.

    The highlighted phrase will be surrounded by the highlighter,
    by default::

        <strong class="highlight">I'm a highlight phrase</strong>

    ``highlighter``
        Defines the highlighting phrase. This argument should be a
        single-quoted string with ``\\1`` where the phrase is supposed
        to be inserted.
    ``hilighter``
        Deprecated misspelled alias for ``highlighter``; when given it
        takes precedence and a ``DeprecationWarning`` is issued.

    Note: The ``phrase`` is escaped with ``re.escape`` and matched
    literally, ignoring case.

    Returns ``text`` unchanged when either ``text`` or ``phrase`` is empty.

    Example::

        >>> highlight('You searched for: Pylons', 'Pylons')
        'You searched for: <strong class="highlight">Pylons</strong>'
    """
    if hilighter is not None:
        warnings.warn("The highlight function's hilighter keyword argument is "
                      "deprecated: Please use the highlighter keyword argument "
                      "instead.", DeprecationWarning, 2)
        highlighter = hilighter
    if not phrase or not text:
        return text
    highlight_re = re.compile('(%s)' % re.escape(phrase), re.I)
    return highlight_re.sub(highlighter, text)


def excerpt(text, phrase, radius=100, excerpt_string="..."):
    """
    Extract an excerpt from the ``text``, or '' if the phrase isn't found.

    ``phrase``
        Phrase to excerpt from ``text``
    ``radius``
        How many surrounding characters to include
    ``excerpt_string``
        Characters surrounding entire excerpt

    Returns ``text`` unchanged when either ``text`` or ``phrase`` is empty.

    Example::

        >>> excerpt("hello my world", "my", 3)
        '...lo my wo...'
    """
    if not text or not phrase:
        return text

    pat = re.compile('(.{0,%s}%s.{0,%s})' % (radius, re.escape(phrase), radius),
                     re.I)
    match = pat.search(text)
    if not match:
        return ""

    result = match.group(1)
    # Mark each side on which text was cut off.
    if match.start(1) > 0:
        result = excerpt_string + result
    if match.end(1) < len(text):
        result = result + excerpt_string
    return result


def word_wrap(text, line_width=80):
    """
    Wrap ``text`` into lines of at most ``line_width`` width.

    Deprecated. This is deprecated: Use python's builtin textwrap.fill
    instead.

    This function breaks on the first whitespace character that does not
    exceed ``line_width``.
    """
    warnings.warn("The word_wrap function has been deprecated: Use python's builtin "
                  "textwrap.fill function instead.", DeprecationWarning, 2)
    return textwrap.fill(text, line_width)


def simple_format(text):
    """
    Return ``text`` transformed into HTML using simple formatting rules.

    Two or more consecutive newlines(``\\n\\n``) are considered as a
    paragraph and wrapped in ``<p>`` tags. One newline (``\\n``) is
    considered a linebreak and a ``<br />`` tag is appended. This
    method does not remove the newlines from the text.

    ``None`` is treated as the empty string.
    """
    if text is None:
        text = ''
    # Normalise every newline flavour to "\n", then collapse runs of blank
    # lines so each paragraph break is exactly "\n\n".
    text = re.sub(r'(\r\n|\n|\r)', r'\n', text)
    text = re.sub(r'\n\n+', r'\n\n', text)
    # Paragraph break: close the current paragraph and open the next one.
    text = re.sub(r'(\n\n)', r'</p>\1<p>', text)
    # A lone newline inside a paragraph becomes a line break.
    text = re.sub(r'([^\n])(\n)(?=[^\n])', r'\1\2<br />', text)
    # Wrap everything in the outer paragraph and drop an empty trailing one.
    text = content_tag("p", text).replace('</p><p></p>', '</p>')
    text = re.sub(r'</p><p>', r'</p>\n<p>', text)
    return text


def auto_link(text, link="all", **href_options):
    """
    Turn all urls and email addresses into clickable links.

    ``link``
        Used to determine what to link. Options are "all",
        "email_addresses", or "urls"

    Any extra keyword arguments are passed on as options for the generated
    URL links. Returns ``''`` when ``text`` is empty.

    Example::

        >>> auto_link("Go to http://www.planetpython.com and say hello to guido@python.org")
        'Go to <a href="http://www.planetpython.com">http://www.planetpython.com</a> and say hello to <a href="mailto:guido@python.org">guido@python.org</a>'
    """
    if not text:
        return ""
    if link == "all":
        return auto_link_urls(auto_link_email_addresses(text), **href_options)
    elif link == "email_addresses":
        return auto_link_email_addresses(text)
    else:
        return auto_link_urls(text, **href_options)


def auto_link_urls(text, **href_options):
    """Wrap every URL found in ``text`` in an ``<a>`` tag.

    ``href_options`` are rendered by ``tag_options`` and inserted into each
    generated tag (presumably as extra HTML attributes -- confirm against
    ``webhelpers.rails.tags.tag_options``).
    """
    extra_options = tag_options(**href_options)

    def handle_match(matchobj):
        whole = matchobj.group()
        lead, scheme, rest, trail = matchobj.group(1, 2, 3, 4)
        # A URL directly preceded by an opening anchor tag is already a
        # link; leave it untouched.
        if re.match(r'<a\s', lead, re.I):
            return whole
        label = scheme + rest
        # Bare "www." hosts need a protocol to form a usable href.
        if scheme == "www.":
            scheme = "http://www."
        return '%s<a href="%s%s"%s>%s</a>%s' % (
            lead, scheme, rest, extra_options, label, trail)

    return re.sub(AUTO_LINK_RE, handle_match, text)


def auto_link_email_addresses(text):
    """Wrap every email address found in ``text`` in a ``mailto:`` link."""
    return re.sub(r'([\w\.!#\$%\-+.]+@[A-Za-z0-9\-]+(\.[A-Za-z0-9\-]+)+)',
                  r'<a href="mailto:\1">\1</a>', text)


def strip_links(text):
    """
    Strip link tags from ``text`` leaving just the link label.

    Example::

        >>> strip_links('<a href="something">else</a>')
        'else'
    """
    strip_re = re.compile(r'<a\b.*?>(.*?)<\/a>', re.I | re.M)
    return strip_re.sub(r'\1', text)


def textilize(text, sanitize=False):
    """Format the text with Textile formatting.

    This function uses the PyTextile library which is included with
    WebHelpers.

    Additionally, the output can be sanitized which will fix
    self-closing tags (for example ``<br />``) for proper XHTML output.
    """
    texer = textile.Textiler(text)
    return texer.process(sanitize=sanitize)


def markdown(text, **kwargs):
    """Format the text with MarkDown formatting.

    This function uses the Python MarkDown library which is included
    with WebHelpers. Keyword arguments are passed through to it
    unchanged.
    """
    return _markdown.markdown(text, **kwargs)


__all__ = ['cycle', 'reset_cycle', 'counter', 'reset_counter', 'truncate',
           'highlight', 'excerpt', 'word_wrap', 'simple_format', 'auto_link',
           'strip_links', 'textilize', 'markdown']