"""
Text Helpers
Provides methods for filtering, formatting and transforming strings.
"""
# Last synced with Rails copy at Revision 6096 on Feb 8th, 2007.
# sanitize and strip_tags were purposely left out; they should be added at some point,
# most likely using BeautifulSoup.
import itertools
import re
import textwrap
import warnings
import webhelpers.textile as textile
import webhelpers.markdown as _markdown
from routes import request_config
from webhelpers.rails.tags import content_tag, tag_options
# Verbose-mode (re.X) pattern that locates bare URLs in running text; it is
# the pattern auto_link_urls() substitutes on.  Capture groups:
#   1 - what immediately precedes the URL: an HTML tag, a single character
#       other than = ! : ' " /, or the start of the input
#   2 - the "http://", "https://" or "www." prefix
#   3 - host name, optional port, path, query string and anchor
#   4 - what immediately follows the URL: one punctuation character,
#       whitespace, "<", or the end of the input
AUTO_LINK_RE = re.compile(r"""
( # leading text
<\w+.*?>| # leading HTML tag, or
[^=!:'"/]| # leading punctuation, or
^ # beginning of line
)
(
(?:https?://)| # protocol spec, or
(?:www\.) # www.*
)
(
[-\w]+ # subdomain or domain
(?:\.[-\w]+)* # remaining subdomains or domain
(?::\d+)? # port
(?:/(?:(?:[~\w\+%-]|(?:[,.;:][^\s$]))+)?)* # path
(?:\?[\w\+%&=.;-]+)? # query string
(?:\#[\w\-]*)? # trailing anchor
)
([\.,"'?!;:]|\s|<|$) # trailing text
""", re.X)
def iterdict(items):
    """Bundle ``items`` with an endless iterator over them.

    Returns a dict with two keys: ``'items'`` holds the sequence exactly as
    it was passed in, and ``'iter'`` holds an ``itertools.cycle`` over it.
    """
    endless = itertools.cycle(items)
    return {'items': items, 'iter': endless}
def cycle(*args, **kargs):
    """
    Return the next item of a repeating sequence.

    Every time ``cycle`` is called, the value returned will be the next
    item in the list passed to it, wrapping around to the first item after
    the last one.  The state is kept in ``request_config().environ`` under
    the ``'railshelpers.cycles'`` key, so it lasts as long as that environ
    does (normally one request); it can also be cleared explicitly by
    calling ``reset_cycle()``.

    You may specify the list as either several positional arguments, or as
    a single list argument.  Calling with no positional argument raises
    ``IndexError``.

    ``name``
        Optional keyword argument naming the cycle (defaults to
        ``'default'``).  Calling a named cycle with a different list of
        items restarts it with the new items.

    This can be used to alternate classes for table rows::

        # In Myghty...
        % for item in items:
        <tr class="<% cycle("even", "odd") %>">
            ... use item ...
        </tr>
        % #endfor

    You can use named cycles to prevent clashes in nested loops. You'll
    have to reset the inner cycle, manually::

        % for item in items:
        <tr class="<% cycle("even", "odd", name="row_class") %>">
            <td>
        %     for value in item.values:
                <span style="color:'<% cycle("red", "green", "blue",
                                             name="colors") %>'">
                    item
                </span>
        %     #endfor
            <% reset_cycle("colors") %>
            </td>
        </tr>
        % #endfor
    """
    if len(args) > 1:
        items = args
    else:
        items = args[0]
    name = kargs.get('name', 'default')
    cycles = request_config().environ.setdefault('railshelpers.cycles', {})
    state = cycles.get(name)
    # (Re)start the cycle when it is new or when the caller switched to a
    # different set of items under the same name.
    if state is None or state.get('items') != items:
        state = cycles[name] = iterdict(items)
    # The builtin next() works on both Python 2.6+ and Python 3, unlike the
    # Python-2-only ``.next()`` iterator method.
    return next(state['iter'])
def reset_cycle(name='default'):
    """
    Forget the state of the cycle called ``name``.

    The next ``cycle()`` call using that name starts again from the first
    element.  Resetting a cycle that does not exist is a no-op.
    """
    environ = request_config().environ
    try:
        registry = environ['railshelpers.cycles']
        del registry[name]
    except KeyError:
        # Either no cycle has been used yet or this name is unknown.
        pass
def counter(name='default', start=1, step=1):
    """Return the next cardinal in a sequence.

    Every time ``counter`` is called, the value returned will be the next
    counting number in that sequence.  The state is kept in
    ``request_config().environ`` under the ``'railshelpers.counters'`` key,
    so it lasts as long as that environ does (normally one request); it can
    also be cleared explicitly by calling ``reset_counter()``.

    ``name``
        Name of the counter (defaults to ``'default'``).
    ``start``
        The first value returned (defaults to 1).
    ``step``
        The amount added after each call (defaults to 1).

    ``start`` and ``step`` only take effect when the named counter is
    created, i.e. on its first use or on the first use after a reset; on
    later calls they are ignored.

    This can be used to count rows in a table::

        # In Myghty
        % for item in items:
        <td><% h.counter() %></td>
        % #endfor

    You can use named counters to prevent clashes in nested loops.
    You'll have to reset the inner counter manually though. See the
    documentation for ``cycle()`` for a similar example.
    """
    counters = request_config().environ.setdefault('railshelpers.counters', {})

    # Modelled on itertools.count, but with a configurable step.
    def do_counter(value, increment):
        while True:
            yield value
            value += increment

    sequence = counters.get(name)
    if sequence is None:
        sequence = counters[name] = do_counter(start, step)
    # The builtin next() works on both Python 2.6+ and Python 3, unlike the
    # Python-2-only ``.next()`` iterator method.
    return next(sequence)
def reset_counter(name='default'):
    """Forget the state of the counter called ``name``.

    The next ``counter()`` call using that name starts a fresh sequence.
    Resetting a counter that does not exist is a no-op.
    """
    environ = request_config().environ
    try:
        registry = environ['railshelpers.counters']
        del registry[name]
    except KeyError:
        # Either no counter has been used yet or this name is unknown.
        pass
def truncate(text, length=30, truncate_string='...'):
    """
    Truncate ``text`` with replacement characters.

    ``length``
        The maximum length of ``text`` before replacement
    ``truncate_string``
        If ``text`` exceeds the ``length``, this string will replace
        the end of the string

    Returns ``''`` when ``text`` is empty or ``None``, and ``text``
    unchanged when it is not longer than ``length``.  Otherwise the result
    is the beginning of ``text`` followed by ``truncate_string``, totalling
    ``length`` characters.  If ``length`` is smaller than
    ``len(truncate_string)`` the result is just ``truncate_string``.

    Example::

        >>> truncate('Once upon a time in a world far far away', 14)
        'Once upon a...'
    """
    if not text:
        return ''
    if len(text) <= length:
        return text
    # Clamp at zero: a negative slice end would count from the end of the
    # string and keep almost all of ``text`` instead of shortening it.
    keep = max(length - len(truncate_string), 0)
    return text[:keep] + truncate_string
def highlight(text, phrase, highlighter='<strong class="highlight">\\1</strong>',
              hilighter=None):
    """
    Highlight the ``phrase`` where it is found in the ``text``.

    Every case-insensitive occurrence of ``phrase`` is replaced by
    ``highlighter``, with ``\\1`` standing for the text that matched.
    By default the match is wrapped like this::

        <strong class="highlight">I'm a highlight phrase</strong>

    ``highlighter``
        Defines the highlighting phrase. This argument should be a
        string with ``\\1`` where the phrase is supposed to be inserted.
        It is used as a regular-expression replacement template.
    ``hilighter``
        Deprecated alias of ``highlighter``; when given it takes
        precedence and a ``DeprecationWarning`` is issued.

    Note: The ``phrase`` is matched literally; it is escaped with
    ``re.escape`` before being compiled, so regex metacharacters in it
    have no special meaning.

    ``text`` is returned unchanged when either ``text`` or ``phrase`` is
    empty or ``None``.

    Example::

        >>> highlight('You searched for: Pylons', 'Pylons')
        'You searched for: <strong class="highlight">Pylons</strong>'
    """
    if hilighter is not None:
        warnings.warn("The highlight function's hilighter keyword argument is deprecated: "
                      "Please use the highlighter keyword argument instead.",
                      DeprecationWarning, 2)
        highlighter = hilighter
    if not phrase or not text:
        return text
    highlight_re = re.compile('(%s)' % re.escape(phrase), re.I)
    return highlight_re.sub(highlighter, text)
def excerpt(text, phrase, radius=100, excerpt_string="..."):
    """
    Cut out the part of ``text`` surrounding the first match of ``phrase``.

    ``phrase``
        Phrase to look for in ``text`` (matched literally, ignoring case)
    ``radius``
        Maximum number of characters to keep on each side of the phrase
    ``excerpt_string``
        Marker added on a side where text was cut off

    Returns ``''`` if the phrase isn't found.  If ``text`` or ``phrase``
    is empty or ``None``, ``text`` is returned as is.

    Example::

        >>> excerpt("hello my world", "my", 3)
        '...lo my wo...'
    """
    if not (text and phrase):
        return text
    pattern = '(.{0,%s}%s.{0,%s})' % (radius, re.escape(phrase), radius)
    found = re.compile(pattern, re.I).search(text)
    if found is None:
        return ""
    begin, end = found.span(1)
    pieces = [found.group(1)]
    # Only mark the sides where something was actually left out.
    if begin > 0:
        pieces.insert(0, excerpt_string)
    if end < len(text):
        pieces.append(excerpt_string)
    return ''.join(pieces)
def word_wrap(text, line_width=80):
    """
    Wrap ``text`` into lines no longer than ``line_width``. Deprecated.

    This helper only emits a ``DeprecationWarning`` and then delegates to
    ``textwrap.fill``; call ``textwrap.fill`` directly instead.
    """
    message = ("The word_wrap function has been deprecated: Use python's builtin "
               "textwrap.fill function instead.")
    warnings.warn(message, DeprecationWarning, 2)
    wrapped = textwrap.fill(text, line_width)
    return wrapped
def simple_format(text):
    """
    Return ``text`` transformed into HTML using simple formatting rules.

    Two or more consecutive newlines(``\\n\\n``) are considered as a
    paragraph break and the paragraphs are wrapped in ``<p>`` tags. One
    newline (``\\n``) is considered a linebreak and a ``<br />`` tag is
    appended. This method does not remove the newlines from the text.

    ``None`` is treated like an empty string.
    """
    if text is None:
        text = ''
    # Normalise all line-ending styles to "\n".
    text = re.sub(r'(\r\n|\n|\r)', r'\n', text)
    # Collapse runs of blank lines into a single paragraph break.
    text = re.sub(r'\n\n+', r'\n\n', text)
    # Close the current paragraph and open a new one at each break.
    text = re.sub(r'(\n\n)', r'</p>\1<p>', text)
    # A lone newline between two non-newline characters is a line break.
    text = re.sub(r'([^\n])(\n)(?=[^\n])', r'\1\2<br />', text)
    # Wrap everything in the outermost <p>...</p> (via content_tag) and drop
    # the empty paragraph that appears when the text ends with a break.
    text = content_tag("p", text).replace('</p><p></p>', '</p>')
    text = re.sub(r'</p><p>', r'</p>\n<p>', text)
    return text
def auto_link(text, link="all", **href_options):
    """
    Turn urls and/or email addresses found in ``text`` into links.

    ``link``
        Selects what gets linked: ``"all"`` runs the text through
        ``auto_link_email_addresses`` and then ``auto_link_urls``,
        ``"email_addresses"`` runs only the former, and any other value
        (e.g. ``"urls"``) runs only the latter.
    ``href_options``
        Extra keyword arguments, forwarded to ``auto_link_urls``; they are
        not used for email addresses.

    An empty or ``None`` ``text`` yields ``""``.

    Example::

        auto_link("Go to http://www.planetpython.com and say hello to guido@python.org")
    """
    if not text:
        return ""
    if link == "email_addresses":
        return auto_link_email_addresses(text)
    if link == "all":
        # Addresses are handled first, URLs afterwards.
        text = auto_link_email_addresses(text)
    return auto_link_urls(text, **href_options)
def auto_link_urls(text, **href_options):
    """
    Wrap every bare URL in ``text`` in an ``<a>`` tag.

    URLs are located with ``AUTO_LINK_RE``.  A URL that directly follows an
    opening ``<a ...>`` tag is assumed to be linked already and is left
    untouched.  URLs starting with ``www.`` get ``http://`` prepended in the
    ``href`` while the visible link text stays as written.

    ``href_options``
        Passed to ``tag_options`` and the result is inserted into every
        generated ``<a>`` tag (presumably rendered HTML attributes; see
        ``tag_options`` for the exact format).
    """
    extra_options = tag_options(**href_options)

    def handle_match(matchobj):
        whole = matchobj.group()
        a, b, c, d = matchobj.group(1, 2, 3, 4)
        # Don't replace URLs that are already linked.
        if re.match(r'<a\s', a, re.I):
            return whole
        label = b + c
        if b == "www.":
            b = "http://www."
        return '%s<a href="%s%s"%s>%s</a>%s' % (a, b, c, extra_options, label, d)

    return re.sub(AUTO_LINK_RE, handle_match, text)
def auto_link_email_addresses(text):
    """
    Wrap every email address in ``text`` in a ``mailto:`` link.

    Each address becomes ``<a href="mailto:ADDRESS">ADDRESS</a>``; the rest
    of the text is returned unchanged.
    """
    return re.sub(r'([\w\.!#\$%\-+.]+@[A-Za-z0-9\-]+(\.[A-Za-z0-9\-]+)+)',
                  r'<a href="mailto:\1">\1</a>', text)
def strip_links(text):
    """
    Strip link tags from ``text`` leaving just the link label.

    Both the opening ``<a ...>`` tag and the closing ``</a>`` tag are
    removed; matching ignores case and a link may span several lines.

    Example::

        >>> strip_links('<a href="something">else</a>')
        'else'
    """
    # re.S lets "." cross newlines so that links broken over several lines
    # are stripped too (re.M would only change the meaning of ^ and $).
    strip_re = re.compile(r'<a\b.*?>(.*?)<\/a>', re.I | re.S)
    return strip_re.sub(r'\1', text)
def textilize(text, sanitize=False):
    """Render ``text`` using Textile formatting.

    The work is done by the ``webhelpers.textile`` module (imported here as
    ``textile``): a ``Textiler`` is built from ``text`` and its
    ``process`` method is called.

    ``sanitize``
        Forwarded unchanged to ``Textiler.process`` as the ``sanitize``
        keyword argument (defaults to ``False``).
    """
    return textile.Textiler(text).process(sanitize=sanitize)
def markdown(text, **kwargs):
    """Render ``text`` using MarkDown formatting.

    Thin wrapper around ``webhelpers.markdown.markdown`` (imported here as
    ``_markdown``); any keyword arguments are passed straight through.
    """
    render = _markdown.markdown
    return render(text, **kwargs)
# Names exported by ``from <this module> import *``.
__all__ = [
    'cycle',
    'reset_cycle',
    'counter',
    'reset_counter',
    'truncate',
    'highlight',
    'excerpt',
    'word_wrap',
    'simple_format',
    'auto_link',
    'strip_links',
    'textilize',
    'markdown',
]