Ñò §ÚêLc@sdZddkZddkZeidƒZeidƒZeidƒZeidƒZeidƒZeidƒZ eid ƒZ eid ƒZ eid ƒZ eid ƒZ eid eiƒZeid ƒZeidƒZdefd„ƒYZdeifd„ƒYZdS(sA parser for HTML and XHTML.iÿÿÿÿNs[&<]s<(/|\Z)s &[a-zA-Z#]s%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s <[a-zA-Z]t>s--\s*>s[a-zA-Z][-.a-zA-Z0-9:_]*s_\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?sê <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name (?:\s+ # whitespace before attribute name (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name (?:\s*=\s* # value indicator (?:'[^']*' # LITA-enclosed value |\"[^\"]*\" # LIT-enclosed value |[^'\">\s]+ # bare value ) )? ) )* \s* # trailing whitespace s#tHTMLParseErrorcBs#eZdZdd„Zd„ZRS(s&Exception raised for all parse errors.cCs5|pt‚||_|d|_|d|_dS(Nii(tAssertionErrortmsgtlinenotoffset(tselfRtposition((s"/usr/lib64/python2.6/HTMLParser.pyt__init__4s  cCs[|i}|idj o|d|i}n|idj o|d|id}n|S(Ns , at line %ds , column %di(RRtNoneR(Rtresult((s"/usr/lib64/python2.6/HTMLParser.pyt__str__:s  N(NN(t__name__t __module__t__doc__R RR (((s"/usr/lib64/python2.6/HTMLParser.pyR1s t HTMLParsercBsøeZdZdZd„Zd„Zd„Zd„Zd„ZdZ d„Z d „Z d „Z d „Zd „Zd „Zd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„ZdZd„ZRS(sÇFind tags and other markup and call handler functions. Usage: p = HTMLParser() p.feed(data) ... p.close() Start tags are handled by calling self.handle_starttag() or self.handle_startendtag(); end tags by self.handle_endtag(). The data between tags is passed from the parser to the derived class by calling self.handle_data() with the data as argument (the data may be split up in arbitrary chunks). Entity references are passed by calling self.handle_entityref() with the entity reference as the argument. Numeric character references are passed to self.handle_charref() with the string containing the reference as the argument. tscripttstylecCs|iƒdS(s#Initialize and reset this instance.N(treset(R((s"/usr/lib64/python2.6/HTMLParser.pyRZscCs/d|_d|_t|_tii|ƒdS(s1Reset this instance. Loses all unprocessed data.ts???N(trawdatatlasttagtinteresting_normalt interestingt markupbaset ParserBaseR(R((s"/usr/lib64/python2.6/HTMLParser.pyR^s   cCs!|i||_|idƒdS(sFeed data to the parser. Call this as often as you want, with as little or as much text as you want (may include ' '). iN(Rtgoahead(Rtdata((s"/usr/lib64/python2.6/HTMLParser.pytfeedescCs|idƒdS(sHandle any buffered data.iN(R(R((s"/usr/lib64/python2.6/HTMLParser.pytclosenscCst||iƒƒ‚dS(N(Rtgetpos(Rtmessage((s"/usr/lib64/python2.6/HTMLParser.pyterrorrscCs|iS(s)Return full source of start tag: '<...>'.(t_HTMLParser__starttag_text(R((s"/usr/lib64/python2.6/HTMLParser.pytget_starttag_textwscCs t|_dS(N(tinteresting_cdataR(R((s"/usr/lib64/python2.6/HTMLParser.pytset_cdata_mode{scCs t|_dS(N(RR(R((s"/usr/lib64/python2.6/HTMLParser.pytclear_cdata_mode~sc Cs)|i}d}t|ƒ}x¼||jo®|ii||ƒ}|o|iƒ}n|}||jo|i|||!ƒn|i||ƒ}||joPn|i}|d|ƒoti ||ƒo|i |ƒ}nº|d|ƒo|i |ƒ}n—|d|ƒo|i |ƒ}nt|d|ƒo|i |ƒ}nQ|d|ƒo|i|ƒ}n.|d|jo|idƒ|d}nP|djo|o|idƒnPn|i||ƒ}q|d |ƒoÀti ||ƒ}|og|iƒd d !} |i| ƒ|iƒ}|d |dƒp|d}n|i||ƒ}qqÕd ||jo*|i|dd !ƒ|i|d ƒ}nPq|d |ƒoti ||ƒ}|oc|idƒ} |i| ƒ|iƒ}|d |dƒp|d}n|i||ƒ}qnti ||ƒ}|o4|o(|iƒ||jo|idƒnPqÕ|d|jo'|id ƒ|i||dƒ}qÕPqdp td‚qW|o7||jo*|i|||!ƒ|i||ƒ}n|||_dS(Nits s junk characters in start tag: %ri(Rs/>(R R!tcheck_for_whole_start_tagRttagfindR0RR9tlowerRtattrfindR7tunescapetappendtstripRtcountR)trfindR tendswiththandle_startendtagthandle_starttagtCDATA_CONTENT_ELEMENTSR$(RR=tendposRtattrsR0R@ttagtmtattrnametrestt attrvalueR9RR((s"/usr/lib64/python2.6/HTMLParser.pyR1ãsR      &&    cCs|i}ti||ƒ}|oÞ|iƒ}|||d!}|djo |dS|djoY|id|ƒo |dS|id|ƒodS|i||dƒ|idƒn|djodS|d jodS|i||ƒ|id ƒntd ƒ‚dS( NiRt/s/>iiÿÿÿÿsmalformed empty start tagRs6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZsmalformed start tagswe should not get here!(RtlocatestarttagendR0R9R.R-R R(RR=RRUR?tnext((s"/usr/lib64/python2.6/HTMLParser.pyREs*        cCsÁ|i}|||d!djp td‚ti||dƒ}|pdS|iƒ}ti||ƒ}|p|id|||!fƒn|idƒ}|i |i ƒƒ|i ƒ|S(Niss&