Ñò
mÈKc           @   s  d  Z  d d k Z d d k Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z	 e i d	 ƒ Z
 e i d
 ƒ Z e i d ƒ Z e i d ƒ Z e i d e i ƒ Z e i d	 ƒ Z e i d ƒ Z d e f d „  ƒ  YZ d e i f d „  ƒ  YZ d S(   s   A parser for HTML and XHTML.iÿÿÿÿNs   [&<]s   <(/|\Z)s
   &[a-zA-Z#]s%   &([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)   &#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s	   <[a-zA-Z]t   >s   --\s*>s   [a-zA-Z][-.a-zA-Z0-9:_]*s_   \s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?sê  
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
s#   </\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>t   HTMLParseErrorc           B   s#   e  Z d  Z d d „ Z d „  Z RS(   s&   Exception raised for all parse errors.c         C   s5   | p t  ‚ | |  _ | d |  _ | d |  _ d  S(   Ni    i   (   t   AssertionErrort   msgt   linenot   offset(   t   selfR   t   position(    (    s    /usr/lib/python2.6/HTMLParser.pyt   __init__4   s    	c         C   s[   |  i  } |  i d  j	 o | d |  i } n |  i d  j	 o | d |  i d } n | S(   Ns   , at line %ds   , column %di   (   R   R   t   NoneR   (   R   t   result(    (    s    /usr/lib/python2.6/HTMLParser.pyt   __str__:   s    	N(   NN(   t   __name__t
   __module__t   __doc__R	   R   R   (    (    (    s    /usr/lib/python2.6/HTMLParser.pyR   1   s   t
   HTMLParserc           B   sø   e  Z d  Z d Z d „  Z d „  Z d „  Z d „  Z d „  Z d Z
 d „  Z d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d Z d „  Z RS(   sÇ  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  Entity references are
    passed by calling self.handle_entityref() with the entity
    reference as the argument.  Numeric character references are
    passed to self.handle_charref() with the string containing the
    reference as the argument.
    t   scriptt   stylec         C   s   |  i  ƒ  d S(   s#   Initialize and reset this instance.N(   t   reset(   R   (    (    s    /usr/lib/python2.6/HTMLParser.pyR   Z   s    c         C   s/   d |  _  d |  _ t |  _ t i i |  ƒ d S(   s1   Reset this instance.  Loses all unprocessed data.t    s   ???N(   t   rawdatat   lasttagt   interesting_normalt   interestingt
   markupbaset
   ParserBaseR   (   R   (    (    s    /usr/lib/python2.6/HTMLParser.pyR   ^   s    			c         C   s!   |  i  | |  _  |  i d ƒ d S(   s   Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '
').
        i    N(   R   t   goahead(   R   t   data(    (    s    /usr/lib/python2.6/HTMLParser.pyt   feede   s    c         C   s   |  i  d ƒ d S(   s   Handle any buffered data.i   N(   R   (   R   (    (    s    /usr/lib/python2.6/HTMLParser.pyt   closen   s    c         C   s   t  | |  i ƒ  ƒ ‚ d  S(   N(   R   t   getpos(   R   t   message(    (    s    /usr/lib/python2.6/HTMLParser.pyt   errorr   s    c         C   s   |  i  S(   s)   Return full source of start tag: '<...>'.(   t   _HTMLParser__starttag_text(   R   (    (    s    /usr/lib/python2.6/HTMLParser.pyt   get_starttag_textw   s    c         C   s   t  |  _ d  S(   N(   t   interesting_cdataR   (   R   (    (    s    /usr/lib/python2.6/HTMLParser.pyt   set_cdata_mode{   s    c         C   s   t  |  _ d  S(   N(   R   R   (   R   (    (    s    /usr/lib/python2.6/HTMLParser.pyt   clear_cdata_mode~   s    c   
      C   sî  |  i  } d } t | ƒ } x| | j  os|  i i | | ƒ } | o | i ƒ  } n | } | | j  o |  i | | | !ƒ n |  i | | ƒ } | | j o Pn | i } | d | ƒ ot i	 | | ƒ o |  i
 | ƒ } nº | d | ƒ o |  i | ƒ } n— | d | ƒ o |  i | ƒ } nt | d | ƒ o |  i | ƒ } nQ | d | ƒ o |  i | ƒ } n. | d | j  o |  i d ƒ | d } n P| d j  o | o |  i d ƒ n Pn |  i | | ƒ } q | d	 | ƒ o… t i	 | | ƒ } | og | i ƒ  d
 d !}	 |  i |	 ƒ | i ƒ  } | d | d ƒ p | d } n |  i | | ƒ } q qšPq | d | ƒ ot i	 | | ƒ } | oc | i d ƒ }	 |  i |	 ƒ | i ƒ  } | d | d ƒ p | d } n |  i | | ƒ } q n t i	 | | ƒ } | o4 | o( | i ƒ  | | j o |  i d ƒ n Pqš| d | j  o' |  i d ƒ |  i | | d ƒ } qšPq d p
 t d ‚ q W| o7 | | j  o* |  i | | | !ƒ |  i | | ƒ } n | | |  _  d  S(   Ni    t   <s   </s   <!--s   <?s   <!i   s   EOF in middle of constructs   &#i   iÿÿÿÿt   ;t   &s#   EOF in middle of entity or char refs   interesting.search() lied(   R   t   lenR   t   searcht   startt   handle_datat	   updatepost
   startswitht   starttagopent   matcht   parse_starttagt   parse_endtagt   parse_commentt   parse_pit   parse_declarationR    t   charreft   groupt   handle_charreft   endt	   entityreft   handle_entityreft
   incompleteR   (
   R   R9   R   t   it   nR0   t   jR.   t   kt   name(    (    s    /usr/lib/python2.6/HTMLParser.pyR   „   sŒ    	   	c         C   s   |  i  } | | | d !d j p
 t d ‚ t i | | d ƒ } | p d S| i ƒ  } |  i | | d | !ƒ | i ƒ  } | S(   Ni   s   <?s   unexpected call to parse_pi()iÿÿÿÿ(   R   R   t   picloseR*   R+   t	   handle_piR9   (   R   R=   R   R0   R?   (    (    s    /usr/lib/python2.6/HTMLParser.pyR4   Ô   s    	"c      
   C   s”  d  |  _ |  i | ƒ } | d j  o | S|  i } | | | !|  _ g  } t i | | d ƒ } | p
 t d ‚ | i ƒ  } | | d | !i ƒ  |  _	 } xð | | j  oâ t
 i | | ƒ } | p Pn | i d d d ƒ \ }	 }
 } |
 p
 d  } nm | d  d j o | d j n p& | d  d j o | d j n o  | d d !} |  i | ƒ } n | i |	 i ƒ  | f ƒ | i ƒ  } q W| | | !i ƒ  } | d j o‘ |  i ƒ  \ } } d |  i j o9 | |  i i d ƒ } t |  i ƒ |  i i d ƒ } n | t |  i ƒ } |  i d | | | !d  f ƒ n | i d
 ƒ o |  i | | ƒ n/ |  i | | ƒ | |  i j o |  i ƒ  n | S(   Ni    i   s#   unexpected call to parse_starttag()i   i   s   'iÿÿÿÿt   "R    s   />s   
s    junk characters in start tag: %ri   (   R    s   />(   R	   R!   t   check_for_whole_start_tagR   t   tagfindR0   R   R9   t   lowerR   t   attrfindR7   t   unescapet   appendt   stripR   t   countR)   t   rfindR    t   endswitht   handle_startendtagt   handle_starttagt   CDATA_CONTENT_ELEMENTSR$   (   R   R=   t   endposR   t   attrsR0   R@   t   tagt   mt   attrnamet   restt	   attrvalueR9   R   R   (    (    s    /usr/lib/python2.6/HTMLParser.pyR1   à   sR    		 
&&	c         C   s  |  i  } t i | | ƒ } | oÞ | i ƒ  } | | | d !} | d j o	 | d S| d j oY | i d | ƒ o	 | d S| i d | ƒ o d S|  i | | d ƒ |  i d ƒ n | d j o d S| d	 j o d S|  i | | ƒ |  i d
 ƒ n t d ƒ ‚ d  S(   Ni   R    t   /s   />i   iÿÿÿÿs   malformed empty start tagR   s6   abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZs   malformed start tags   we should not get here!(   R   t   locatestarttagendR0   R9   R.   R-   R    R   (   R   R=   R   RU   R?   t   next(    (    s    /usr/lib/python2.6/HTMLParser.pyRE     s*    			c         C   sÁ   |  i  } | | | d !d j p
 t d ‚ t i | | d ƒ } | p d S| i ƒ  } t i | | ƒ } | p |  i d | | | !f ƒ n | i d ƒ } |  i	 | i
 ƒ  ƒ |  i ƒ  | S(   Ni   s   </s   unexpected call to parse_endtagi   iÿÿÿÿs   bad end tag: %r(   R   R   t	   endendtagR*   R9   t
   endtagfindR0   R    R7   t   handle_endtagRG   R%   (   R   R=   R   R0   R?   RT   (    (    s    /usr/lib/python2.6/HTMLParser.pyR2   1  s    	"
c         C   s!   |  i  | | ƒ |  i | ƒ d  S(   N(   RP   R^   (   R   RT   RS   (    (    s    /usr/lib/python2.6/HTMLParser.pyRO   A  s    c         C   s   d  S(   N(    (   R   RT   RS   (    (    s    /usr/lib/python2.6/HTMLParser.pyRP   F  s    c         C   s   d  S(   N(    (   R   RT   (    (    s    /usr/lib/python2.6/HTMLParser.pyR^   J  s    c         C   s   d  S(   N(    (   R   RA   (    (    s    /usr/lib/python2.6/HTMLParser.pyR8   N  s    c         C   s   d  S(   N(    (   R   RA   (    (    s    /usr/lib/python2.6/HTMLParser.pyR;   R  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.6/HTMLParser.pyR,   V  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.6/HTMLParser.pyt   handle_commentZ  s    c         C   s   d  S(   N(    (   R   t   decl(    (    s    /usr/lib/python2.6/HTMLParser.pyt   handle_decl^  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.6/HTMLParser.pyRC   b  s    c         C   s   |  i  d | f ƒ d  S(   Ns   unknown declaration: %r(   R    (   R   R   (    (    s    /usr/lib/python2.6/HTMLParser.pyt   unknown_decle  s    c            s4   d | j o | S‡  f d †  } t  i d | | ƒ S(   NR(   c            s   |  i  ƒ  d }  |  d d j oI |  d }  |  d d j o t |  d d ƒ } n t |  ƒ } t | ƒ Sd d  k } t i d  j oH h d d	 6} t _ x1 | i i ƒ  D] \ } } t | ƒ | | <qª Wn y ˆ  i |  SWn t	 j
 o d
 |  d SXd  S(   Ni    t   #i   t   xt   Xi   iÿÿÿÿu   't   aposR(   R'   (   Rd   Re   (
   t   groupst   intt   unichrt   htmlentitydefsR   t
   entitydefsR	   t   name2codepointt	   iteritemst   KeyError(   t   st   cRj   Rk   R@   t   v(   R   (    s    /usr/lib/python2.6/HTMLParser.pyt   replaceEntitiesm  s"    
 s#   &(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));(   t   ret   sub(   R   Ro   Rr   (    (   R   s    /usr/lib/python2.6/HTMLParser.pyRI   j  s    (   R   R   N(   R   R   R   RQ   R   R   R   R   R    R	   R!   R"   R$   R%   R   R4   R1   RE   R2   RO   RP   R^   R8   R;   R,   R_   Ra   RC   Rb   Rk   RI   (    (    (    s    /usr/lib/python2.6/HTMLParser.pyR   C   s8   										P		3												(   R   R   Rs   t   compileR   R#   R<   R:   R6   R/   RB   t   commentcloseRF   RH   t   VERBOSERZ   R\   R]   t	   ExceptionR   R   R   (    (    (    s    /usr/lib/python2.6/HTMLParser.pyt   <module>   s&   
	