Ñò
mÈKc           @   s}   d  Z  d d k Z d d k Z d g Z d d d „  ƒ  YZ d d d „  ƒ  YZ d d d „  ƒ  YZ d	 e i f d
 „  ƒ  YZ d S(   s<   robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
iÿÿÿÿNt   RobotFileParserc           B   sb   e  Z d  Z d d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d	 „  Z
 d
 „  Z RS(   ss    This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    t    c         C   s>   g  |  _  d  |  _ t |  _ t |  _ |  i | ƒ d |  _ d  S(   Ni    (   t   entriest   Nonet   default_entryt   Falset   disallow_allt	   allow_allt   set_urlt   last_checked(   t   selft   url(    (    s!   /usr/lib/python2.6/robotparser.pyt   __init__   s    				c         C   s   |  i  S(   s·   Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        (   R	   (   R
   (    (    s!   /usr/lib/python2.6/robotparser.pyt   mtime    s    c         C   s   d d k  } | i  ƒ  |  _ d S(   sY   Sets the time the robots.txt file was last fetched to the
        current time.

        iÿÿÿÿN(   t   timeR	   (   R
   R   (    (    s!   /usr/lib/python2.6/robotparser.pyt   modified)   s    c         C   s/   | |  _  t i | ƒ d d !\ |  _ |  _ d S(   s,   Sets the URL referring to a robots.txt file.i   i   N(   R   t   urlparset   hostt   path(   R
   R   (    (    s!   /usr/lib/python2.6/robotparser.pyR   1   s    	c         C   s¾   t  ƒ  } | i |  i ƒ } g  } | D] } | | i ƒ  q& ~ } | i ƒ  | i |  _ |  i d j o t |  _ nF |  i d j o t |  _ n) |  i d j o | o |  i	 | ƒ n d S(   s4   Reads the robots.txt URL and feeds it to the parser.i‘  i“  i  iÈ   N(   i‘  i“  (
   t	   URLopenert   openR   t   stript   closet   errcodet   TrueR   R   t   parse(   R
   t   openert   ft   _[1]t   linet   lines(    (    s!   /usr/lib/python2.6/robotparser.pyt   read6   s    	'
c         C   s1   d | i  j o | |  _ n |  i i | ƒ d  S(   Nt   *(   t
   useragentsR   R   t   append(   R
   t   entry(    (    s!   /usr/lib/python2.6/robotparser.pyt
   _add_entryD   s    c         C   s9  d } d } t  ƒ  } xü| D]ô} | d 7} | pQ | d j o t  ƒ  } d } q„ | d j o  |  i | ƒ t  ƒ  } d } q„ n | i d ƒ } | d j o | |  } n | i ƒ  } | p q n | i d d ƒ } t | ƒ d j o#| d i ƒ  i ƒ  | d <t i | d i ƒ  ƒ | d <| d d j oE | d j o |  i | ƒ t  ƒ  } n | i	 i
 | d ƒ d } q| d d j o8 | d j o' | i i
 t | d t ƒ ƒ d } qq| d d j o8 | d j o' | i i
 t | d t ƒ ƒ d } qqq q W| d j o |  i i
 | ƒ n d	 S(
   s   parse the input lines from a robots.txt file.
           We allow that a user-agent: line is not preceded by
           one or more blank lines.i    i   i   t   #t   :s
   user-agentt   disallowt   allowN(   t   EntryR$   t   findR   t   splitt   lent   lowert   urllibt   unquoteR!   R"   t	   rulelinest   RuleLineR   R   R   (   R
   R   t   statet
   linenumberR#   R   t   i(    (    s!   /usr/lib/python2.6/robotparser.pyR   K   sP    	 
	
	
c         C   s›   |  i  o t S|  i o t St i t i t i | ƒ ƒ d ƒ p d } x/ |  i D]$ } | i	 | ƒ o | i
 | ƒ SqT W|  i o |  i i
 | ƒ St S(   s=   using the parsed robots.txt decide if useragent can fetch urli   t   /(   R   R   R   R   R.   t   quoteR   R/   R   t
   applies_tot	   allowanceR   (   R
   t	   useragentR   R#   (    (    s!   /usr/lib/python2.6/robotparser.pyt	   can_fetch~   s    

,
 
c         C   s5   d i  g  } |  i D] } | t | ƒ d q ~ ƒ S(   NR   s   
(   t   joinR   t   str(   R
   R   R#   (    (    s!   /usr/lib/python2.6/robotparser.pyt   __str__‘   s    (   t   __name__t
   __module__t   __doc__R   R   R   R   R   R$   R   R:   R=   (    (    (    s!   /usr/lib/python2.6/robotparser.pyR       s   							3	R1   c           B   s)   e  Z d  Z d „  Z d „  Z d „  Z RS(   so   A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.c         C   s>   | d j o | o
 t  } n t i | ƒ |  _ | |  _ d  S(   NR   (   R   R.   R6   R   R8   (   R
   R   R8   (    (    s!   /usr/lib/python2.6/robotparser.pyR   ˜   s    
c         C   s    |  i  d j p | i |  i  ƒ S(   NR    (   R   t
   startswith(   R
   t   filename(    (    s!   /usr/lib/python2.6/robotparser.pyR7   Ÿ   s    c         C   s    |  i  o d p d d |  i S(   Nt   Allowt   Disallows   : (   R8   R   (   R
   (    (    s!   /usr/lib/python2.6/robotparser.pyR=   ¢   s    (   R>   R?   R@   R   R7   R=   (    (    (    s!   /usr/lib/python2.6/robotparser.pyR1   •   s   		R)   c           B   s2   e  Z d  Z d „  Z d „  Z d „  Z d „  Z RS(   s?   An entry has one or more user-agents and zero or more rulelinesc         C   s   g  |  _  g  |  _ d  S(   N(   R!   R0   (   R
   (    (    s!   /usr/lib/python2.6/robotparser.pyR   ¨   s    	c         C   sj   g  } x' |  i  D] } | i d | d g ƒ q Wx* |  i D] } | i t | ƒ d g ƒ q: Wd i | ƒ S(   Ns   User-agent: s   
R   (   R!   t   extendR0   R<   R;   (   R
   t   rett   agentR   (    (    s!   /usr/lib/python2.6/robotparser.pyR=   ¬   s    
 
 c         C   sa   | i  d ƒ d i ƒ  } xA |  i D]6 } | d j o t S| i ƒ  } | | j o t Sq# Wt S(   s2   check if this entry applies to the specified agentR5   i    R    (   R+   R-   R!   R   R   (   R
   R9   RG   (    (    s!   /usr/lib/python2.6/robotparser.pyR7   ´   s    
 	c         C   s0   x) |  i  D] } | i | ƒ o | i Sq
 Wt S(   sZ   Preconditions:
        - our agent applies to this entry
        - filename is URL decoded(   R0   R7   R8   R   (   R
   RB   R   (    (    s!   /usr/lib/python2.6/robotparser.pyR8   Á   s
    
 (   R>   R?   R@   R   R=   R7   R8   (    (    (    s!   /usr/lib/python2.6/robotparser.pyR)   ¦   s
   			R   c           B   s#   e  Z d  „  Z d „  Z d „  Z RS(   c         G   s    t  i i |  | Œ d |  _ d  S(   NiÈ   (   R.   t   FancyURLopenerR   R   (   R
   t   args(    (    s!   /usr/lib/python2.6/robotparser.pyR   Ë   s    c         C   s   d S(   N(   NN(   R   (   R
   R   t   realm(    (    s!   /usr/lib/python2.6/robotparser.pyt   prompt_user_passwdÏ   s    c         C   s(   | |  _  t i i |  | | | | | ƒ S(   N(   R   R.   RH   t   http_error_default(   R
   R   t   fpR   t   errmsgt   headers(    (    s!   /usr/lib/python2.6/robotparser.pyRL   Ô   s    	(   R>   R?   R   RK   RL   (    (    (    s!   /usr/lib/python2.6/robotparser.pyR   Ê   s   		(    (    (    (	   R@   R   R.   t   __all__R    R1   R)   RH   R   (    (    (    s!   /usr/lib/python2.6/robotparser.pyt   <module>   s   	ƒ$