a
    ӗaC                     @   s   d Z ddlZddlZddlmZ dgZedZedZedZ	edZ
ed	Zed
ZedZedZedZedejZed
ZedZG dd dejZdS )zA parser for HTML and XHTML.    N)unescape
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                   @   s   e Zd ZdZdZddddZdd Zd	d
 Zdd ZdZ	dd Z
dd Zdd Zdd Zdd Zd7ddZdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 ZdS )8r   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )scriptstyleT)convert_charrefsc                C   s   || _ |   dS )zInitialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r   reset)selfr    r
   ./home/manager/Python-3.9.10/Lib/html/parser.py__init__V   s    zHTMLParser.__init__c                 C   s(   d| _ d| _t| _d| _tj|  dS )z1Reset this instance.  Loses all unprocessed data. z???N)rawdatalasttaginteresting_normalinteresting
cdata_elem_markupbase
ParserBaser   r	   r
   r
   r   r   _   s
    zHTMLParser.resetc                 C   s   | j | | _ | d dS )zFeed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   goaheadr	   datar
   r
   r   feedg   s    zHTMLParser.feedc                 C   s   |  d dS )zHandle any buffered data.   N)r   r   r
   r
   r   closep   s    zHTMLParser.closeNc                 C   s   | j S )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr   r
   r
   r   get_starttag_textv   s    zHTMLParser.get_starttag_textc                 C   s$   |  | _td| j tj| _d S )Nz</\s*%s\s*>)lowerr   recompileIr   )r	   elemr
   r
   r   set_cdata_modez   s    
zHTMLParser.set_cdata_modec                 C   s   t | _d | _d S N)r   r   r   r   r
   r
   r   clear_cdata_mode~   s    zHTMLParser.clear_cdata_modec                 C   sX  | j }d}t|}||k r| jrv| jsv|d|}|dk r|dt||d }|dkrptd	||spq|}n*| j
	||}|r| }n| jrq|}||k r| jr| js| t|||  n| |||  | ||}||krq|j}|d|rJt||r"| |}	n|d|r:| |}	nn|d|rR| |}	nV|d|rj| |}	n>|d	|r| |}	n&|d
 |k r| d |d
 }	nq|	dk r<|sq|d|d
 }	|	dk r|d|d
 }	|	dk r|d
 }	n|	d
7 }	| jr*| js*| t|||	  n| |||	  | ||	}q|d|rt||}|r| dd }
| |
 | }	|d|	d
 s|	d
 }	| ||	}qn<d||d  v r| |||d   | ||d }qq|d|rt||}|rP|d
}
| |
 | }	|d|	d
 sB|	d
 }	| ||	}qt||}|r|r| ||d  kr| }	|	|kr|}	| ||d
 }qn.|d
 |k r| d | ||d
 }nqqdsJ dq|rF||k rF| jsF| jr(| js(| t|||  n| |||  | ||}||d  | _ d S )Nr   <&"   z[\s;]</<!--<?<!r   r   z&#   ;zinteresting.search() lied)r   lenr   r   findrfindmaxr   r    searchr   starthandle_datar   Z	updatepos
startswithstarttagopenmatchparse_starttagparse_endtagparse_commentparse_piparse_html_declarationcharrefgrouphandle_charrefend	entityrefhandle_entityref
incomplete)r	   rB   r   injZampposr9   r7   knamer
   r
   r   r      s    












zHTMLParser.goaheadc                 C   s   | j }|||d  dks"J d|||d  dkr@| |S |||d  dkr^| |S |||d   d	kr|d
|d }|dkrdS | ||d |  |d S | |S d S )Nr-   r,   z+unexpected call to parse_html_declaration()   r*      z<![	   z	<!doctyper   r.   r   )r   r<   Zparse_marked_sectionr   r1   handle_declparse_bogus_comment)r	   rF   r   gtposr
   r
   r   r>      s    

z!HTMLParser.parse_html_declarationr   c                 C   s`   | j }|||d  dv s"J d|d|d }|dkr>dS |rX| ||d |  |d S )Nr-   )r,   r)   z"unexpected call to parse_comment()r   r.   r   )r   r1   handle_comment)r	   rF   reportr   posr
   r
   r   rO     s    zHTMLParser.parse_bogus_commentc                 C   sd   | j }|||d  dks"J dt||d }|s:dS | }| ||d |  | }|S )Nr-   r+   zunexpected call to parse_pi()r.   )r   picloser4   r5   	handle_pirB   )r	   rF   r   r9   rH   r
   r
   r   r=      s    zHTMLParser.parse_pic                 C   s  d | _ | |}|dk r|S | j}||| | _ g }t||d }|sPJ d| }|d  | _}||k r.t	||}|sq.|ddd\}	}
}|
sd }n\|d d d  kr|dd  ksn |d d d  kr|dd  krn n|dd }|rt
|}||	 |f | }ql|||  }|d	vr|  \}}d
| j v r|| j d
 }t| j | j d
 }n|t| j  }| |||  |S |dr| || n"| || || jv r| | |S )Nr   r   z#unexpected call to parse_starttag()r-   rL   'r.   ")r   />
rX   )r   check_for_whole_start_tagr   tagfind_tolerantr9   rB   r@   r   r   attrfind_tolerantr   appendstripZgetposcountr0   r2   r6   endswithhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr#   )r	   rF   endposr   attrsr9   rI   tagmattrnamerestZ	attrvaluerB   linenooffsetr
   r
   r   r:   ,  sZ    

&




zHTMLParser.parse_starttagc                 C   s   | j }t||}|r| }|||d  }|dkr>|d S |dkr~|d|rZ|d S |d|rjdS ||krv|S |d S |dkrdS |dv rdS ||kr|S |d S td	d S )
Nr   r   /rX   r-   r.   r   z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r   locatestarttagend_tolerantr9   rB   r7   AssertionError)r	   rF   r   rg   rH   nextr
   r
   r   rZ   _  s.    z$HTMLParser.check_for_whole_start_tagc                 C   s.  | j }|||d  dks"J dt||d }|s:dS | }t||}|s| jd urr| |||  |S t||d }|s|||d  dkr|d S | 	|S |
d }|d| }| | |d S |
d }| jd ur|| jkr| |||  |S | | |   |S )	Nr-   r)   zunexpected call to parse_endtagr   r.   rL   z</>r   )r   	endendtagr4   rB   
endtagfindr9   r   r6   r[   rO   r@   r   r1   handle_endtagr%   )r	   rF   r   r9   rP   Z	namematchZtagnamer"   r
   r
   r   r;     s8    



zHTMLParser.parse_endtagc                 C   s   |  || | | d S r$   )rb   rr   r	   rf   re   r
   r
   r   ra     s    zHTMLParser.handle_startendtagc                 C   s   d S r$   r
   rs   r
   r
   r   rb     s    zHTMLParser.handle_starttagc                 C   s   d S r$   r
   )r	   rf   r
   r
   r   rr     s    zHTMLParser.handle_endtagc                 C   s   d S r$   r
   r	   rJ   r
   r
   r   rA     s    zHTMLParser.handle_charrefc                 C   s   d S r$   r
   rt   r
   r
   r   rD     s    zHTMLParser.handle_entityrefc                 C   s   d S r$   r
   r   r
   r
   r   r6     s    zHTMLParser.handle_datac                 C   s   d S r$   r
   r   r
   r
   r   rQ     s    zHTMLParser.handle_commentc                 C   s   d S r$   r
   )r	   Zdeclr
   r
   r   rN     s    zHTMLParser.handle_declc                 C   s   d S r$   r
   r   r
   r
   r   rU     s    zHTMLParser.handle_pic                 C   s   d S r$   r
   r   r
   r
   r   unknown_decl  s    zHTMLParser.unknown_decl)r   )__name__
__module____qualname____doc__rc   r   r   r   r   r   r   r#   r%   r   r>   rO   r=   r:   rZ   r;   ra   rb   rr   rA   rD   r6   rQ   rN   rU   ru   r
   r
   r
   r   r   >   s6   		z
3"()ry   r   r   htmlr   __all__r    r   rE   rC   r?   r8   rT   Zcommentcloser[   r\   VERBOSErm   rp   rq   r   r   r
   r
   r
   r   <module>   s*   










