o
    Oc\O                  	   @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
ZddlZddlZddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z% dd	l&m'Z' dd
l(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3m4Z4 ddl5m6Z6 ddl7m8Z8m9Z9m:Z: erddlm;Z; ne<Z;e=e>Z?ej@jAjBZCeeDeDf ZEdeDdeeD fddZFG dd deGZHde"ddfddZIG dd deGZJdeDde-ddfd d!ZKdeDde-de"fd"d#ZLd$eEdeeD fd%d&ZMd'eDdeDfd(d)ZNd'eDdeDfd*d+ZOe	Pd,e	jQZRd-eDd.eSdeDfd/d0ZTdeDdeDfd1d2ZUd3eeDeeD f d4eDd5eDdee) fd6d7ZVG d8d9 d9ZWG d:d; d;e;ZXd<eXdeXfd=d>ZYeYd?d@dee) fdAdBZZG dCd@ d@Z[G dDdE dEeZ\	dVdFe)dGeeDeGf dHeedI  ddfdJdKZ]	LdWde"dMeSde[fdNdOZ^	dVdFe)dee- ded@ fdPdQZ_G dRdS dSeZ`G dTdU dUZadS )XzO
The main purpose of this module is to expose LinkCollector.collect_sources().
    N)
HTMLParser)Values)TYPE_CHECKINGCallableDictIterableListMutableMapping
NamedTupleOptionalSequenceTupleUnion)requests)Response)
RetryErrorSSLError)NetworkConnectionError)Link)SearchScope)
PipSession)raise_for_status)is_archive_file)pairwiseredact_auth_from_url)vcs   )CandidatesFromPage
LinkSourcebuild_source)Protocolurlreturnc                 C   s6   t jD ]}|  |r| t| dv r|  S qdS )zgLook for VCS schemes in the URL.

    Returns the matched VCS scheme, or None if there's no match.
    z+:N)r   Zschemeslower
startswithlen)r!   scheme r'   ?D:\Flask\env\lib\site-packages\pip\_internal\index\collector.py_match_vcs_scheme:   s
   
r)   c                       s*   e Zd Zdededdf fddZ  ZS )_NotAPIContentcontent_typerequest_descr"   Nc                    s   t  || || _|| _d S N)super__init__r+   r,   )selfr+   r,   	__class__r'   r(   r/   F   s   
z_NotAPIContent.__init__)__name__
__module____qualname__strr/   __classcell__r'   r'   r1   r(   r*   E   s    "r*   responsec                 C   s2   | j dd}| }|drdS t|| jj)z
    Check the Content-Type header to ensure the response contains a Simple
    API Response.

    Raises `_NotAPIContent` if the content type is not a valid content-type.
    Content-TypeUnknown)z	text/htmlz#application/vnd.pypi.simple.v1+html#application/vnd.pypi.simple.v1+jsonN)headersgetr#   r$   r*   requestmethod)r8   r+   content_type_lr'   r'   r(   _ensure_api_headerL   s   rA   c                   @   s   e Zd ZdS )_NotHTTPN)r3   r4   r5   r'   r'   r'   r(   rB   b   s    rB   sessionc                 C   sF   t j| \}}}}}|dvrt |j| dd}t| t| dS )z
    Send a HEAD request to the URL, and ensure the response contains a simple
    API Response.

    Raises `_NotHTTP` if the URL is not available for a HEAD request, or
    `_NotAPIContent` if the content type is not a valid content type.
    >   httphttpsT)allow_redirectsN)urllibparseurlsplitrB   headr   rA   )r!   rC   r&   netlocpathqueryfragmentrespr'   r'   r(   _ensure_api_responsef   s   rP   c                 C   sx   t t| jrt| |d tdt|  |j| dg dddd}t	| t
| tdt| |jd	d
 |S )aY  Access an Simple API response with GET, and return the response.

    This consists of three parts:

    1. If the URL looks suspiciously like an archive, send a HEAD first to
       check the Content-Type is HTML or Simple API, to avoid downloading a
       large file. Raise `_NotHTTP` if the content type cannot be determined, or
       `_NotAPIContent` if it is not HTML or a Simple API.
    2. Actually perform the request. Raise HTTP exceptions on network failures.
    3. Check the Content-Type header to make sure we got a Simple API response,
       and raise `_NotAPIContent` otherwise.
    rC   zGetting page %sz, )r;   z*application/vnd.pypi.simple.v1+html; q=0.1ztext/html; q=0.01z	max-age=0)AcceptzCache-Control)r<   zFetched page %s as %sr9   r:   )r   r   filenamerP   loggerdebugr   r=   joinr   rA   r<   )r!   rC   rO   r'   r'   r(   _get_simple_responsex   s&   rW   r<   c                 C   s<   | rd| v rt j }| d |d< |d}|rt|S dS )z=Determine if we have any encoding information in our headers.r9   zcontent-typecharsetN)emailmessageMessage	get_paramr6   )r<   mrX   r'   r'   r(   _get_encoding_from_headers   s   

r^   partc                 C      t jt j| S )zP
    Clean a "part" of a URL path (i.e. after splitting on "@" characters).
    )rG   rH   quoteunquoter_   r'   r'   r(   _clean_url_path_part   s   rd   c                 C   r`   )z
    Clean the first part of a URL path that corresponds to a local
    filesystem path (i.e. the first part after splitting on "@" characters).
    )rG   r>   pathname2urlurl2pathnamerc   r'   r'   r(   _clean_file_url_path   s   
rg   z(@|%2F)rL   is_local_pathc                 C   s^   |rt }nt}t| }g }tt|dgD ]\}}||| ||  qd	|S )z*
    Clean the path portion of a URL.
     )
rg   rd   _reserved_chars_resplitr   	itertoolschainappendupperrV   )rL   rh   Z
clean_funcpartsZcleaned_partsZto_cleanreservedr'   r'   r(   _clean_url_path   s   

rr   c                 C   s6   t j| }|j }t|j|d}t j|j|dS )z
    Make sure a link is fully quoted.
    For example, if ' ' occurs in the URL, it will be replaced with "%20",
    and without double-quoting other characters.
    )rh   )rL   )rG   rH   urlparserK   rr   rL   
urlunparse_replace)r!   resultrh   rL   r'   r'   r(   _clean_link   s   rw   element_attribspage_urlbase_urlc                 C   sL   |  d}|s	dS ttj||}|  d}|  d}t||||d}|S )zW
    Convert an anchor element's attributes in a simple repository page to a Link.
    hrefNzdata-requires-pythonzdata-yanked)
comes_fromrequires_pythonyanked_reason)r=   rw   rG   rH   urljoinr   )rx   ry   rz   r{   r!   Z	pyrequirer~   linkr'   r'   r(   _create_link_from_element   s   


r   c                   @   s6   e Zd ZdddZdedefdd	Zdefd
dZdS )CacheablePageContentpageIndexContentr"   Nc                 C   s   |j sJ || _d S r-   )cache_link_parsingr   r0   r   r'   r'   r(   r/     s   

zCacheablePageContent.__init__otherc                 C   s   t |t| o| jj|jjkS r-   )
isinstancetyper   r!   )r0   r   r'   r'   r(   __eq__  s   zCacheablePageContent.__eq__c                 C   s   t | jjS r-   )hashr   r!   r0   r'   r'   r(   __hash__"  s   zCacheablePageContent.__hash__)r   r   r"   N)	r3   r4   r5   r/   objectboolr   intr   r'   r'   r'   r(   r     s    
r   c                   @   s"   e Zd Zdddee fddZdS )
ParseLinksr   r   r"   c                 C   s   d S r-   r'   r   r'   r'   r(   __call__'  s   zParseLinks.__call__N)r3   r4   r5   r   r   r   r'   r'   r'   r(   r   &  s    r   fnc                    sP   t jdddtdtt f fddt  dddtt f fd	d
}|S )z
    Given a function that parses an Iterable[Link] from an IndexContent, cache the
    function's result (keyed by CacheablePageContent), unless the IndexContent
    `page` has `page.cache_link_parsing == False`.
    N)maxsizecacheable_pager"   c                    s   t  | jS r-   )listr   )r   )r   r'   r(   wrapper2  s   z*with_cached_index_content.<locals>.wrapperr   r   c                    s   | j r	t| S t | S r-   )r   r   r   )r   r   r   r'   r(   wrapper_wrapper6  s   z2with_cached_index_content.<locals>.wrapper_wrapper)	functools	lru_cacher   r   r   wraps)r   r   r'   r   r(   with_cached_index_content+  s
   
r   r   r   c              
   c   s
   | j  }|drSt| j}|dg D ]9}|d}|du r#q|d}|r2t|ts2d}n|s6d}t	t
tj| j|| j|d||di d	V  qdS t| j}| jp\d
}|| j| | j}|jpm|}	|jD ]}
t|
||	d}|du rqq|V  qqdS )z\
    Parse a Simple API's Index Content, and yield its anchor elements as Link objects.
    r;   filesr!   NZyankedri   zrequires-pythonhashes)r|   r}   r~   r   zutf-8)ry   rz   )r+   r#   r$   jsonloadscontentr=   r   r6   r   rw   rG   rH   r   r!   HTMLLinkParserencodingfeeddecoderz   anchorsr   )r   r@   datafileZfile_urlr~   parserr   r!   rz   anchorr   r'   r'   r(   parse_links?  sH   









r   c                   @   sH   e Zd ZdZ	ddededee dededd	fd
dZdefddZ	d	S )r   z5Represents one response (or page), along with its URLTr   r+   r   r!   r   r"   Nc                 C   s"   || _ || _|| _|| _|| _dS )am  
        :param encoding: the encoding to decode the given content.
        :param url: the URL from which the HTML was downloaded.
        :param cache_link_parsing: whether links parsed from this page's url
                                   should be cached. PyPI index urls should
                                   have this set to False, for example.
        N)r   r+   r   r!   r   )r0   r   r+   r   r!   r   r'   r'   r(   r/   r  s
   
zIndexContent.__init__c                 C   s
   t | jS r-   )r   r!   r   r'   r'   r(   __str__  s   
zIndexContent.__str__T)
r3   r4   r5   __doc__bytesr6   r   r   r/   r   r'   r'   r'   r(   r   o  s"    
c                       sv   e Zd ZdZdeddf fddZdedeeeee f  ddfd	d
Z	deeeee f  dee fddZ
  ZS )r   zf
    HTMLParser that keeps the first base HREF and a list of all anchor
    elements' attributes.
    r!   r"   Nc                    s$   t  jdd || _d | _g | _d S )NT)Zconvert_charrefs)r.   r/   r!   rz   r   )r0   r!   r1   r'   r(   r/     s   
zHTMLLinkParser.__init__tagattrsc                 C   sR   |dkr| j d u r| |}|d ur|| _ d S d S |dkr'| jt| d S d S )Nbasea)rz   get_hrefr   rn   dict)r0   r   r   r{   r'   r'   r(   handle_starttag  s   

zHTMLLinkParser.handle_starttagc                 C   s"   |D ]\}}|dkr|  S qd S )Nr{   r'   )r0   r   namevaluer'   r'   r(   r     s
   zHTMLLinkParser.get_href)r3   r4   r5   r   r6   r/   r   r   r   r   r   r7   r'   r'   r1   r(   r     s
    &.r   r   reasonmeth).Nc                 C   s   |d u rt j}|d| | d S )Nz%Could not fetch URL %s: %s - skipping)rT   rU   )r   r   r   r'   r'   r(   _handle_get_simple_fail  s   r   Tr   c                 C   s&   t | j}t| j| jd || j|dS )Nr9   )r   r!   r   )r^   r<   r   r   r!   )r8   r   r   r'   r'   r(   _make_index_content  s   
r   c           
   
   C   s  |d u rt d| jddd }t|}|r td||  d S tj|\}}}}}}|dkrPt	j
tj|rP|dsC|d7 }tj|d}td	| zt||d
}W n tyh   td|  Y d S  ty } ztd| |j|j W Y d }~d S d }~w ty } zt| | W Y d }~d S d }~w ty } zt| | W Y d }~d S d }~w ty } zd}	|	t|7 }	t| |	tjd W Y d }~d S d }~w tjy } zt| d|  W Y d }~d S d }~w tjy   t| d Y d S w t|| j dS )Nz?_get_html_page() missing 1 required keyword argument: 'session'#r   r   zICannot look at %s URL %s because it does not support lookup as web pages.r   /z
index.htmlz# file: URL is directory, getting %srQ   z`Skipping page %s because it looks like an archive, and cannot be checked by a HTTP HEAD request.zSkipping page %s because the %s request got Content-Type: %s. The only supported Content-Types are application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html, and text/htmlz4There was a problem confirming the ssl certificate: )r   zconnection error: z	timed out)r   )!	TypeErrorr!   rk   r)   rT   warningrG   rH   rs   osrL   isdirr>   rf   endswithr   rU   rW   rB   r*   r,   r+   r   r   r   r   r6   infor   ConnectionErrorTimeoutr   r   )
r   rC   r!   Z
vcs_schemer&   _rL   rO   excr   r'   r'   r(   _get_index_content  sv   
r   c                   @   s.   e Zd ZU eee  ed< eee  ed< dS )CollectedSources
find_links
index_urlsN)r3   r4   r5   r   r   r   __annotations__r'   r'   r'   r(   r     s   
 r   c                
   @   s   e Zd ZdZdededdfddZe	dded	ed
e	dd fddZ
edee fddZdedee fddZdededefddZdS )LinkCollectorz
    Responsible for collecting Link objects from all configured locations,
    making network requests as needed.

    The class's main method is its collect_sources() method.
    rC   search_scoper"   Nc                 C   s   || _ || _d S r-   )r   rC   )r0   rC   r   r'   r'   r(   r/     s   
zLinkCollector.__init__Foptionssuppress_no_indexc                 C   s`   |j g|j }|jr|stdddd |D  g }|jp g }tj||d}t	||d}|S )z
        :param session: The Session to use to make requests.
        :param suppress_no_index: Whether to ignore the --no-index option
            when constructing the SearchScope object.
        zIgnoring indexes: %s,c                 s   s    | ]}t |V  qd S r-   )r   ).0r!   r'   r'   r(   	<genexpr>(  s    z'LinkCollector.create.<locals>.<genexpr>r   r   )rC   r   )
	index_urlextra_index_urlsno_indexrT   rU   rV   r   r   creater   )clsrC   r   r   r   r   r   link_collectorr'   r'   r(   r     s"   

zLinkCollector.createc                 C   s   | j jS r-   )r   r   r   r'   r'   r(   r   9  s   zLinkCollector.find_linkslocationc                 C   s   t || jdS )z>
        Fetch an HTML page containing package links.
        rQ   )r   rC   )r0   r   r'   r'   r(   fetch_response=  s   zLinkCollector.fetch_responseproject_namecandidates_from_pagec                    s   t  fddj|D  }t  fddjD  }ttj	rIdd t
||D }t| d| dg| }td| tt|t|d	S )
Nc                 3   &    | ]}t | jjd d dV  qdS )Fr   Zpage_validatorZ
expand_dirr   Nr   rC   Zis_secure_originr   locr   r0   r'   r(   r   I      
z0LinkCollector.collect_sources.<locals>.<genexpr>c                 3   r   )Tr   Nr   r   r   r'   r(   r   S  r   c                 S   s*   g | ]}|d ur|j d urd|j  qS )Nz* )r   )r   sr'   r'   r(   
<listcomp>_  s    
z1LinkCollector.collect_sources.<locals>.<listcomp>z' location(s) to search for versions of :
r   )collectionsOrderedDictr   Zget_index_urls_locationsvaluesr   rT   isEnabledForloggingDEBUGrl   rm   r%   rU   rV   r   r   )r0   r   r   Zindex_url_sourcesZfind_links_sourceslinesr'   r   r(   collect_sourcesC  s2   
	
	

zLinkCollector.collect_sources)F)r3   r4   r5   r   r   r   r/   classmethodr   r   r   propertyr   r6   r   r   r   r   r   r   r   r   r'   r'   r'   r(   r     s<    
 r   r-   r   )br   r   email.messagerY   r   rl   r   r   r   reurllib.parserG   urllib.requestZxml.etree.ElementTreexmlZhtml.parserr   optparser   typingr   r   r   r   r   r	   r
   r   r   r   r   pip._vendorr   Zpip._vendor.requestsr   Zpip._vendor.requests.exceptionsr   r   pip._internal.exceptionsr   pip._internal.models.linkr   Z!pip._internal.models.search_scoper   pip._internal.network.sessionr   Zpip._internal.network.utilsr   pip._internal.utils.filetypesr   pip._internal.utils.miscr   r   pip._internal.vcsr   Zsourcesr   r   r   r    r   	getLoggerr3   rT   ZetreeZElementTreeZElementZHTMLElementr6   ZResponseHeadersr)   	Exceptionr*   rA   rB   rP   rW   r^   rd   rg   compile
IGNORECASErj   r   rr   rw   r   r   r   r   r   r   r   r   r   r   r   r   r'   r'   r'   r(   <module>   s    4

?
/




D