o
    D2h                     @  sn   d dl mZ d dlZd dlZd dlZdddZd ddZd!d"ddZ		d#d$ddZd%ddZ	d!d&ddZ
dS )'    )annotationsNblobpathstrreturnbytesc              
   C  s   |  ds<|  ds<zdd l}W n ty! } ztd|d }~ww || d}| W  d    S 1 s7w   Y  dd l}|| }|  |jS )Nzhttp://zhttps://r   Oblobfile is not installed. Please install it by running `pip install blobfile`.rb)	
startswithblobfileImportErrorBlobFilereadrequestsgetraise_for_statuscontent)r   r
   efr   resp r   a/var/www/html/alexa/alex_system/speach-to-text/venv/lib/python3.10/site-packages/tiktoken/load.py	read_file   s$    
r   dataexpected_hashboolc                 C  s   t |  }||kS N)hashlibsha256	hexdigest)r   r   actual_hashr   r   r   
check_hash   s   r    
str | Nonec                 C  s  d}dt jv rt jd }ndt jv rt jd }ndd l}t j| d}d}|dkr/t| S t| 	 
 }t j||}t j|rzt|d}| }W d    n1 sYw   Y  |d u sgt||ri|S zt | W n	 tyy   Y nw t| }	|rt|	|std	|  d
| ddd l}
z6t j|dd |d t|
  d }t|d}||	 W d    n1 sw   Y  t || W |	S  ty   |rԂ Y |	S w )NTTIKTOKEN_CACHE_DIRDATA_GYM_CACHE_DIRr   zdata-gym-cacheF r   z'Hash mismatch for data downloaded from z (expected z<). This may indicate a corrupted download. Please try again.)exist_ok.z.tmpwb)osenvirontempfilepathjoin
gettempdirr   r   sha1encoder   existsopenr   r    removeOSError
ValueErroruuidmakedirsr   uuid4writerename)r   r   user_specified_cache	cache_dirr*   	cache_key
cache_pathr   r   contentsr5   tmp_filenamer   r   r   read_file_cached    sV   


r@   vocab_bpe_fileencoder_json_filevocab_bpe_hashencoder_json_hashdict[bytes, int]c                   s>  dd t dD }dd |D  d}t dD ]}||vr-|| | td| < |d7 }qt|dks6J t| | }dd |d	dd
 D }d fdddd t|D }	t|	}|D ]\}
}||	|
| < |d7 }qadd l}|	t||}fdd|
 D }|dd  |dd  |	|ksJ |	S )Nc                 S  s(   g | ]}t | rt |d kr|qS ) )chrisprintable.0br   r   r   
<listcomp>]   s   ( z3data_gym_to_mergeable_bpe_ranks.<locals>.<listcomp>   c                 S  s   i | ]}t ||qS r   )rG   rI   r   r   r   
<dictcomp>_   s    z3data_gym_to_mergeable_bpe_ranks.<locals>.<dictcomp>r      c                 S  s   g | ]}t | qS r   )tuplesplit)rJ   	merge_strr   r   r   rL   j   s    
valuer   r   r   c                   s   t  fdd| D S )Nc                 3  s    | ]} | V  qd S r   r   rI   data_gym_byte_to_byter   r   	<genexpr>m   s    zKdata_gym_to_mergeable_bpe_ranks.<locals>.decode_data_gym.<locals>.<genexpr>r   )rU   rV   r   r   decode_data_gyml   s   z8data_gym_to_mergeable_bpe_ranks.<locals>.decode_data_gymc                 S  s   i | ]
\}}t |g|qS r   rY   )rJ   irK   r   r   r   rN   p   s    c                   s   i | ]	\}} ||qS r   r   )rJ   kv)rZ   r   r   rN   }   s    s   <|endoftext|>s   <|startoftext|>)rU   r   r   r   )rangeappendrG   lenr@   decoderQ   	enumeratejsonloadsitemspop)rA   rB   rC   rD   rank_to_intbytenrK   vocab_bpe_contents
bpe_merges	bpe_ranksfirstsecondrc   encoder_jsonencoder_json_loadedr   )rW   rZ   r   data_gym_to_mergeable_bpe_ranksV   s2   

rp   rk   tiktoken_bpe_fileNonec              
   C  s   zdd l }W n ty } ztd|d }~ww ||d*}t|  dd dD ]\}}|t|d t|	  d  q)W d    d S 1 sKw   Y  d S )	Nr   r   r'   c                 S  s   | d S )NrO   r   )xr   r   r   <lambda>   s    z#dump_tiktoken_bpe.<locals>.<lambda>)key       
)
r
   r   r   sortedre   r8   base64	b64encoder   r/   )rk   rq   r
   r   r   tokenrankr   r   r   dump_tiktoken_bpe   s   &"r}   c                 C  sv   t | |}i }| D ]-}|sqz| \}}t||t|< W q ty8 } ztd|d|  |d }~ww |S )NzError parsing line z in )r@   
splitlinesrQ   intry   	b64decode	Exceptionr4   )rq   r   r>   retliner{   r|   r   r   r   r   load_tiktoken_bpe   s   
r   )r   r   r   r   )r   r   r   r   r   r   r   )r   r   r   r!   r   r   )NN)
rA   r   rB   r   rC   r!   rD   r!   r   rE   )rk   rE   rq   r   r   rr   )rq   r   r   r!   r   rE   )
__future__r   ry   r   r(   r   r    r@   rp   r}   r   r   r   r   r   <module>   s    

9
0