o
    f2h,                  	   @   st  d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZm	Z	 e
 Zedkr-dd Zndd Zdd Zd	d
 Zdd Zdd ZdefddZ	d0dededefddZdee dee fddZdee dee fddZG dd dZG d d! d!eZG d"d# d#eZG d$d% d%eZG d&d' d'eZG d(d) d)eZG d*d+ d+eZd,ed-edeee	egdf fd.d/Z dS )1    N)CallableListOptionalTextIOutf-8c                 C   s   | j tddtS )Nreplace)errors)encodesystem_encodingdecodestring r   a/var/www/html/alexa/alex_system/speach-to-text/venv/lib/python3.10/site-packages/whisper/utils.py	make_safe   s   r   c                 C   s   | S Nr   r   r   r   r   r      s   c                 C   s   | | dksJ | | S )Nr   r   )xyr   r   r   	exact_div   s   r   c                 C   s6   ddd}| |v r||  S t dt|  d|  )NTF)TrueFalsezExpected one of z, got )
ValueErrorsetkeys)r   str2valr   r   r   str2bool   s   
r   c                 C      | dkrd S t | S NNone)intr   r   r   r   optional_int%      r    c                 C   r   r   )floatr   r   r   r   optional_float)   r!   r#   returnc                 C   s    |  d}t|tt| S )Nr   )r	   lenzlibcompress)text
text_bytesr   r   r   compression_ratio-   s   
r*   F.secondsalways_include_hoursdecimal_markerc                 C   s   | dksJ dt | d }|d }||d 8 }|d }||d 8 }|d } || d 8 }|s2|dkr8|ddnd	}| |dd| d| |d
S )Nr   znon-negative timestamp expectedg     @@i6 i`    02d: 03d)round)r,   r-   r.   millisecondshoursminuteshours_markerr   r   r   format_timestamp2   s   r9   segmentsc                 C   s&   t dd | D | r| d d S d S )Nc                 s   s&    | ]}|d  D ]}|d V  qqdS )wordsstartNr   .0swr   r   r   	<genexpr>I   s   $ zget_start.<locals>.<genexpr>r   r<   )nextr:   r   r   r   	get_startG   s   rD   c                 C   s*   t dd t| D | r| d d S d S )Nc                 s   s*    | ]}t |d  D ]}|d V  q
qdS )r;   endN)reversedr=   r   r   r   rA   P   s   ( zget_end.<locals>.<genexpr>rE   )rB   rF   rC   r   r   r   get_endN   s   rH   c                   @   s`   e Zd ZU eed< defddZ	ddededee fd	d
Z	ddede	dee fddZ
dS )ResultWriter	extension
output_dirc                 C   s
   || _ d S r   rK   )selfrK   r   r   r   __init__X   s   
zResultWriter.__init__Nresult
audio_pathoptionsc                 K   s   t j|}t j|d }t j| j|d | j }t|ddd}| j|f||d| W d    d S 1 s:w   Y  d S )Nr   r+   r@   r   )encoding)filerQ   )	ospathbasenamesplitextjoinrK   rJ   openwrite_result)rM   rO   rP   rQ   kwargsaudio_basenameoutput_pathfr   r   r   __call__[   s   "zResultWriter.__call__rS   c                 K   s   t r   )NotImplementedErrorrM   rO   rS   rQ   r[   r   r   r   rZ   g   s   zResultWriter.write_resultr   )__name__
__module____qualname__str__annotations__rN   dictr   r_   r   rZ   r   r   r   r   rI   U   s&   
 
rI   c                   @   8   e Zd ZU dZeed< 	d	dededee fddZ	dS )
WriteTXTtxtrJ   NrO   rS   rQ   c                 K   s(   |d D ]}t |d  |dd qd S )Nr:   r(   TrS   flush)printstriprM   rO   rS   rQ   r[   segmentr   r   r   rZ   p   s   zWriteTXT.write_resultr   
rb   rc   rd   rJ   re   rf   rg   r   r   rZ   r   r   r   r   ri   m      
 ri   c                   @   sn   e Zd ZU eed< eed< 	dddddddedee dee d	ee d
edee fddZ	de
fddZdS )SubtitlesWriterr-   r.   NF)max_line_widthmax_line_counthighlight_wordsmax_words_per_linerO   rQ   rt   ru   rv   rw   c             	   #   s   |pi }p| dp| d|p| dd}p!| dd u p)d u p-dp1dfdd}td	 d
krdd	 d
 v r| D ]e}| |d
 d }	| |d d }
ddd |D }|r|	}dd |D }t|D ]0\ }| |d }| |d }||kr|||fV  ||d fddt|D fV  |}q|qO|	|
|fV  qOd S d	 D ] }| |d }| |d }|d  dd}|||fV  qd S )Nrt   ru   rv   Frw   r/   c                  3   s   d} d}g }t d pd}d D ]}d}}|t|d k rt|d | }t|d | kr5|}t|d |||  D ]\}}	|	 }
 oS|
d | dk}| t|
d  k}|dkoit|dkoi}| dkr}|r}|s}|s}| t|
d 7 } n:|
d  |
d< t|dkr d ur|s| ks|r|V  g }d}n| dkr|d7 }d	|
d  |
d< t|
d  } ||
 |
d }qA|7 }|t|d k s!qt|dkr|V  d S d S )
Nr      r:   g        r;   r<   g      @word
)rD   r%   	enumeratecopyrn   append)line_len
line_countsubtitlelastrp   chunk_indexwords_countremaining_wordsioriginal_timingtiming
long_pausehas_room	seg_break)ru   rt   rw   preserve_segmentsrO   r   r   iterate_subtitles   sf   

*
z9SubtitlesWriter.iterate_result.<locals>.iterate_subtitlesr:   r   r;   r<   rG   rE   r2   c                 S      g | ]}|d  qS ry   r   )r>   ry   r   r   r   
<listcomp>       z2SubtitlesWriter.iterate_result.<locals>.<listcomp>c                 S   r   r   r   )r>   r   r   r   r   r      r   c                    s*   g | ]\}}| krt d d|n|qS )z^(\s*)(.*)$z\1<u>\2</u>)resub)r>   jry   )r   r   r   r      s    r(   z-->z->)getr%   r9   rX   r{   rn   r   )rM   rO   rQ   rt   ru   rv   rw   r   r   subtitle_startsubtitle_endsubtitle_textr   	all_words	this_wordr<   rE   rp   segment_startsegment_endsegment_textr   )r   ru   rt   rw   r   rO   r   iterate_result{   sL   
 6

zSubtitlesWriter.iterate_resultr,   c                 C   s   t || j| jdS )N)r,   r-   r.   )r9   r-   r.   )rM   r,   r   r   r   r9      s
   z SubtitlesWriter.format_timestampr   )rb   rc   rd   boolrf   re   rg   r   r   r   r"   r9   r   r   r   r   rs   w   s.   
 
irs   c                   @   P   e Zd ZU dZeed< dZeed< dZeed< 	dde	d	e
d
ee	 fddZdS )WriteVTTvttrJ   Fr-   r+   r.   NrO   rS   rQ   c                 K   sP   t d|d | j||fi |D ]\}}}t | d| d| d|dd qd S )NzWEBVTT
)rS    --> rz   Trk   )rm   r   )rM   rO   rS   rQ   r[   r<   rE   r(   r   r   r   rZ      s   "zWriteVTT.write_resultr   rb   rc   rd   rJ   re   rf   r-   r   r.   rg   r   r   rZ   r   r   r   r   r         
 r   c                   @   r   )WriteSRTsrtrJ   Tr-   ,r.   NrO   rS   rQ   c           	   
   K   sV   t | j||fi |ddD ]\}\}}}t| d| d| d| d|dd qd S )Nrx   )r<   rz   r   Trk   )r{   r   rm   )	rM   rO   rS   rQ   r[   r   r<   rE   r(   r   r   r   rZ      s
   (zWriteSRT.write_resultr   r   r   r   r   r   r      r   r   c                   @   s<   e Zd ZU dZdZeed< 	d
dedede	e fdd	Z
dS )WriteTSVa  
    Write a transcript to a file in TSV (tab-separated values) format containing lines like:
    <start time in integer milliseconds>	<end time in integer milliseconds>	<transcript text>

    Using integer milliseconds as start and end times means there's no chance of interference from
    an environment setting a language encoding that causes the decimal in a floating point number
    to appear as a comma; also is faster and more efficient to parse & store, e.g., in C++.
    tsvrJ   NrO   rS   rQ   c                 K   sv   t dddd|d |d D ]+}t td|d  |dd t td|d  |dd t |d  dd	|d
d qd S )Nr<   rE   r(   	)seprS   r:   r/   )rS   rE    Trk   )rm   r4   rn   r   ro   r   r   r   rZ     s    zWriteTSV.write_resultr   )rb   rc   rd   __doc__rJ   re   rf   rg   r   r   rZ   r   r   r   r   r     s   
 	r   c                   @   rh   )
	WriteJSONjsonrJ   NrO   rS   rQ   c                 K   s   t || d S r   )r   dumpra   r   r   r   rZ      s   zWriteJSON.write_resultr   rq   r   r   r   r   r     rr   r   output_formatrK   c                    s`   t ttttd}| dkr*fdd| D  	 d
dtdtdtt f fdd	}|S ||  S )N)rj   r   r   r   r   allc                    s   g | ]}| qS r   r   )r>   writerrL   r   r   r   2  r   zget_writer.<locals>.<listcomp>rO   rS   rQ   c                    s"    D ]}|| ||fi | qd S r   r   )rO   rS   rQ   r[   r   )all_writersr   r   	write_all4  s   zget_writer.<locals>.write_allr   )	ri   r   r   r   r   valuesrg   r   r   )r   rK   writersr   r   )r   rK   r   
get_writer&  s$   r   )Fr+   )!r   rT   r   sysr&   typingr   r   r   r   getdefaultencodingr
   r   r   r   r    r#   r"   r*   r   re   r9   rg   rD   rH   rI   ri   rs   r   r   r   r   r   r   r   r   r   <module>   sN    


u	