o
    f2h1                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlZd dl	Z
d dlZd dlm  mZ ddlmZmZmZ ddlmZ erGddlmZ dejd	efd
dZejddde
jfddZejdddde
jfddZd7ddZdejde
jfddZ eG dd dZ!ddddd d!ed"ee d#ejd$ed%ed&e"dee! fd'd(Z#d)ee! d*e$d+e$fd,d-Z%d.d/d0d1ee& dd d!ed#ejd$ed2e$d3e$d4e"fd5d6Z'dS )8    N)	dataclass)TYPE_CHECKINGList   )
HOP_LENGTHSAMPLE_RATETOKENS_PER_SECOND)	Tokenizer)Whisperxfilter_widthc              	   C   s   |d }| j d |kr| S | j }dkr| ddddf } |dkr'|d dks+J dd}tj| |d |d ddfdd} | jr_zdd	lm} || |}W n ttj	fy^   t
d
 Y nw |du rt| d|d d d|d f }|dkr||d }|S )zMApply a median filter of width `filter_width` along the last dimension of `x`   Nr   r   z&`filter_width` should be an odd numberreflect)mode)median_filter_cudaz}Failed to launch Triton kernels, likely due to missing CUDA toolkit; falling back to a slower median kernel implementation....r   r   )shapendimFpadis_cuda
triton_opsr   RuntimeError
subprocessCalledProcessErrorwarningswarnunfoldsort)r   r   	pad_widthr   resultr    r"   b/var/www/html/alexa/alex_system/speach-to-text/venv/lib/python3.10/site-packages/whisper/timing.pymedian_filter   s0    "r$   T)nopythontracec                 C   s   | j d d }| j d d }d| dd d f< d| d d df< g }|dks(|dkrj||d |d f | ||f dkrD|d8 }|d8 }n| ||f dkrQ|d8 }n| ||f dkr^|d8 }ntd|dks(|dks(t|}|d d dd d f jS )Nr   r   r   zUnexpected trace[i, j]r   )r   append
ValueErrornparrayT)r&   ijr!   r"   r"   r#   	backtrace9   s$   



r.   )r%   parallelc                 C   s$  | j \}}tj|d |d ftjdtj }tj|d |d ftjd }d|d< td|d D ]]}td|d D ]S}||d |d f }||d |f }|||d f }	||k rc||	k rc|d}
}n||k rq||	k rq|d}
}n|	d}
}| |d |d f |
 |||f< ||||f< q9q0t|S )Nr   dtyper   r   r   )r   r)   onesfloat32infranger.   )r   NMcostr&   r-   r,   c0c1c2ctr"   r"   r#   dtw_cpuR   s$   
"
 r>      c                 C   s8  ddl m} | j\}}||k sJ d|tj| d|d ftjd d |||   ||| }|j	
 }t|| d |d tj }d|d< | }tj|tjd}|d	 ||||d|d|d|||d
	 |j	 d |d || d   |d || d d d d |d f }t|  S )Nr   )
dtw_kernelz$M should be smaller than BLOCK_SIZE=r   )valuer   r   r0   )r   )
BLOCK_SIZE   )r   r@   r   r   r   r)   r4   flattenreshaper+   
contiguoustorchr2   cuda
zeros_likeint32strider.   cpunumpy)r   rB   r@   r7   r6   x_skewr8   r&   r"   r"   r#   dtw_cudal   s2   
8
4rO   returnc              	   C   sH   | j rzt| W S  ttjfy   td Y nw t|  	 
 S )NzsFailed to launch Triton kernels, likely due to missing CUDA toolkit; falling back to a slower DTW implementation...)r   rO   r   r   r   r   r   r>   doublerL   rM   )r   r"   r"   r#   dtw   s   
rR   c                   @   s:   e Zd ZU eed< ee ed< eed< eed< eed< dS )
WordTimingwordtokensstartendprobabilityN)__name__
__module____qualname__str__annotations__r   intfloatr"   r"   r"   r#   rS      s   
 rS      g      ?)medfilt_widthqk_scalemodelr
   	tokenizertext_tokensmel
num_framesra   rb   c             	      s  t |dkrg S tg |j|j||j| j}d g| jj	   fddt
| jjD }ddlm}	 t K |	 8 | |d|dd }
|
t |jd d |jf }|jdd}|tt ||f  W d    n1 syw   Y  W d    n1 sw   Y  |D ]}|  qt fdd| j jD }|d d d d d |d	 f }|| jdd}tj|d
ddd\}}|| | }t||}|jdd}|t |jd }t| \}}|||jg \}}t |dkrg S tt dd |d d D d}tjt!|ddd"t#}|| t$ }||d d  }||dd   }fddt%|d d |dd  D }dd t%|||||D S )Nr   c                    s(   g | ]\}}|j |f fd d	qS )c                    s     ||d d S )Nr   r   )__setitem__)_insoutsindexQKsr"   r#   <lambda>       z+find_alignment.<locals>.<listcomp>.<lambda>)
cross_attnregister_forward_hook).0r,   blockrm   r"   r#   
<listcomp>   s    z"find_alignment.<locals>.<listcomp>r   )disable_sdpar   )dimc                    s   g | ]
\}} | | qS r"   r"   )rs   _l_hrm   r"   r#   ru      s    r   TF)rw   keepdimunbiased)axisc                 S   s   g | ]}t |qS r"   )lenrs   r=   r"   r"   r#   ru      rp   )r   r   )constant_valuesc                    s"   g | ]\}}t  || qS r"   )r)   mean)rs   r,   r-   )text_token_probsr"   r#   ru      s    c                 S   s&   g | ]\}}}}}t |||||qS r"   )rS   )rs   rT   rU   rV   rW   rX   r"   r"   r#   ru      s    )&r~   rG   tensorsot_sequenceno_timestampseottodevicedimsn_text_layer	enumeratedecoderblocksrc   rv   no_grad	unsqueezesoftmaxr)   arangetolistremovestackalignment_headsindicesr+   std_meanr$   r   rR   split_to_word_tokensr   cumsumdiffastypeboolr   zip)rc   rd   re   rf   rg   ra   rb   rU   hooksrv   logitssampled_logitstoken_probshookweightsstdr   matrixtext_indicestime_indiceswordsword_tokensword_boundariesjumps
jump_timesstart_times	end_timesword_probabilitiesr"   )rn   r   r#   find_alignment   sp   




 
 
$

r   	alignment	prependedappendedc                 C   s  t | d }t | d }|dkrD| | }| | }|jdr:|j |v r:|j|j |_|j|j |_d|_g |_n|}|d8 }|dksd}d}|t | k r| | }| | }|jdsv|j|v rv|j|j |_|j|j |_d|_g |_n|}|d7 }|t | k sNd S d S )Nr   r   r     )r~   rT   
startswithstriprU   endswith)r   r   r   r,   r-   previous	followingr"   r"   r#   merge_punctuations   s4   r   u   "'“¿([{-u   "'.。,，!！?？:：”)]}、)prepend_punctuationsappend_punctuationssegmentsr   r   last_speech_timestampc              	      s  t | dkrd S  fdd| D }	ttj|	}
t| |
||fi |}tdd |D }||  }t |dkr@t	|nd}t
dt|}|d }t |dkrd}td	t |D ]3}|| j|| j |kr|| j|v r||| j| || _q\||d	  j|v r|| j| || _q\t||| | d d
 t t }d}t| |	D ]7\}}
d}g }|t |k r|t |
k r|| }|jr|t|jt||j dt||j d|jd |t |j7 }|d	7 }|t |k r|t |
k st |dkr|d d | |d krp|d d |d d  |ks.t |d	krp|d	 d |d d  |d krpt |d	kra|d	 d |d	 d  |krat|d	 d d |d	 d | }| |d d< |d	 d< td|d d | |d d< |d |d d k r|d d |d d krtdt
|d d | |d |d d< n|d d |d< |d |d d kr|d d |d d k rt|d d | |d |d d< n|d d |d< |d }||d< qd S )Nr   c                    s"   g | ]} fd d|d D qS )c                    s   g | ]	}| j k r|qS r"   )r   )rs   tokenrd   r"   r#   ru   '  s    z2add_word_timestamps.<locals>.<listcomp>.<listcomp>rU   r"   )rs   segmentr   r"   r#   ru   &  s    z'add_word_timestamps.<locals>.<listcomp>c                 S   s   g | ]}|j |j qS r"   )rW   rV   r   r"   r"   r#   ru   -  s    g        gffffff?r   u   .。!！?？r   seek)rT   rV   rW   rX   rW      rV   g      ?r   r   )r~   list	itertoolschainfrom_iterabler   r)   r*   nonzeromedianminr_   r5   rW   rV   rT   r   r   r   r   r'   dictroundrX   rU   max)r   rc   rd   rf   rg   r   r   r   kwargstext_tokens_per_segmentre   r   word_durationsmedian_durationmax_durationsentence_end_marksr,   time_offset
word_indexr   saved_tokensr   timingboundaryr"   r   r#   add_word_timestamps  s   
	""
r   )r?   )(r   r   r   dataclassesr   typingr   r   numbarM   r)   rG   torch.nn.functionalnn
functionalr   audior   r   r   rd   r	   rc   r
   Tensorr^   r$   jitndarrayr.   r>   rO   rR   rS   r_   r   r\   r   r   r   r"   r"   r"   r#   <module>   sz    
&
!	
R)	