o
    f2h                     @   s   d dl mZ d dlZd dlZzd dlZd dlmZ W n e	y%   e
dw ejdejfddZedddefd	d
ZdejdefddZdS )    )	lru_cacheNz4triton import failed; try `pip install --pre triton`
BLOCK_SIZEc	                 C   sz  t d|}	|	|k }
td|| d D ]}t   | |d |  }| ||  }| ||  d }t j||	 |
d}t j||	 |
d}t j||	 |
d}t j||d |  |	 |
dd}|t t ||| }| |d |  d }t j||	 ||
d ||d |  d }t j||	 d|
||k@ ||k@ d t j||	 d|
||k@ ||k@ d t j||	 d|
||k@ ||k@ d qd S )Nr      mask)r   other   )tlarangerangedebug_barrierloadminimumstore)costtracexx_stridecost_stridetrace_strideNMr   offsetsr   kp0p1p2c0c1c2x_rowcost_rowcost_ptr	trace_ptr r$   f/var/www/html/alexa/alex_system/speach-to-text/venv/lib/python3.10/site-packages/whisper/triton_ops.py
dtw_kernel   s&    $$&r&   )maxsizefilter_widthc              	      s   t jdtjfdd}t |j}|jdddd t	 D |_|jdd	 fd
dt	 d d D |_|jdd d  |_|S )Nr   c           
      S   sV   t d}t d|}||k }|||  }| ||  }	t t t j|	| t|d d S )Nr   r   )r	   
program_idr
   LOAD_ALL_ROWS_HEREBUBBLESORT_HEREr   MIDDLE_ROW_HERE)
yr   r   y_strider   row_idxr   r   x_ptry_ptrr$   r$   r%   kernel-   s   
zmedian_kernel.<locals>.kernelz    LOAD_ALL_ROWS_HERE
c                 S   s   g | ]}d | d| dqS )    rowz = tl.load(x_ptr + offsets + z, mask=mask)r$   .0ir$   r$   r%   
<listcomp>B   s    z!median_kernel.<locals>.<listcomp>z    BUBBLESORT_HERE

c                    s,   g | ]}d  dd t | d D qS )r9   c                 S   sv   g | ]7}d  d| d|d  d| d|d  d	d| d|d  d| d|d  d	d| d	d|d  d
gqS )r3   z    smaller = tl.where(rowz < rowr   z, row)z    larger = tl.where(rowz > rowr4   z
 = smallerz	 = larger)join)r6   jr$   r$   r%   r8   M   s    	$$
z,median_kernel.<locals>.<listcomp>.<listcomp>r   )r;   r   r5   r(   r$   r%   r8   K   s    	r   r   r,   row)
tritonjitr	   	constexprJITFunctionfnsrcreplacer;   r   )r(   r2   r$   r=   r%   median_kernel+   s.   	
rF   r   c                 C   sz   |   d|d}t|jdd }t|}t|d }d|dd 	 > }||f || | d|d|d |S )zBApply a median filter of given width along the last dimension of xr   N).r   )r   )

contiguousunfoldnpprodshaperF   torch
empty_likestride
bit_length)r   r(   slicesgridr2   r-   r   r$   r$   r%   median_filter_cudab   s   $rT   )	functoolsr   numpyrK   rN   r?   triton.languagelanguager	   ImportErrorRuntimeErrorr@   rA   r&   intrF   TensorrT   r$   r$   r$   r%   <module>   s     6