diff --git a/src/transformers/audio_utils.py b/src/transformers/audio_utils.py
index 694ab5f50df1..e8684ab70218 100644
--- a/src/transformers/audio_utils.py
+++ b/src/transformers/audio_utils.py
@@ -1081,146 +1081,3 @@ def amplitude_to_db_batch(
         spectrogram = np.clip(spectrogram, a_min=max_values - db_range, a_max=None)
 
     return spectrogram
-
-
-### deprecated functions below this line ###
-
-
-def get_mel_filter_banks(
-    nb_frequency_bins: int,
-    nb_mel_filters: int,
-    frequency_min: float,
-    frequency_max: float,
-    sample_rate: int,
-    norm: Optional[str] = None,
-    mel_scale: str = "htk",
-) -> np.array:
-    warnings.warn(
-        "The function `get_mel_filter_banks` is deprecated and will be removed in version 4.31.0 of Transformers",
-        FutureWarning,
-    )
-    return mel_filter_bank(
-        num_frequency_bins=nb_frequency_bins,
-        num_mel_filters=nb_mel_filters,
-        min_frequency=frequency_min,
-        max_frequency=frequency_max,
-        sampling_rate=sample_rate,
-        norm=norm,
-        mel_scale=mel_scale,
-    )
-
-
-def fram_wave(waveform: np.array, hop_length: int = 160, fft_window_size: int = 400, center: bool = True):
-    """
-    In order to compute the short time fourier transform, the waveform needs to be split in overlapping windowed
-    segments called `frames`.
-
-    The window length (window_length) defines how much of the signal is contained in each frame, while the hop length
-    defines the step between the beginning of each new frame.
-
-
-    Args:
-        waveform (`np.array` of shape `(sample_length,)`):
-            The raw waveform which will be split into smaller chunks.
-        hop_length (`int`, *optional*, defaults to 160):
-            Step between each window of the waveform.
-        fft_window_size (`int`, *optional*, defaults to 400):
-            Defines the size of the window.
-        center (`bool`, defaults to `True`):
-            Whether or not to center each frame around the middle of the frame. Centering is done by reflecting the
-            waveform on the left and on the right.
-
-    Return:
-        framed_waveform (`np.array` of shape `(waveform.shape // hop_length , fft_window_size)`):
-            The framed waveforms that can be fed to `np.fft`.
-    """
-    warnings.warn(
-        "The function `fram_wave` is deprecated and will be removed in version 4.31.0 of Transformers",
-        FutureWarning,
-    )
-    frames = []
-    for i in range(0, waveform.shape[0] + 1, hop_length):
-        if center:
-            half_window = (fft_window_size - 1) // 2 + 1
-            start = i - half_window if i > half_window else 0
-            end = i + half_window if i < waveform.shape[0] - half_window else waveform.shape[0]
-            frame = waveform[start:end]
-            if start == 0:
-                padd_width = (-i + half_window, 0)
-                frame = np.pad(frame, pad_width=padd_width, mode="reflect")
-
-            elif end == waveform.shape[0]:
-                padd_width = (0, (i - waveform.shape[0] + half_window))
-                frame = np.pad(frame, pad_width=padd_width, mode="reflect")
-
-        else:
-            frame = waveform[i : i + fft_window_size]
-            frame_width = frame.shape[0]
-            if frame_width < waveform.shape[0]:
-                frame = np.pad(frame, pad_width=(0, fft_window_size - frame_width), mode="constant", constant_values=0)
-        frames.append(frame)
-
-    frames = np.stack(frames, 0)
-    return frames
-
-
-def stft(frames: np.array, windowing_function: np.array, fft_window_size: Optional[int] = None):
-    """
-    Calculates the complex Short-Time Fourier Transform (STFT) of the given framed signal. Should give the same results
-    as `torch.stft`.
-
-    Args:
-        frames (`np.array` of dimension `(num_frames, fft_window_size)`):
-            A framed audio signal obtained using `audio_utils.fram_wav`.
-        windowing_function (`np.array` of dimension `(nb_frequency_bins, nb_mel_filters)`:
-            A array representing the function that will be used to reduces the amplitude of the discontinuities at the
-            boundaries of each frame when computing the STFT. Each frame will be multiplied by the windowing_function.
-            For more information on the discontinuities, called *Spectral leakage*, refer to [this
-            tutorial]https://download.ni.com/evaluation/pxi/Understanding%20FFTs%20and%20Windowing.pdf
-        fft_window_size (`int`, *optional*):
-            Size of the window om which the Fourier transform is applied. This controls the frequency resolution of the
-            spectrogram. 400 means that the fourier transform is computed on windows of 400 samples. The number of
-            frequency bins (`nb_frequency_bins`) used to divide the window into equal strips is equal to
-            `(1+fft_window_size)//2`. An increase of the fft_window_size slows the calculus time proportionally.
-
-    Example:
-
-    ```python
-    >>> from transformers.audio_utils import stft, fram_wave
-    >>> import numpy as np
-
-    >>> audio = np.random.rand(50)
-    >>> fft_window_size = 10
-    >>> hop_length = 2
-    >>> framed_audio = fram_wave(audio, hop_length, fft_window_size)
-    >>> spectrogram = stft(framed_audio, np.hanning(fft_window_size + 1))
-    ```
-
-    Returns:
-        spectrogram (`np.ndarray`):
-            A spectrogram of shape `(num_frames, nb_frequency_bins)` obtained using the STFT algorithm
-    """
-    warnings.warn(
-        "The function `stft` is deprecated and will be removed in version 4.31.0 of Transformers",
-        FutureWarning,
-    )
-    frame_size = frames.shape[1]
-
-    if fft_window_size is None:
-        fft_window_size = frame_size
-
-    if fft_window_size < frame_size:
-        raise ValueError("FFT size must greater or equal the frame size")
-    # number of FFT bins to store
-    nb_frequency_bins = (fft_window_size >> 1) + 1
-
-    spectrogram = np.empty((len(frames), nb_frequency_bins), dtype=np.complex64)
-    fft_signal = np.zeros(fft_window_size)
-
-    for f, frame in enumerate(frames):
-        if windowing_function is not None:
-            np.multiply(frame, windowing_function, out=fft_signal[:frame_size])
-        else:
-            fft_signal[:frame_size] = frame
-        spectrogram[f] = np.fft.fft(fft_signal, axis=0)[:nb_frequency_bins]
-    return spectrogram.T