import librosa
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only) so the audio file under
# /content/drive/MyDrive can be opened by path below.
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Given a sound file, estimate its tempo.
# Read the clip from Drive, limited to duration=20 (seconds of audio).
# `y` receives the samples and `sr` the sample rate reported by librosa.
audio_path = '/content/drive/MyDrive/python_scratch/audio/giant_steps_small/1030011.LOFI.mp3'
y, sr = librosa.load(audio_path, duration=20)
/usr/local/lib/python3.7/dist-packages/librosa/core/audio.py:162: UserWarning: PySoundFile failed. Trying audioread instead. warnings.warn("PySoundFile failed. Trying audioread instead.")
# listen to the audio file
import IPython.display as ipd
# `display` comes from the public IPython.display module; importing it from
# IPython.core.display is deprecated and emits a DeprecationWarning.
from IPython.display import display
# Render an inline audio player for the loaded samples at their sample rate.
display(ipd.Audio(y, rate=sr))
# run the tempo function
# `y` is passed by keyword: newer librosa releases accept the arguments of
# librosa.beat.tempo by keyword only, and the keyword form also works on the
# older release used here.
librosa_bpm = librosa.beat.tempo(y=y, sr=sr, hop_length=128)
# The result is a one-element array holding the estimate in beats per minute.
print(librosa_bpm)
[166.70866935]
The following steps are needed to obtain a tempo estimate from an audio signal.
import numpy as np
# Short-time Fourier transform of the audio signal.
# The hop is set to 128 samples so that one frame here corresponds to the
# hop_length=128 used for librosa.beat.tempo and for the lag/BPM conversion
# further down; the library default hop would be n_fft // 4 = 512 samples,
# which makes every lag-to-BPM mapping in this file off by a factor of four.
D = librosa.stft(y, hop_length=128)
# Magnitude of the complex STFT, then squared to get the power spectrogram.
magnitude = np.abs(D)
power_spectrum = magnitude ** 2
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-1-933fcd9d0455> in <module>() 1 import numpy as np ----> 2 D = librosa.stft(y) 3 magnitude = np.abs(D) 4 power_spectrum = magnitude ** 2 NameError: name 'librosa' is not defined
# Project the power spectrogram onto mel bands. `sr` is passed explicitly so
# the mel filter bank is built for the sample rate returned by librosa.load
# instead of silently relying on the function's default sample rate.
mel_spectrogram = librosa.feature.melspectrogram(S=power_spectrum, sr=sr)
def onset_strength(S):
    """Compute a normalized onset-strength envelope from a spectrogram.

    The spectrogram is differenced along axis 1, negative changes are
    discarded (half-wave rectification), and the remaining increases are
    averaged over axis 0. The result is shifted so its minimum is 0 and,
    when it is not flat, scaled so its maximum is 1.

    Parameters
    ----------
    S : array_like, 2-D
        Spectrogram; assumed to be laid out as (bands, frames), as librosa
        produces it -- confirm against the caller.

    Returns
    -------
    numpy.ndarray
        1-D envelope with ``S.shape[1] - 1`` entries. A flat envelope is
        returned as all zeros, and an input with a single column yields an
        empty array.
    """
    # Keep only increases between consecutive columns.
    rise = np.maximum(0.0, np.diff(S, n=1, axis=1))
    envelope = np.mean(rise, axis=0)

    # A single-column input leaves nothing to difference; min()/max() on an
    # empty array would raise, so return the empty envelope as is.
    if envelope.size == 0:
        return envelope

    envelope -= envelope.min()
    peak = envelope.max()
    # A constant envelope has peak == 0 after the shift; dividing would turn
    # every entry into NaN, so leave it at zero instead.
    if peak > 0:
        envelope /= peak
    return envelope
# Onset-strength envelope of the mel spectrogram, as returned by
# onset_strength (shifted to a minimum of 0 and scaled by its maximum).
onset_strength_envelope = onset_strength(mel_spectrogram)
import matplotlib.pyplot as plt
# Plot the envelope against its sample index.
plt.plot(onset_strength_envelope)
# 1. compute the signal's correlation with itself
r = librosa.autocorrelate(onset_strength_envelope)
# 2. clip with maxima and minima tempi
# Lags outside [lag_min, lag_max) are zeroed so they can never win the argmax.
lag_min, lag_max = 40, 240
r[lag_max:] = 0
r[:lag_min] = 0
# 3. the index to the max value is the estimate
# NOTE: a_tempo is a lag measured in envelope frames, not BPM. It converts to
# BPM as 60 * sr / (hop * a_tempo), where hop is the hop length of the STFT
# the envelope was derived from.
a_tempo = r.argmax()
print(a_tempo)
# Plot the retained lags against their true lag values. Plotting the bare
# slice would start the x axis at 0 and leave the marker below shifted by
# lag_min relative to the curve.
window = r[lag_min:lag_max]
plt.plot(range(lag_min, lag_min + len(window)), window)
plt.vlines(a_tempo, 0, r.max(), colors='r')
# Fourier tempogram: an STFT of the onset envelope itself, advancing one
# envelope sample per frame with a 512-sample window.
D = librosa.stft(onset_strength_envelope, hop_length=1, n_fft=512)
magnitude = np.abs(D)
power_spectrum = magnitude ** 2
# normalize
power_spectrum -= power_spectrum.min()
power_spectrum /= power_spectrum.max()
# plot the tempogram
fig = plt.figure(figsize=(12,8))
plt.imshow(librosa.power_to_db(power_spectrum), origin='lower')
# Average over axis 1 to get one strength value per Fourier bin.
tempogram = np.mean(power_spectrum, axis=1)
# BPM represented by each Fourier bin (one entry per row of the tempogram).
# librosa.tempo_frequencies describes autocorrelation lags, not Fourier bins,
# so it is the wrong axis for this method.
# NOTE(review): hop_length here must equal the hop used when the audio STFT
# behind the envelope was computed -- confirm it really is 128.
bpms = librosa.fourier_tempo_frequencies(sr=sr, win_length=512, hop_length=128)
# Search only the tempo band that the lag limits 40 and 240 (used for the
# autocorrelation clipping) correspond to at this hop; this also skips the
# DC bin, which would otherwise dominate a non-negative envelope.
min_bpm = 60 * sr / (128 * 240)
max_bpm = 60 * sr / (128 * 40)
in_band = (bpms >= min_bpm) & (bpms <= max_bpm)
# Pick the strongest bin in the band. The earlier max() over a list of
# indices always returned the last index, regardless of the data.
peak_bin = int(np.argmax(np.where(in_band, tempogram, -np.inf)))
est = bpms[peak_bin]
# The image's y axis is the bin index, so mark the winning bin (not its BPM)
# across the full width of the tempogram.
plt.hlines(peak_bin, 0, power_spectrum.shape[1] - 1, colors='r')
print(est)
Define a function that estimates the tempo of an audio file using the three methods above.
def tempo_estimate(filename, duration=20):
    """
    Estimates the tempo from an input filename using three methods
    1. librosa's tempo function
    2. custom method with a) autocorrelation and b) dft estimation

    Parameters
    ----------
    filename : str
        Path of the audio file to analyse.
    duration : float, optional
        Passed to ``librosa.load`` as ``duration`` to limit how much audio
        is read. Defaults to 20, the value used elsewhere in this file.

    Returns
    -------
    tuple of float
        ``(librosa_bpm, autocorrelation_bpm, dft_bpm)``. The
        autocorrelation estimate is ``nan`` when no lag inside the search
        window has a positive peak index (e.g. a very short signal).
    """
    # One hop length is used for every step so that frame indices, lags and
    # BPM conversions all refer to the same frame rate.
    hop_length = 128
    min_lag, max_lag = 40, 240  # lag search window, in envelope frames
    tempo_n_fft = 512           # window length of the Fourier tempogram

    y, sr = librosa.load(filename, duration=duration)
    frame_rate = sr / hop_length  # envelope frames per second

    # 1. librosa's own estimate (returned as a one-element array).
    librosa_bpm = float(librosa.beat.tempo(y=y, sr=sr, hop_length=hop_length)[0])

    # Onset-strength envelope shared by both custom methods.
    power = np.abs(librosa.stft(y, hop_length=hop_length)) ** 2
    mel = librosa.feature.melspectrogram(S=power, sr=sr)
    envelope = onset_strength(mel)

    # 2a. autocorrelation: strongest lag inside the window, converted to BPM.
    ac = librosa.autocorrelate(envelope)
    ac[max_lag:] = 0
    ac[:min_lag] = 0
    lag = int(ac.argmax())
    # lag == 0 means nothing was found in the window; avoid dividing by zero.
    autocorrelation_bpm = 60.0 * frame_rate / lag if lag > 0 else float('nan')

    # 2b. DFT: strongest Fourier-tempogram bin within the same tempo band.
    tempo_power = np.abs(librosa.stft(envelope, hop_length=1, n_fft=tempo_n_fft)) ** 2
    tempogram = np.mean(tempo_power, axis=1)
    bpms = librosa.fourier_tempo_frequencies(
        sr=sr, win_length=tempo_n_fft, hop_length=hop_length
    )
    in_band = (bpms >= 60.0 * frame_rate / max_lag) & (bpms <= 60.0 * frame_rate / min_lag)
    peak_bin = int(np.argmax(np.where(in_band, tempogram, -np.inf)))
    dft_bpm = float(bpms[peak_bin])

    return librosa_bpm, autocorrelation_bpm, dft_bpm
Get the tempo estimates of 10 files of your choosing and compare them.