In [None]:
import librosa
from google.colab import drive
# Mount Google Drive at /content/drive (uses the google.colab helper, so this
# cell only works inside a Colab runtime).
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Tempo estimation
Given a sound file, estimate its tempo in beats per minute (BPM).

## Load a sound

In [None]:
# Location of the example recording on the mounted Drive and the number of
# seconds to read from its start.
AUDIO_FILE = '/content/drive/MyDrive/python_scratch/audio/giant_steps_small/1030011.LOFI.mp3'
EXCERPT_SECONDS = 20
# y receives the decoded samples, sr the sampling rate returned by librosa.
y, sr = librosa.load(AUDIO_FILE, duration=EXCERPT_SECONDS)



## Listen to it

In [None]:
# Play the loaded excerpt with an inline audio widget.
import IPython.display as ipd
# `display` is imported from the public IPython.display module; importing it
# from IPython.core.display is deprecated.
from IPython.display import display
display(ipd.Audio(y, rate=sr))

## Librosa's beat.tempo function


In [None]:
# run the tempo function
# The audio must be passed as `y=...`: librosa >= 0.10 made this argument
# keyword-only, so a positional `y` raises a TypeError there.
# The estimator moved to librosa.feature.tempo in librosa 0.10; fall back to
# librosa.beat.tempo on older releases that do not have the new location.
estimate_tempo = getattr(librosa.feature, 'tempo', None) or librosa.beat.tempo
librosa_bpm = estimate_tempo(y=y, sr=sr, hop_length=128)
print(librosa_bpm)

[166.70866935]


## Custom Tempo Estimation

The following are steps needed to obtain a tempo estimation from an audio signal.

### 1. Compute the Power Spectrum of a signal

In [1]:
import numpy as np
# Short-time Fourier transform of the audio -> magnitude -> power (|STFT|^2).
D = librosa.stft(y)
magnitude = np.absolute(D)
power_spectrum = np.square(magnitude)

NameError: ignored

### 2. Compute the Mel Spectrogram using the Power Spectrum

In [None]:
# Project the power spectrum onto the mel scale. The sampling rate is passed
# explicitly so the mel filter bank is built for the loaded audio instead of
# silently relying on melspectrogram's default of 22050 Hz.
mel_spectrogram = librosa.feature.melspectrogram(S=power_spectrum, sr=sr)

### 3. Compute the onset strength envelope using the Mel Spectrogram

In [None]:
def onset_strength(S):
    """Compute a normalized onset strength envelope from a spectrogram.

    The envelope is the mean, over the first axis of ``S``, of the positive
    part of the first-order difference taken along the second axis. It is
    then shifted so its minimum is 0 and scaled so its maximum is 1.

    Parameters
    ----------
    S : array-like, 2-D
        Spectrogram; differences are taken along axis 1.

    Returns
    -------
    numpy.ndarray
        1-D envelope with ``S.shape[1] - 1`` values in ``[0, 1]``.
        An input with a single column yields an empty array, and a flat
        envelope (e.g. silence) yields all zeros instead of NaN.
    """
    # Half-wave rectified frame-to-frame difference: only increases count.
    rising = np.maximum(0.0, np.diff(S, n=1, axis=1))
    envelope = np.mean(rising, axis=0)
    if envelope.size == 0:
        # Nothing to normalize; min()/max() would raise on an empty array.
        return envelope
    envelope = envelope - envelope.min()
    peak = envelope.max()
    if peak > 0:
        # Skip the division for a flat envelope to avoid 0 / 0 -> NaN.
        envelope = envelope / peak
    return envelope

# Onset strength envelope of the mel spectrogram; this is the signal the
# autocorrelation and DFT tempo estimators below operate on.
onset_strength_envelope = onset_strength(mel_spectrogram)

In [None]:
import matplotlib.pyplot as plt
# Draw the envelope on an explicitly created figure/axes pair.
fig, ax = plt.subplots()
ax.plot(onset_strength_envelope)

### 4. Estimate the tempo using Autocorrelation or Frequency Estimation

#### 4.1 Autocorrelation

In [None]:
# Hop size (in samples) between consecutive onset-envelope frames. The
# envelope is derived from librosa.stft(y) called with its default hop length
# (n_fft // 4 = 512); update this if that STFT call is given another hop_length.
ENVELOPE_HOP = 512
# Tempo search range in beats per minute.
MIN_BPM, MAX_BPM = 40, 260

# 1. compute the signal's correlation with itself
r = librosa.autocorrelate(onset_strength_envelope)
# 2. map every lag (in frames) to the tempo it represents; lag 0 maps to inf
lag_bpms = librosa.tempo_frequencies(r.shape[0], hop_length=ENVELOPE_HOP, sr=sr)
# 3. clip with maxima and minima tempi (expressed in BPM, not raw lag indices)
in_range = (lag_bpms >= MIN_BPM) & (lag_bpms <= MAX_BPM)
r[~in_range] = 0
# 4. the strongest remaining lag, converted to BPM, is the estimate
best_lag = r.argmax()
a_tempo = lag_bpms[best_lag]
print(a_tempo)

# Plot autocorrelation against tempo so the marker and the curve share an axis.
fig, ax = plt.subplots()
ax.plot(lag_bpms[in_range], r[in_range])
ax.axvline(a_tempo, color='r')
ax.set_xlabel('tempo (BPM)')
ax.set_ylabel('autocorrelation')
plt.show()

#### 4.2 Frequency Estimation (DFT Method)

In [None]:
# Hop size (in samples) between consecutive onset-envelope frames. The
# envelope is derived from librosa.stft(y) called with its default hop length
# (n_fft // 4 = 512); update this if that STFT call is given another hop_length.
ENVELOPE_HOP = 512
# Window length (in envelope frames) of the Fourier tempogram.
TEMPOGRAM_WIN = 512
# Tempo search range in beats per minute.
MIN_BPM, MAX_BPM = 40, 260

# Fourier tempogram: STFT of the onset envelope. Dedicated names are used so
# the audio spectrogram variables from step 1 are not overwritten.
tempo_stft = librosa.stft(onset_strength_envelope, hop_length=1, n_fft=TEMPOGRAM_WIN)
tempo_power = np.abs(tempo_stft) ** 2
# normalize
tempo_power -= tempo_power.min()
tempo_power /= tempo_power.max()

# Average over time: one energy value per tempo bin.
tempogram = np.mean(tempo_power, axis=1)

# Tempo (BPM) represented by each Fourier bin; bin 0 is DC (0 BPM).
bpms = librosa.fourier_tempo_frequencies(
    sr=sr, win_length=TEMPOGRAM_WIN, hop_length=ENVELOPE_HOP
)

# Pick the strongest bin inside the tempo range. Restricting the range also
# drops the DC bin, which would otherwise dominate a non-negative envelope.
candidate_bins = np.flatnonzero((bpms >= MIN_BPM) & (bpms <= MAX_BPM))
best_bin = candidate_bins[np.argmax(tempogram[candidate_bins])]
est = bpms[best_bin]

# plot the tempogram
fig, ax = plt.subplots(figsize=(12, 8))
ax.imshow(librosa.power_to_db(tempo_power), origin='lower')
# The image rows are bin indices, so the marker is drawn at the winning bin
# and spans every time frame.
ax.hlines(best_bin, 0, tempo_power.shape[1] - 1, colors='r')
ax.set_xlabel('frame')
ax.set_ylabel('tempo bin')
print(est)

# Exercise 1
Define a function that estimates the tempo of an audio file using the three methods above.

In [None]:
def tempo_estimate(filename):
    """
    Estimate the tempo of the audio file at `filename` (Exercise 1 stub).

    Intended to produce estimates with three methods:
    1. librosa's tempo function
    2. custom method with a) autocorrelation and b) DFT estimation

    Not implemented yet: the body consists of this docstring only, so calling
    the function currently returns None.
    """

# Exercise 2
Get the tempo estimates of 10 files of your choosing and compare them. 