import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
plt.rcParams['figure.figsize'] = (12, 5)
from google.colab import drive
import glob
drive.mount('/content/drive')
from ipywidgets import interact

Mounted at /content/drive


# Path of the demo clip on the mounted Google Drive.
filename = "/content/drive/MyDrive/python_scratch/audio/admiralbob77_-_Choice_-_Drum-bass.ogg"
# Load only the first 10 seconds; `y` is the signal and `sr` its sample rate.
y, sr = librosa.load(filename, duration=10) # default sr = 22050
# Inline player for the loaded excerpt.
ipd.Audio(y, rate=sr)


# Parameters shared by the tempo estimate and the beat tracker.
hop_length = 512
start_bpm = 120.
tightness_exp = 2.0  # beat_track receives tightness = 10**tightness_exp

# Global tempo estimate. `sr` is passed explicitly (beat_track below already
# receives it) so both calls agree even if the clip is loaded at a sample
# rate other than librosa's default.
tempo = librosa.beat.tempo(y=y,
                           sr=sr,
                           hop_length=hop_length,
                           start_bpm=start_bpm,
                           max_tempo=240.0,
                           aggregate=np.mean)
print(tempo[0])

# Track beats, seeding the tracker with the tempo estimated above.
# `beats` holds frame indices (converted to seconds later with
# librosa.frames_to_time).
bpm, beats = librosa.beat.beat_track(y=y,
                                     sr=sr,
                                     hop_length=hop_length,
                                     bpm=tempo[0],
                                     tightness=10**tightness_exp)
print(beats)

135.99917763157896
[  3  21  40  59  78  96 116 135 154 173 192 211 230 249 268 287 306 325
 344 363]


def f(hop_length, start_bpm, tightness_exp):
    """Estimate tempo and beat frames of the globally loaded clip.

    Reads the module-level signal ``y`` and sample rate ``sr``.

    Parameters
    ----------
    hop_length : hop length forwarded to both librosa calls.
    start_bpm : initial tempo guess forwarded to ``librosa.beat.tempo``.
    tightness_exp : exponent; ``beat_track`` receives ``10**tightness_exp``
        as its ``tightness``.

    Returns
    -------
    (tempo, beats) : the array returned by ``librosa.beat.tempo`` and the
        beat positions returned by ``librosa.beat.beat_track``.
    """
    # Pass `sr` here too so the tempo estimate and the beat tracker use the
    # same sample rate (previously only beat_track received it).
    tempo = librosa.beat.tempo(y=y,
                               sr=sr,
                               hop_length=hop_length,
                               start_bpm=start_bpm,
                               max_tempo=240.0,
                               aggregate=np.mean)
    _, beats = librosa.beat.beat_track(y=y,
                                       sr=sr,
                                       hop_length=hop_length,
                                       bpm=tempo[0],
                                       tightness=10**tightness_exp)
    return tempo, beats

interact(f,hop_length = 512, start_bpm = 120., tightness_exp = 2.0)

<function __main__.f>


# optionally adjust edges (disabled): would add frame 0 and the last frame
# as extra beat positions
# beats_centered = np.concatenate([ [0], beats, [len(y)/hop_length] ]).astype(np.int32)
# print(beats_centered)

# convert beat frame indices to time units, using the same sr/hop_length
# that produced them
beats_times = librosa.frames_to_time(beats,
                                     sr=sr,
                                     hop_length=hop_length)
# plot waveform on a time axis
librosa.display.waveplot(y, sr=sr, x_axis='time')
# plot red vertical lines at the beat times, spanning y = -1 .. 1
plt.vlines(beats_times, -1, 1, colors='r')

<matplotlib.collections.LineCollection at 0x7f1473262c10>


def audio_add(x, y):
    """Mix two signals: truncate both to the shorter length and average them.

    Both arguments are indexed through ``.shape[0]``, so they must be
    array-like objects exposing ``shape``. Returns a new array holding
    ``(x + y) * 0.5`` over the common prefix.
    """
    common = min(x.shape[0], y.shape[0])
    mixed = x[:common] + y[:common]
    return np.array(mixed) * 0.5


# Synthesize a click track at the beat times, as long as the clip itself
# (length=len(y)) so the two can be summed sample by sample.
clicks = librosa.clicks(beats_times,sr=sr,length=len(y))
# Listen to the clip with the clicks overlaid.
ipd.Audio( y + clicks, rate=sr)


# Spectral-analysis parameters. NOTE: this rebinds the global `hop_length`
# (it was 512 for the beat tracking above); every later cell that reads
# `hop_length` sees 128 until it is reassigned.
n_fft = 4096
hop_length = 128
# short time fourier transform; the analysis window is half the FFT size
D = librosa.stft(y, 
                 n_fft=n_fft, 
                 win_length=n_fft//2, 
                 hop_length=hop_length, 
                 window="hann",
                 center=True)


# take power spectrum (squared magnitude of the complex STFT)
PS = np.abs(D)**2
# plot it in dB on a log-frequency axis
librosa.display.specshow(librosa.power_to_db(PS), y_axis='log')
plt.colorbar(format="%+2.0f dB")

<matplotlib.colorbar.Colorbar at 0x7f1455e32310>


# Generate a mel-spectrogram from the power spectrum.
n_mels = 128
fmax = 9000
# The small offset (0.001) keeps every bin strictly positive before the
# log scaling below. `sr` is passed explicitly so the mel filter bank is
# built for the clip's actual sample rate.
S = librosa.feature.melspectrogram(S=PS+0.001,
                                   sr=sr,
                                   n_mels=n_mels,
                                   fmax=fmax)
# Normalize by peak energy
S = S / S.max()

# Put on a log scale
S = librosa.power_to_db(S)
# plot: the rows of S are mel bands, so use the mel axis (with the same
# fmax as the filter bank) rather than 'log', which would label the rows
# as if they were linearly spaced FFT bins.
librosa.display.specshow(S,
                         sr=sr,
                         hop_length=hop_length,
                         y_axis='mel',
                         fmax=fmax)
plt.colorbar(format="%+2.0f dB")

<matplotlib.colorbar.Colorbar at 0x7f1455d7a290>


# Onset strength envelope computed from the precomputed spectrogram S
# (the log-scaled mel-spectrogram from the previous cell) instead of from
# the raw audio.
onset_strength = librosa.onset.onset_strength(S=S,
                                              detrend=False, 
                                              max_size=1, 
                                              lag=1)
# alternative: let librosa compute its own spectrogram from the audio
# onset_strength = librosa.onset.onset_strength(y=y,sr=sr)
plt.plot(onset_strength)
# optional overlay of the tracked beats (disabled)
# plt.vlines(beats, 0, onset_strength.max(), colors='r', linestyle='--')

[<matplotlib.lines.Line2D at 0x7f1455cecd10>]


# Onset strength function for beat tracking

# normalization helper function
def normalize(x):
    """Return *x* rescaled linearly so its minimum is 0 and its maximum is 1.

    The input is left untouched (the previous version shifted and scaled the
    caller's array in place). Integer input is converted to float so the
    division is well defined. A constant input has no range to stretch and
    yields an array of zeros instead of NaNs.

    Parameters
    ----------
    x : array-like of numbers; must be non-empty.

    Returns
    -------
    numpy.ndarray with the same shape as *x*.

    Raises
    ------
    ValueError
        If *x* is empty (raised by the ``min`` reduction).
    """
    values = np.asarray(x)
    if not np.issubdtype(values.dtype, np.floating):
        values = values.astype(float)
    # Subtraction allocates a new array, so the caller's data is not modified.
    shifted = values - values.min()
    peak = shifted.max()
    if peak == 0:
        return np.zeros_like(shifted)
    return shifted / peak

def custom_onset(S, n=1):
    """Onset envelope from a spectrogram via half-wave rectified differences.

    Takes the n-th order difference of ``S`` along axis 1, keeps only the
    positive part, averages over axis 0 and rescales the result with
    ``normalize``.
    """
    positive_flux = np.maximum(np.diff(S, n=n, axis=1), 0.0)
    return normalize(positive_flux.mean(axis=0))


custom_onset_strength = custom_onset(S, n=1)
plt.plot(custom_onset_strength)

[<matplotlib.lines.Line2D at 0x7f144b9a7b90>]


# Peak-picking parameters. The window sizes and the wait time are given in
# SECONDS; they are converted to onset-envelope frames (sr // hop_length
# frames per second) in the peak_pick call below.
pre_max  = 0.05  # seconds before n over which max is computed
post_max = 0.00  # seconds after n over which max is computed
pre_avg  = 0.01   # seconds before n over which mean is computed
post_avg = 0.10   # seconds after n over which mean is computed
delta    = 0.07  # threshold offset for mean
wait     = 0.3   # seconds to wait after picking a peak

# Frame counts are cast to int (the floor division of a float still yields a
# float) and passed by keyword so each value is visibly tied to its parameter.
peaks = librosa.util.peak_pick(onset_strength,
                               pre_max=int(pre_max*sr//hop_length),
                               post_max=int(post_max*sr//hop_length+1),
                               pre_avg=int(pre_avg*sr//hop_length),
                               post_avg=int(post_avg*sr//hop_length+1),
                               delta=delta,
                               wait=int(wait*sr//hop_length))
plt.plot(onset_strength)
# mark the picked peaks (frame indices) on top of the envelope
plt.vlines(peaks, 0, onset_strength.max(), colors='r', linestyle='--')

<matplotlib.collections.LineCollection at 0x7f1455e32bd0>


# Convert the picked peak frames to seconds.
peak_times = librosa.frames_to_time(peaks, 
                                       sr=sr, 
                                       hop_length=hop_length)
# Click track at the peak times; no explicit length is requested here, so
# audio_add below trims both signals to the shorter one before mixing.
peak_clicks = librosa.clicks(peak_times,sr=sr,hop_length=hop_length)
ipd.Audio(audio_add(peak_clicks, y), rate=sr)


print(peaks)

[   3   75  143  227  302  372  454  529  606  685  757  822  911  984
 1062 1139 1211 1292 1365 1443 1518 1594 1669]


# r = librosa.autocorrelate(onset_strength, max_size=5000)
r = librosa.autocorrelate(custom_onset_strength, max_size=5000)
plt.plot(r)

[<matplotlib.lines.Line2D at 0x7f144b8a2090>]


def compute_autocor(onset_strength, i_min=240, i_max=40):
    """Return the index of the largest autocorrelation value in a window.

    The autocorrelation of ``onset_strength`` (at most 5000 values) is
    zeroed below index ``i_max`` and from index ``i_min`` onward, and the
    position of the remaining maximum is returned. Note the naming: with
    the defaults, ``i_max`` (40) is the lower bound of the window and
    ``i_min`` (240) the upper bound.

    NOTE(review): the result is an index into the autocorrelation, not a
    BPM value; no conversion is done here.
    """
    acf = librosa.autocorrelate(onset_strength, max_size=5000)

    # Blank everything outside [i_max, i_min) so argmax only sees the window.
    acf[i_min:] = 0
    acf[:i_max] = 0

    return np.argmax(acf)
# plt.vlines(tempo_estimate,r.min(),r.max(),color='r')
# print(f'Tempo estimate: is {tempo_estimate} bpm')
# plt.plot(r)


# compute the DFT of the onset strength envelope
p = np.fft.fft(onset_strength)
# p = np.fft.fft(custom_onset_strength)
# keep the first half only (the second half mirrors it for a real-valued
# envelope) and drop the first element
half_length = p.shape[0]//2
pabs_half = p[1:half_length-1]
# compute the power spectrum
pabs = np.abs(pabs_half)**2

# normalize to the range [0, 1]
pabs -= pabs.min()
pabs /= pabs.max()

# plot the normalized power spectrum
max_x = 240
plot_y = pabs[:max_x]
# Use integer positions for the x axis: the markers and the vertical line
# below are drawn at array indices, so the curve must share that scale
# (np.linspace(0, max_x, max_x) was stretched by max_x/(max_x-1) and would
# also break if fewer than max_x bins were available).
plot_x = np.arange(len(plot_y))
plt.plot(plot_x,plot_y)

# filter points with a threshold
thresh = 0.1

# indices (within the first max_x bins) whose normalized power exceeds thresh
points = np.flatnonzero(plot_y > thresh).tolist()

# NOTE(review): the estimate is the highest index above the threshold and is
# printed as "bpm" without any index-to-tempo conversion - confirm intent.
p_estimate = max(points)
# plot points and printout tempo estimate
plt.plot(points, pabs[points], 'x')
plt.vlines(p_estimate,0.3,1,colors='r',linestyles='--')
print(f'Tempo estimate with DFT is {p_estimate} bpm')
print(points)

Tempo estimate with DFT is 135 bpm
[10, 22, 23, 33, 44, 45, 56, 67, 78, 89, 90, 101, 135]


!pip install essentia
from essentia.standard import *

Collecting essentia
  Downloading https://files.pythonhosted.org/packages/71/fd/cbb601736ebdf5bfdaf8d215e7741b76b6519371ddf1f64427cf275af05d/essentia-2.1b6.dev374-cp37-cp37m-manylinux1_x86_64.whl (12.0MB)
     |████████████████████████████████| 12.0MB 6.1MB/s 
Requirement already satisfied: numpy>=1.8.2 in /usr/local/lib/python3.7/dist-packages (from essentia) (1.19.5)
Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from essentia) (3.13)
Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from essentia) (1.15.0)
Installing collected packages: essentia
Successfully installed essentia-2.1b6.dev374


# Essentia's RhythmExtractor2013 and AudioOnsetsMarker both assume 44.1 kHz
# audio. Loading at librosa's `sr` (22050) made the extractor analyse the
# signal at the wrong time scale, so load, mark and play at 44.1 kHz.
essentia_sr = 44100

# Loading audio file
audio = MonoLoader(filename=filename, sampleRate=essentia_sr)()

# Compute beat positions and BPM
rhythm_extractor = RhythmExtractor2013(method="multifeature")
e_bpm, e_beats, confidence, _, intervals = rhythm_extractor(audio)

print("BPM:", e_bpm)
print("Beats", e_beats)
print("Beat estimation confidence:", confidence)

# Mark beat positions on the audio.
# Let's use beeps instead of white noise to mark them,
# as it's more distinctive
marker = AudioOnsetsMarker(onsets=e_beats,
                           type='beep',
                           sampleRate=essentia_sr)
marked_audio = marker(audio)
ipd.Audio(marked_audio, rate=essentia_sr)


# `tempo_estimate` was printed below without ever being assigned. Compute it
# with the autocorrelation helper defined earlier.
# NOTE(review): the custom onset envelope is used because the
# autocorrelation plot above uses it - confirm this is the intended input.
tempo_estimate = compute_autocor(custom_onset_strength)

# Side-by-side comparison of all estimates, truncated to integers.
print(int(bpm), "Librosa beats.beat_track")
# librosa.beat.tempo returns an array; take its single element explicitly
print(int(tempo[0]),"Librosa beats.tempo")
print(int(tempo_estimate),"Custom librosa with autocorrelation")
print(int(p_estimate),"Custom librosa with DFT")
print(int(e_bpm), "Essentia")


import pandas as pd


def dft_estimate(x, max_x=240, thresh=0.1):
    """Return the highest spectral index of *x* whose power exceeds *thresh*.

    The power spectrum of *x* is taken over the first half of its DFT with
    the first element removed, shifted and scaled to the range [0, 1], and
    searched within its first ``max_x`` entries.

    Because the first DFT element is dropped, a returned index ``i`` refers
    to DFT bin ``i + 1``.
    NOTE(review): callers store the result as a tempo; no index-to-BPM
    conversion happens here - confirm that is intended.

    Parameters
    ----------
    x : 1-D array-like signal (e.g. an onset strength envelope).
    max_x : number of leading spectrum entries to search.
    thresh : normalized power (0..1) an entry must exceed to qualify.

    Returns
    -------
    int : the largest qualifying index.

    Raises
    ------
    ValueError
        If *x* is too short to leave any spectrum entry, if the spectrum is
        flat or not finite, or if no entry within ``max_x`` exceeds
        ``thresh``.
    """
    spectrum = np.fft.fft(x)
    # get rid of second half (symmetrical), and the first element
    half_length = spectrum.shape[0] // 2
    power = np.abs(spectrum[1:half_length - 1]) ** 2
    if power.size == 0:
        raise ValueError("dft_estimate: input is too short to estimate from")

    # normalize to [0, 1]
    power -= power.min()
    peak = power.max()
    if not peak > 0:
        # Flat (or NaN) spectrum: dividing would only produce NaNs.
        raise ValueError("dft_estimate: spectrum is flat or not finite")
    power /= peak

    # filter points with a threshold
    candidates = np.flatnonzero(power[:max_x] > thresh)
    if candidates.size == 0:
        raise ValueError(
            "dft_estimate: no spectral entry below max_x exceeds thresh")

    # flatnonzero returns ascending indices, so the last one is the maximum
    return int(candidates[-1])


def autocor_estimate(x, max_size=5000, i_min=240, i_max=40):
    """Return the index of the largest autocorrelation value in a window.

    The autocorrelation of ``x`` (at most ``max_size`` values) is zeroed
    below index ``i_max`` and from index ``i_min`` onward; the position of
    the remaining maximum is returned. With the defaults, ``i_max`` is the
    lower bound of the window and ``i_min`` the upper bound.

    NOTE(review): the result is an index into the autocorrelation, not a
    BPM value; no conversion is done here.
    """
    acf = librosa.autocorrelate(x, max_size=max_size)

    # Blank everything outside [i_max, i_min) so argmax only sees the window.
    acf[i_min:] = 0
    acf[:i_max] = 0

    return np.argmax(acf)


# Parameters for the batch comparison loop below.
hop_length = 512
start_bpm = 120.
tightness_exp = 2.0  # beat_track receives tightness = 10**tightness_exp
n_mels = 128
fmax = 9000
# Peak-picking windows: the loop multiplies these by sr // hop_length, i.e.
# they are expressed in seconds and converted to frames at the call site.
pre_max  = 0.05  # seconds before n over which max is computed
post_max = 0.00  # seconds after n over which max is computed
pre_avg  = 0.01   # seconds before n over which mean is computed
post_avg = 0.10   # seconds after n over which mean is computed
delta    = 0.07  # threshold offset for mean
wait     = 0.3   # seconds to wait after picking a peak

# Define lower and upper limits for the autocorrelation argmax.
# Naming caveat: i_max (40) is the LOWER index bound and i_min (240) the
# UPPER one; i_min is also reused as max_x for the DFT estimate in the loop.
i_min = 240
i_max = 40
thresh = 0.1

# Seconds of audio loaded per file.
duration = 30

dataArray = []

def cooked_bpm(f):
    """Look up the reference BPM for a track by an ID contained in *f*.

    *f* is a string (a file name in the calling loop). The first ID, in the
    order listed below, that occurs as a substring of *f* decides the
    result. Returns ``None`` when no ID matches.
    """
    reference_bpm = (
        ("1030011", 126),
        ("1068430", 170),
        ("1084996", 140),
        ("1092771", 136),
        ("1114156", 86),
        ("1118326", 171.419),
        ("1120171", 174),
        ("1171800", 140),
    )
    for track_id, bpm_value in reference_bpm:
        if track_id in f:
            return bpm_value
    return None


where = "/content/drive/MyDrive/python_scratch/audio/giant_steps_small/*.mp3"

# Sort so the rows of the results table come out in a stable order;
# glob itself does not promise any particular ordering.
file_list = sorted(glob.glob(where))

# Essentia's RhythmExtractor2013 assumes 44.1 kHz input, so it gets its own
# copy of each file at that rate instead of librosa's 22050 Hz signal.
essentia_sr = 44100

# NOTE: `n_fft` is not set in this cell; it comes from the spectral-analysis
# cell earlier in the notebook.
for filename in file_list:

    y, sr = librosa.load(filename, duration=duration) # default sr = 22050

    # --- librosa tempo estimate and beat tracking -------------------------
    # `sr` is passed to both calls so they agree on the sample rate.
    librosa_tempo = librosa.beat.tempo(y=y,
                                       sr=sr,
                                       hop_length=hop_length,
                                       start_bpm=start_bpm,
                                       max_tempo=240.0,
                                       aggregate=np.mean)[0]

    _, librosa_beats = librosa.beat.beat_track(y=y,
                                               sr=sr,
                                               hop_length=hop_length,
                                               bpm=librosa_tempo,
                                               tightness=10**tightness_exp)

    # --- onset strength envelopes -----------------------------------------
    # short time fourier transform
    D = librosa.stft(y,
                     n_fft=n_fft,
                     win_length=n_fft//2,
                     hop_length=hop_length,
                     window="hann",
                     center=True)

    # take power spectrum
    PS = np.abs(D)**2

    S = librosa.feature.melspectrogram(S=PS+0.001,
                                       n_mels=n_mels,
                                       fmax=fmax)

    # Normalize by peak energy
    S = S / S.max()

    librosa_os = librosa.onset.onset_strength(S=S,
                                              detrend=False,
                                              max_size=1,
                                              lag=1)
    custom_os = custom_onset(S, n=1)

    # --- peak picking on both envelopes -----------------------------------
    # The windows are given in seconds; convert them to whole frames once
    # and reuse the same settings for both envelopes.
    peak_kwargs = dict(
        pre_max=int(pre_max*sr//hop_length),
        post_max=int(post_max*sr//hop_length+1),
        pre_avg=int(pre_avg*sr//hop_length),
        post_avg=int(post_avg*sr//hop_length+1),
        delta=delta,
        wait=int(wait*sr//hop_length),
    )
    librosa_beats_librosa_os = librosa.util.peak_pick(librosa_os, **peak_kwargs)
    librosa_beats_custom_os = librosa.util.peak_pick(custom_os, **peak_kwargs)

    # --- periodicity estimates --------------------------------------------
    tempo_aut_est_librosa_os = autocor_estimate(librosa_os, i_min=i_min, i_max=i_max)
    tempo_dft_est_librosa_os = dft_estimate(librosa_os, max_x=i_min, thresh=thresh)

    tempo_aut_est_custom_os = autocor_estimate(custom_os, i_min=i_min, i_max=i_max)
    tempo_dft_est_custom_os = dft_estimate(custom_os, max_x=i_min, thresh=thresh)

    # --- Essentia ----------------------------------------------------------
    # Load at 44.1 kHz and keep the same `duration` seconds librosa analysed.
    essentia_audio = MonoLoader(filename=filename,
                                sampleRate=essentia_sr)()[:duration*essentia_sr]

    rhythm_extractor = RhythmExtractor2013()

    # Compute beat positions and BPM
    e_bpm, e_beats, _, _, _ = rhythm_extractor(essentia_audio)

    # One result row per file; the value order is unchanged.
    fname = filename.split('/')[-1]
    dataArray.append([
            fname,
            len(librosa_beats),
            len(librosa_beats_librosa_os),
            len(librosa_beats_custom_os),
            len(e_beats),
            librosa_tempo,
            tempo_aut_est_librosa_os,
            tempo_dft_est_librosa_os,
            tempo_aut_est_custom_os,
            tempo_dft_est_custom_os,
            e_bpm,
            cooked_bpm(fname),
        ])
###### loop ends here ---------------------------------------------------

# Column labels for the results table. The order must match the order in
# which the loop above appends values to each row: the two estimates for
# the librosa onset envelope (autocorrelation, then DFT) come before the two
# for the custom envelope. The previous labels listed both autocorrelation
# columns first, which mislabelled the middle columns.
columns = [
    "File Name",
    "Librosa Beat Tracking",
    "Librosa Peaks w/ Librosa OS",
    "Librosa Peaks w/ Custom OS",
    "Essentia Beat Tracking",
    "Librosa Tempo",
    "Autocorr w/ Librosa OS",
    "DFT estimate w/ Librosa OS",
    "Autocorr w/ Custom OS",
    "DFT estimate w/ Custom OS",
    "Essentia Tempo",
    "Cooked BPM",
]

pd.DataFrame(data=dataArray, columns=columns)

/usr/local/lib/python3.7/dist-packages/librosa/core/audio.py:162: UserWarning: PySoundFile failed. Trying audioread instead.
  warnings.warn("PySoundFile failed. Trying audioread instead.")
/usr/local/lib/python3.7/dist-packages/librosa/core/audio.py:162: UserWarning: PySoundFile failed. Trying audioread instead.
  warnings.warn("PySoundFile failed. Trying audioread instead.")
/usr/local/lib/python3.7/dist-packages/librosa/core/audio.py:162: UserWarning: PySoundFile failed. Trying audioread instead.
  warnings.warn("PySoundFile failed. Trying audioread instead.")
/usr/local/lib/python3.7/dist-packages/librosa/core/audio.py:162: UserWarning: PySoundFile failed. Trying audioread instead.
  warnings.warn("PySoundFile failed. Trying audioread instead.")
/usr/local/lib/python3.7/dist-packages/librosa/core/audio.py:162: UserWarning: PySoundFile failed. Trying audioread instead.
  warnings.warn("PySoundFile failed. Trying audioread instead.")
/usr/local/lib/python3.7/dist-packages/librosa/core/audio.py:162: UserWarning: PySoundFile failed. Trying audioread instead.
  warnings.warn("PySoundFile failed. Trying audioread instead.")
/usr/local/lib/python3.7/dist-packages/librosa/core/audio.py:162: UserWarning: PySoundFile failed. Trying audioread instead.
  warnings.warn("PySoundFile failed. Trying audioread instead.")
/usr/local/lib/python3.7/dist-packages/librosa/core/audio.py:162: UserWarning: PySoundFile failed. Trying audioread instead.
  warnings.warn("PySoundFile failed. Trying audioread instead.")


# Exercise scaffold: analyse the first file of the small Giant Steps set.
where = "/content/drive/MyDrive/python_scratch/audio/giant_steps_small/*.mp3"
file_list = list(glob.glob(where))

# Load the first 30 seconds of the first matching file.
array_of_sound, samplerate_integer = librosa.load(file_list[0], duration=30)

# Librosa beat_track with its default settings
tempo, beats = librosa.beat.beat_track(y=array_of_sound,
                                       sr=samplerate_integer)

# Librosa's tempo
tempo

# Librosa's onset strength autocorrelation

# Custom onset strength autocorrelation
# Librosa's onset strength DFT method

# Custom  onset strength DFT method

# Essentia's beat tracking algorithm

/usr/local/lib/python3.7/dist-packages/librosa/core/audio.py:162: UserWarning: PySoundFile failed. Trying audioread instead.
  warnings.warn("PySoundFile failed. Trying audioread instead.")

30.0


import librosa as librosa


# Parameters for the exercise. `hop_length` was previously assigned twice in
# a row (512, then 128); only the second value ever took effect, so the dead
# assignment is removed.
seconds_of_file = 10
start_bpm = 120.
n_fft = 4096
hop_length = 128
fmax = 9000
n_mels = 128

# Load the first `seconds_of_file` seconds of the first file.
y, sr = librosa.load(file_list[0], duration=seconds_of_file)

# Tempo estimate; `sr` is passed explicitly so the estimate matches the
# sample rate of the signal just loaded.
tempo = librosa.beat.tempo(y=y,
                           sr=sr,
                           hop_length=hop_length,
                           start_bpm=start_bpm,
                           max_tempo=240.0,
                           aggregate=np.mean)

## compute onset strength envelope
def compute_onset_strength(audio):
    """Compute the custom onset envelope of an audio signal.

    Pipeline: STFT -> power spectrum -> mel-spectrogram (offset by 0.001)
    -> peak normalization -> ``custom_onset``. Reads the module-level
    settings ``n_fft``, ``hop_length``, ``n_mels`` and ``fmax``.
    """
    # short time fourier transform; window is half the FFT size
    stft_matrix = librosa.stft(y=audio,
                               n_fft=n_fft,
                               win_length=n_fft//2,
                               hop_length=hop_length,
                               window="hann",
                               center=True)

    # power spectrum -> mel-spectrogram
    power = np.abs(stft_matrix)**2
    mel_power = librosa.feature.melspectrogram(S=power+0.001,
                                               n_mels=n_mels,
                                               fmax=fmax)

    # normalize by peak energy, then derive the onset envelope
    return custom_onset(mel_power / mel_power.max())

custom_onset_envelope = compute_onset_strength(y)

print(tempo)

/usr/local/lib/python3.7/dist-packages/librosa/core/audio.py:162: UserWarning: PySoundFile failed. Trying audioread instead.
  warnings.warn("PySoundFile failed. Trying audioread instead.")

[172.265625]


print(librosa.note_to_hz("C1"))

32.70319566257483

Audio Analysis¶

A bit of history¶

Beat Tracking¶

Beat¶

Pulse¶

Usage¶

Algorithm structure¶

Imports¶

Audio Signal¶

Librosa's Beat Tracking¶

Listen¶

Spectral Analysis¶

Spectral Energy Flux¶

Custom onset strength function¶

Detection Function¶

Listen¶

Periodicity Estimation¶

Autocorrelation¶

DFT¶

Essentia¶

Compare Tempo Estimations¶

References¶

References¶

Exercise¶

	File Name	Librosa Beat Tracking	Librosa Peaks w/ Custom OS	Essentia Beat Tracking	Librosa Tempo	Autocorr w/ Librosa OS	Autocorr w/ Custom OS	DFT estimate w/ Librosa OS	DFT estimate w/ Custom OS	Essentia Tempo	Cooked BPM
0	1030011.LOFI.mp3	82	67	20	172.265625	41	188	41	188	83.303864	126.000
1	1068430.LOFI.mp3	57	67	21	117.453835	59	174	59	174	86.684395	170.000
2	1084996.LOFI.mp3	60	73	17	143.554688	74	192	74	192	68.772003	140.000
3	1092771.LOFI.mp3	52	79	16	107.666016	76	237	76	237	67.999596	136.000
4	1114156.LOFI.mp3	42	66	21	86.132812	60	171	60	171	85.984192	86.000
5	1118326.LOFI.mp3	85	79	28	172.265625	60	171	60	171	114.075554	171.419
6	1120171.LOFI.mp3	42	40	21	117.453835	59	239	59	238	86.606300	174.000
7	1171800.LOFI.mp3	68	75	17	143.554688	148	209	148	209	70.016792	140.000