from google.colab import drive
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from scipy.stats import ttest_ind
import seaborn as sns
drive.mount('content')
/usr/local/lib/python3.7/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead. import pandas.util.testing as tm
Mounted at content
First, we need to import pandas. We should also import some data!
beyonce = pd.read_csv("content/MyDrive/python_scratch/spotify_data/beyonce.csv")
Alright! Now our data is available in a dataframe.
We can look at the whole thing by typing in beyonce
, or we can get a brief look at it by typing in print(beyonce)
.
print(beyonce)
Unnamed: 0 artist_name ... mode_name key_mode 0 1 Beyoncé ... major B major 1 2 Beyoncé ... minor C# minor 2 3 Beyoncé ... minor F minor 3 4 Beyoncé ... major C major 4 5 Beyoncé ... minor A minor .. ... ... ... ... ... 433 434 Beyoncé ... major F# major 434 435 Beyoncé ... minor F minor 435 436 Beyoncé ... minor B minor 436 437 Beyoncé ... major F major 437 438 Beyoncé ... major E major [438 rows x 40 columns]
For an even more succinct (and possibly more helpful) view of the dataframe, the .info
function can be used.
beyonce.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 438 entries, 0 to 437 Data columns (total 40 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 438 non-null int64 1 artist_name 438 non-null object 2 artist_id 438 non-null object 3 album_id 438 non-null object 4 album_type 438 non-null object 5 album_images 438 non-null object 6 album_release_date 438 non-null object 7 album_release_year 438 non-null int64 8 album_release_date_precision 438 non-null object 9 danceability 438 non-null float64 10 energy 438 non-null float64 11 key 438 non-null int64 12 loudness 438 non-null float64 13 mode 438 non-null int64 14 speechiness 438 non-null float64 15 acousticness 438 non-null float64 16 instrumentalness 438 non-null float64 17 liveness 438 non-null float64 18 valence 438 non-null float64 19 tempo 438 non-null float64 20 track_id 438 non-null object 21 analysis_url 438 non-null object 22 time_signature 438 non-null int64 23 artists 438 non-null object 24 available_markets 438 non-null object 25 disc_number 438 non-null int64 26 duration_ms 438 non-null int64 27 explicit 438 non-null bool 28 track_href 438 non-null object 29 is_local 438 non-null bool 30 track_name 438 non-null object 31 track_preview_url 436 non-null object 32 track_number 438 non-null int64 33 type 438 non-null object 34 track_uri 438 non-null object 35 external_urls.spotify 438 non-null object 36 album_name 438 non-null object 37 key_name 438 non-null object 38 mode_name 438 non-null object 39 key_mode 438 non-null object dtypes: bool(2), float64(9), int64(8), object(21) memory usage: 131.0+ KB
There are some methods and attributes associated with Pandas objects that allow for us to more easily retrieve information. The most commonly used ones are:
.head()
.tail()
With these, you can get the first or last n
rows of a dataframe. Unlike unix, in which the default is 10, the default in Pandas is 5.
## shows the last 5 rows:
beyonce.tail()
Unnamed: 0 | artist_name | artist_id | album_id | album_type | album_images | album_release_date | album_release_year | album_release_date_precision | danceability | energy | key | loudness | mode | speechiness | acousticness | instrumentalness | liveness | valence | tempo | track_id | analysis_url | time_signature | artists | available_markets | disc_number | duration_ms | explicit | track_href | is_local | track_name | track_preview_url | track_number | type | track_uri | external_urls.spotify | album_name | key_name | mode_name | key_mode | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
433 | 434 | Beyoncé | 6vWDO969PvNqNYHIOW5v0m | 25hVFAxTlDvXbx2X2QkUkE | album | list(height = c(640, 300, 64), url = c("https:... | 2003 | 2003 | year | 0.619 | 0.4020 | 6 | -8.054 | 1 | 0.0915 | 0.334 | 0.0 | 0.177 | 0.489 | 99.947 | 29W4t00Bj5uBwmtuphAhyY | https://api.spotify.com/v1/audio-analysis/29W4... | 4 | list(href = "https://api.spotify.com/v1/artist... | c("AT", "CH", "DE", "LI") | 1 | 293106 | False | https://api.spotify.com/v1/tracks/29W4t00Bj5uB... | False | Dangerously In Love | https://p.scdn.co/mp3-preview/aa580bad422a1e7d... | 12 | track | spotify:track:29W4t00Bj5uBwmtuphAhyY | https://open.spotify.com/track/29W4t00Bj5uBwmt... | Dangerously In Love (Alben für die Ewigkeit) | F# | major | F# major |
434 | 435 | Beyoncé | 6vWDO969PvNqNYHIOW5v0m | 25hVFAxTlDvXbx2X2QkUkE | album | list(height = c(640, 300, 64), url = c("https:... | 2003 | 2003 | year | 0.693 | 0.0457 | 5 | -20.803 | 0 | 0.8030 | 0.423 | 0.0 | 0.122 | 0.541 | 133.173 | 74LAP3swRIVaUFEcWi90Iv | https://api.spotify.com/v1/audio-analysis/74LA... | 3 | list(href = "https://api.spotify.com/v1/artist... | c("AT", "CH", "DE", "LI") | 1 | 16360 | False | https://api.spotify.com/v1/tracks/74LAP3swRIVa... | False | Beyoncé Interlude | https://p.scdn.co/mp3-preview/f3d6675d78a00d62... | 13 | track | spotify:track:74LAP3swRIVaUFEcWi90Iv | https://open.spotify.com/track/74LAP3swRIVaUFE... | Dangerously In Love (Alben für die Ewigkeit) | F | minor | F minor |
435 | 436 | Beyoncé | 6vWDO969PvNqNYHIOW5v0m | 25hVFAxTlDvXbx2X2QkUkE | album | list(height = c(640, 300, 64), url = c("https:... | 2003 | 2003 | year | 0.349 | 0.2650 | 11 | -12.790 | 0 | 0.0542 | 0.904 | 0.0 | 0.241 | 0.311 | 93.615 | 1ABGVrP1QEzjMPfzM0KXKt | https://api.spotify.com/v1/audio-analysis/1ABG... | 3 | list(href = "https://api.spotify.com/v1/artist... | c("AT", "CH", "DE", "LI") | 1 | 163706 | False | https://api.spotify.com/v1/tracks/1ABGVrP1QEzj... | False | Gift from Virgo | https://p.scdn.co/mp3-preview/e7e629a306fd3438... | 14 | track | spotify:track:1ABGVrP1QEzjMPfzM0KXKt | https://open.spotify.com/track/1ABGVrP1QEzjMPf... | Dangerously In Love (Alben für die Ewigkeit) | B | minor | B minor |
436 | 437 | Beyoncé | 6vWDO969PvNqNYHIOW5v0m | 25hVFAxTlDvXbx2X2QkUkE | album | list(height = c(640, 300, 64), url = c("https:... | 2003 | 2003 | year | 0.683 | 0.7630 | 5 | -6.588 | 1 | 0.1910 | 0.427 | 0.0 | 0.184 | 0.948 | 173.954 | 23npFplgR5Uuj5FwyVvy7h | https://api.spotify.com/v1/audio-analysis/23np... | 4 | list(href = "https://api.spotify.com/v1/artist... | c("AT", "CH", "DE", "LI") | 1 | 246066 | False | https://api.spotify.com/v1/tracks/23npFplgR5Uu... | False | Work It Out | https://p.scdn.co/mp3-preview/e9898d1c265ea607... | 15 | track | spotify:track:23npFplgR5Uuj5FwyVvy7h | https://open.spotify.com/track/23npFplgR5Uuj5F... | Dangerously In Love (Alben für die Ewigkeit) | F | major | F major |
437 | 438 | Beyoncé | 6vWDO969PvNqNYHIOW5v0m | 25hVFAxTlDvXbx2X2QkUkE | album | list(height = c(640, 300, 64), url = c("https:... | 2003 | 2003 | year | 0.515 | 0.7050 | 4 | -7.924 | 1 | 0.3700 | 0.240 | 0.0 | 0.182 | 0.198 | 89.861 | 6IeECiLESgmQDy5IKt5tiA | https://api.spotify.com/v1/audio-analysis/6IeE... | 4 | list(href = c("https://api.spotify.com/v1/arti... | character(0) | 1 | 517120 | False | https://api.spotify.com/v1/tracks/6IeECiLESgmQ... | False | '03 Bonnie & Clyde | NaN | 16 | track | spotify:track:6IeECiLESgmQDy5IKt5tiA | https://open.spotify.com/track/6IeECiLESgmQDy5... | Dangerously In Love (Alben für die Ewigkeit) | E | major | E major |
#shows the first 3 rows:
beyonce.head(3)
Unnamed: 0 | artist_name | artist_id | album_id | album_type | album_images | album_release_date | album_release_year | album_release_date_precision | danceability | energy | key | loudness | mode | speechiness | acousticness | instrumentalness | liveness | valence | tempo | track_id | analysis_url | time_signature | artists | available_markets | disc_number | duration_ms | explicit | track_href | is_local | track_name | track_preview_url | track_number | type | track_uri | external_urls.spotify | album_name | key_name | mode_name | key_mode | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Beyoncé | 6vWDO969PvNqNYHIOW5v0m | 2UJwKSBUz6rtW4QLK74kQu | album | list(height = c(640, 300, 64), url = c("https:... | 2014-11-24 | 2014 | day | 0.512 | 0.625 | 11 | -6.764 | 1 | 0.0508 | 0.00308 | 0.000164 | 0.346 | 0.201 | 129.935 | 7lUA4P03AhwAw40JHkdyGr | https://api.spotify.com/v1/audio-analysis/7lUA... | 4 | list(href = "https://api.spotify.com/v1/artist... | c("AD", "AR", "AT", "AU", "BE", "BG", "BO", "B... | 1 | 257653 | False | https://api.spotify.com/v1/tracks/7lUA4P03AhwA... | False | Pretty Hurts | https://p.scdn.co/mp3-preview/fd617af2cdf7ea1b... | 1 | track | spotify:track:7lUA4P03AhwAw40JHkdyGr | https://open.spotify.com/track/7lUA4P03AhwAw40... | BEYONCÉ [Platinum Edition] | B | major | B major |
1 | 2 | Beyoncé | 6vWDO969PvNqNYHIOW5v0m | 2UJwKSBUz6rtW4QLK74kQu | album | list(height = c(640, 300, 64), url = c("https:... | 2014-11-24 | 2014 | day | 0.436 | 0.534 | 1 | -9.416 | 0 | 0.0773 | 0.10300 | 0.006430 | 0.507 | 0.325 | 122.822 | 7cioKB5CHVzk09SOtTyn0T | https://api.spotify.com/v1/audio-analysis/7cio... | 4 | list(href = "https://api.spotify.com/v1/artist... | c("AD", "AR", "AT", "AU", "BE", "BG", "BO", "B... | 1 | 369040 | True | https://api.spotify.com/v1/tracks/7cioKB5CHVzk... | False | Haunted | https://p.scdn.co/mp3-preview/63fcd43b454b1dc9... | 2 | track | spotify:track:7cioKB5CHVzk09SOtTyn0T | https://open.spotify.com/track/7cioKB5CHVzk09S... | BEYONCÉ [Platinum Edition] | C# | minor | C# minor |
2 | 3 | Beyoncé | 6vWDO969PvNqNYHIOW5v0m | 2UJwKSBUz6rtW4QLK74kQu | album | list(height = c(640, 300, 64), url = c("https:... | 2014-11-24 | 2014 | day | 0.589 | 0.621 | 5 | -6.902 | 0 | 0.0468 | 0.00969 | 0.001040 | 0.181 | 0.401 | 140.030 | 6jG2YzhxptolDzLHTGLt7S | https://api.spotify.com/v1/audio-analysis/6jG2... | 4 | list(href = c("https://api.spotify.com/v1/arti... | c("AD", "AR", "AT", "AU", "BE", "BG", "BO", "B... | 1 | 323480 | True | https://api.spotify.com/v1/tracks/6jG2Yzhxptol... | False | Drunk in Love | https://p.scdn.co/mp3-preview/7052c45ae9298c38... | 3 | track | spotify:track:6jG2YzhxptolDzLHTGLt7S | https://open.spotify.com/track/6jG2YzhxptolDzL... | BEYONCÉ [Platinum Edition] | F | minor | F minor |
There are also some And attributes:
.columns
.dtypes
.shape
With the .columns
function, you can access the column labels of the dataframe.
Using .dtypes
returns the data types of each column in the dataframe.
The .shape
function returns number of rows and columns in the dataframe.
beyonce.columns
Index(['Unnamed: 0', 'artist_name', 'artist_id', 'album_id', 'album_type', 'album_images', 'album_release_date', 'album_release_year', 'album_release_date_precision', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'track_id', 'analysis_url', 'time_signature', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'track_href', 'is_local', 'track_name', 'track_preview_url', 'track_number', 'type', 'track_uri', 'external_urls.spotify', 'album_name', 'key_name', 'mode_name', 'key_mode'], dtype='object')
beyonce.dtypes
Unnamed: 0 int64 artist_name object artist_id object album_id object album_type object album_images object album_release_date object album_release_year int64 album_release_date_precision object danceability float64 energy float64 key int64 loudness float64 mode int64 speechiness float64 acousticness float64 instrumentalness float64 liveness float64 valence float64 tempo float64 track_id object analysis_url object time_signature int64 artists object available_markets object disc_number int64 duration_ms int64 explicit bool track_href object is_local bool track_name object track_preview_url object track_number int64 type object track_uri object external_urls.spotify object album_name object key_name object mode_name object key_mode object dtype: object
beyonce.shape
(438, 40)
.iloc
, (integer location) can look up rows by position.
.loc
can find columns by label or lookup.
###finds the fourth row:
beyonce.iloc[3]
Unnamed: 0 4 artist_name Beyoncé artist_id 6vWDO969PvNqNYHIOW5v0m album_id 2UJwKSBUz6rtW4QLK74kQu album_type album album_images list(height = c(640, 300, 64), url = c("https:... album_release_date 2014-11-24 album_release_year 2014 album_release_date_precision day danceability 0.875 energy 0.434 key 0 loudness -7.683 mode 1 speechiness 0.091 acousticness 0.242 instrumentalness 0.00146 liveness 0.234 valence 0.777 tempo 120.065 track_id 6wwrYruEgWlowPDZMq5116 analysis_url https://api.spotify.com/v1/audio-analysis/6wwr... time_signature 4 artists list(href = "https://api.spotify.com/v1/artist... available_markets c("AD", "AR", "AT", "AU", "BE", "BG", "BO", "B... disc_number 1 duration_ms 309720 explicit False track_href https://api.spotify.com/v1/tracks/6wwrYruEgWlo... is_local False track_name Blow track_preview_url https://p.scdn.co/mp3-preview/7939ef46f6f8992a... track_number 4 type track track_uri spotify:track:6wwrYruEgWlowPDZMq5116 external_urls.spotify https://open.spotify.com/track/6wwrYruEgWlowPD... album_name BEYONCÉ [Platinum Edition] key_name C mode_name major key_mode C major Name: 3, dtype: object
## or a range:
beyonce.iloc[-1]
Unnamed: 0 438 artist_name Beyoncé artist_id 6vWDO969PvNqNYHIOW5v0m album_id 25hVFAxTlDvXbx2X2QkUkE album_type album album_images list(height = c(640, 300, 64), url = c("https:... album_release_date 2003 album_release_year 2003 album_release_date_precision year danceability 0.515 energy 0.705 key 4 loudness -7.924 mode 1 speechiness 0.37 acousticness 0.24 instrumentalness 0 liveness 0.182 valence 0.198 tempo 89.861 track_id 6IeECiLESgmQDy5IKt5tiA analysis_url https://api.spotify.com/v1/audio-analysis/6IeE... time_signature 4 artists list(href = c("https://api.spotify.com/v1/arti... available_markets character(0) disc_number 1 duration_ms 517120 explicit False track_href https://api.spotify.com/v1/tracks/6IeECiLESgmQ... is_local False track_name '03 Bonnie & Clyde track_preview_url NaN track_number 16 type track track_uri spotify:track:6IeECiLESgmQDy5IKt5tiA external_urls.spotify https://open.spotify.com/track/6IeECiLESgmQDy5... album_name Dangerously In Love (Alben für die Ewigkeit) key_name E mode_name major key_mode E major Name: 437, dtype: object
With .loc
we can select columns based on both their row index and column name. For example:
beyonce.loc[1:3, 'tempo']
1 122.822 2 140.030 3 120.065 Name: tempo, dtype: float64
We can find columns by names like so:
beyonce_tempi = beyonce['tempo']
for i in beyonce_tempi:
if i < 120:
print("slow")
else:
print("fast")
###or just find fast pieces:
tempo = beyonce['tempo']
fast = beyonce[tempo > 160]
print(fast)
Let's look at how to find some summary statistics first.
## the mean "acousticness" in Beyonce.
beyonce["acousticness"].mean()
0.18018508515981732
## the median "acousticness" in Beyonce.
beyonce["acousticness"].median()
0.0947
beyonce[["tempo", "danceability", "acousticness"]].describe()
tempo | danceability | acousticness | |
---|---|---|---|
count | 438.000000 | 438.000000 | 438.000000 |
mean | 117.535203 | 0.579792 | 0.180185 |
std | 32.090024 | 0.168656 | 0.210869 |
min | 63.345000 | 0.143000 | 0.000066 |
25% | 93.615000 | 0.467250 | 0.025875 |
50% | 107.051000 | 0.576000 | 0.094700 |
75% | 137.093500 | 0.709750 | 0.247000 |
max | 200.053000 | 0.925000 | 0.904000 |
## are Beyonce's "explicit" songs faster?
beyonce[["tempo", "explicit"]].groupby("explicit").mean()
tempo | |
---|---|
explicit | |
False | 117.232375 |
True | 131.970000 |
## do they differ in other ways?
beyonce.groupby("explicit").mean()
Unnamed: 0 | album_release_year | danceability | energy | key | loudness | mode | speechiness | acousticness | instrumentalness | liveness | valence | tempo | time_signature | disc_number | duration_ms | is_local | track_number | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
explicit | ||||||||||||||||||
False | 223.622378 | 2007.983683 | 0.581019 | 0.645393 | 5.275058 | -6.981506 | 0.731935 | 0.174589 | 0.180921 | 0.013613 | 0.244634 | 0.469716 | 117.232375 | 3.895105 | 1.146853 | 241044.209790 | False | 7.393939 |
True | 23.000000 | 2013.444444 | 0.521333 | 0.550889 | 4.777778 | -8.107333 | 0.555556 | 0.171011 | 0.145088 | 0.009180 | 0.325533 | 0.353178 | 131.970000 | 3.888889 | 1.222222 | 327531.555556 | False | 5.777778 |
### does meter matter?
beyonce.groupby(["explicit", "time_signature"])["tempo"].mean()
explicit time_signature False 1 125.738000 3 113.264684 4 118.316360 5 79.544625 True 3 140.415000 4 130.914375 Name: tempo, dtype: float64
# what are these counts, though?
beyonce.groupby(["explicit", "time_signature"])["tempo"].count()
explicit time_signature False 1 5 3 38 4 378 5 8 True 3 1 4 8 Name: tempo, dtype: int64
## standard deviation
beyonce["acousticness"].std()
0.21086927911388673
tempo = beyonce['tempo']
danceability = beyonce['danceability']
tempo = tempo.values.reshape(-1, 1) # values converts it into a numpy array
danceability = danceability.values.reshape(-1, 1)
linear_regressor = LinearRegression() # create object for the class
estimate = linear_regressor.fit(tempo, danceability) # perform linear regression
danceability_pred = linear_regressor.predict(tempo) # make predictions
plt.scatter(tempo, danceability)
plt.plot(tempo, danceability_pred, color='red')
plt.show()
### using the stats toolkit (sm) we can get specifics of the model.
tempo_2 = sm.add_constant(tempo)
est = sm.OLS(danceability, tempo_2)
est2 = est.fit()
print(est2.summary())
OLS Regression Results ============================================================================== Dep. Variable: y R-squared: 0.001 Model: OLS Adj. R-squared: -0.001 Method: Least Squares F-statistic: 0.3908 Date: Mon, 29 Mar 2021 Prob (F-statistic): 0.532 Time: 19:01:01 Log-Likelihood: 158.80 No. Observations: 438 AIC: -313.6 Df Residuals: 436 BIC: -305.4 Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ const 0.5613 0.031 18.313 0.000 0.501 0.622 x1 0.0002 0.000 0.625 0.532 -0.000 0.001 ============================================================================== Omnibus: 18.672 Durbin-Watson: 1.261 Prob(Omnibus): 0.000 Jarque-Bera (JB): 9.201 Skew: -0.130 Prob(JB): 0.0100 Kurtosis: 2.339 Cond. No. 463. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Let's state a hypothesis:
H1: Beyonce's music becomes less "danceable" over time.
year = beyonce['album_release_year'].values.reshape(-1,1)
danceability = beyonce['danceability'].values.reshape(-1,1)
linear_regressor = LinearRegression() # create object for the class
estimate = linear_regressor.fit(year, danceability) # perform linear regression
danceability_pred = linear_regressor.predict(year) # make predictions
plt.scatter(year, danceability)
plt.plot(year, danceability_pred, color='red')
plt.show()
### using the stats toolkit (sm) we can get specifics of the model.
tempo_2 = sm.add_constant(tempo)
est = sm.OLS(year, tempo_2)
est2 = est.fit()
print(est2.summary())
OLS Regression Results ============================================================================== Dep. Variable: y R-squared: 0.004 Model: OLS Adj. R-squared: 0.001 Method: Least Squares F-statistic: 1.600 Date: Mon, 29 Mar 2021 Prob (F-statistic): 0.207 Time: 19:02:50 Log-Likelihood: -1094.0 No. Observations: 438 AIC: 2192. Df Residuals: 436 BIC: 2200. Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ const 2007.4426 0.535 3750.100 0.000 2006.390 2008.495 x1 0.0056 0.004 1.265 0.207 -0.003 0.014 ============================================================================== Omnibus: 9.957 Durbin-Watson: 0.011 Prob(Omnibus): 0.007 Jarque-Bera (JB): 9.879 Skew: 0.336 Prob(JB): 0.00716 Kurtosis: 2.699 Cond. No. 463. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
We were not able to reject the null hypothesis.
Let's keep digging!
Is Beyonce's music faster than Taylor Swift's?
Ttest_indResult(statistic=array([-0.57972078]), pvalue=array([0.56223576]))
taylor = pd.read_csv("content/MyDrive/python_scratch/spotify_data/taylor.csv")
beyonce_tempo = beyonce['tempo'].values.reshape(-1,1)
taylor_tempo = taylor['tempo'].values.reshape(-1,1)
tset, pval = ttest_ind(beyonce_tempo, taylor_tempo)
print(f"the p-value is:{pval}, tset")
if pval < 0.05: # alpha value is 0.05 or 5%
print("We are rejecting the null hypothesis. Hooray")
else:
print("We are accepting null hypothesis that the distributions are not significantly different.")
the p-value is:[0.56223576], tset We are accepting null hypothesis that the distributions are not significantly different.
tempi = list(zip(taylor_tempo, beyonce_tempo))
df = pd.DataFrame(tempi, columns = ['Taylor', 'Beyonce'])
print(df)
Taylor Beyonce 0 [160.015] [129.935] 1 [159.07299999999998] [122.822] 2 [82.98899999999999] [140.03] 3 [135.917] [120.065] 4 [95.045] [111.58] .. ... ... 433 [96.03399999999999] [99.947] 434 [108.76700000000001] [133.173] 435 [100.02799999999999] [93.615] 436 [61.867] [173.954] 437 [95.48700000000001] [89.861] [438 rows x 2 columns]
sns.distplot(df[['Taylor']], hist=False, rug=True)
sns.distplot(df[['Beyonce']], hist=False, rug=True)
/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning) /usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2056: FutureWarning: The `axis` variable is no longer used and will be removed. Instead, assign variables directly to `x` or `y`. warnings.warn(msg, FutureWarning) /usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning) /usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2056: FutureWarning: The `axis` variable is no longer used and will be removed. Instead, assign variables directly to `x` or `y`. warnings.warn(msg, FutureWarning)
<matplotlib.axes._subplots.AxesSubplot at 0x7f562f9dce90>
We've done a lot of things on a single object.
Wouldn't this be a good use of a class? How would we turn all of these ideas that we've just analyzed into a class?
### your code here.
class Compare_Data:
def __init__(self, source1, source2):
self.file_1 = pd.read_csv(source1)
self.file_2 = pd.read_csv(source2)
def plot_tempi(self):
tempo1 = self.file_1["tempo"].values.reshape(-1,1)
tempo2 = self.file_2["tempo"].values.reshape(-1,1)
sns.distplot(df[[tempo1]], hist=False, rug=True)
sns.distplot(df[[tempo2]], hist=False, rug=True)
# def compare_danceability() :
comparison = Compare_Data("content/MyDrive/python_scratch/spotify_data/taylor.csv", "content/MyDrive/python_scratch/spotify_data/beyonce.csv")
comparison.plot_tempi()
1[[160.015] [159.073] [ 82.989] [135.917] [ 95.045] [128.07 ] [ 74.957] [ 92.027] [172.054] [110.01 ] [160.024] [120.085] [163.96 ] [163.954] [ 94.922] [ 93.061] [139.997] [126.068] [124.985] [157.043] [ 99.959] [178.013] [104.009] [ 77.318] [117.97 ] [163.886] [119.997] [ 74.957] [ 95.122] [121.034] [114.977] [129.962] [162.012] [124.894] [109.995] [171.791] [116.992] [100.163] [ 81.963] [ 96.006] [159.837] [ 92.008] [ 79.926] [114.984] [143.95 ] [117.935] [ 96.005] [ 73.849] [143.944] [ 80.007] [ 79.025] [146.147] [139.908] [128.057] [116.001] [118.986] [184.014] [ 92.673] [143.936] [126.014] [159.965] [ 67.303] [160.015] [172.909] [159.073] [ 98.753] [ 82.989] [ 86.424] [135.917] [ 85.319] [ 95.045] [ 77.411] [128.07 ] [123.703] [ 74.957] [ 76.531] [ 92.027] [129.971] [172.054] [ 83.407] [110.01 ] [129.863] [160.024] [100.283] [120.085] [130.302] [163.96 ] [ 68.253] [163.954] [130.315] [ 94.922] [ 69.091] [117.023] [ 96.009] [ 95.021] [ 91.993] [ 96.969] [160.02 ] [118.027] [170.157] [140.06 ] [119.982] [ 71.981] [159.959] [103.981] [117.023] [ 96.009] [ 95.021] [ 91.993] [ 96.969] [160.02 ] [118.027] [170.157] [140.06 ] [119.982] [ 71.981] [159.959] [103.981] [117.015] [191.963] [ 94.997] [ 92.012] [ 96.995] [159.967] [117.976] [170.057] [140.046] [119.98 ] [144.057] [160.052] [103.989] [ 77.502] [116.992] [100.356] [ 95.997] [ 88.155] [ 94.933] [ 93.705] [ 92.008] [105.468] [ 96.97 ] [ 87.915] [160.078] [ 79.281] [118.035] [ 81.512] [170.216] [ 86.883] [139.997] [ 79.214] [119.997] [ 78.761] [143.95 ] [113.854] [159.965] [ 99.561] [103.97 ] [117.012] [ 96.031] [ 94.992] [ 92.008] [ 97.031] [160.041] [118.026] [170.141] [140.035] [119.987] [143.919] [ 79.999] [103.975] [129.943] [124.962] [110.001] [154.228] [ 93.052] [103.986] [145.901] [ 86.035] [ 99.989] [ 93.986] [157.001] [129.975] [117.942] [ 80.042] [126.003] [ 78.748] [129.979] [124.988] [110.007] [ 77.02 ] [ 93.01 ] [104.001] [146.084] [ 86.014] [ 99.993] [ 94.016] [157.053] [129.992] [117.972] [ 80.034] [126.022] [ 79.002] [129.986] [124.989] [110.04 ] [ 77.001] [ 93.014] [103.989] [146.034] [ 86.005] [100.002] [ 94.015] [157.02 ] [129.965] [117.99 ] [ 80.048] [126.025] [ 78.966] [ 88.084] [129.968] [ 92.483] [124.978] [173.317] [109.995] [ 87.576] [ 77.019] [123.849] [ 93.06 ] [133.933] [103.987] [ 95.101] [145.865] [ 68.151] [ 85.984] [ 77.449] [ 99.981] [ 94.766] [ 94.069] [ 87.227] [157.043] [ 84.382] [129.987] [ 78.286] [117.937] [101.873] [ 80.016] [134.89 ] [126.018] [ 95.371] [ 79.025] [121.07 ] [114.987] [141.893] [118.975] [119.386] [163.974] [139.898] [124.91 ] [163.893] [145.821] [134.05 ] [162.088] [ 89.038] [204.125] [121.045] [114.987] [141.971] [118.981] [119.512] [163.973] [140.012] [124.995] [ 82.003] [146.023] [134.024] [162.185] [159.012] [102.016] [114.976] [121.013] [139.756] [ 82.008] [ 78.323] [142.043] [145.921] [118.879] [158.027] [ 81.995] [102.11 ] [ 97.406] [119.395] [ 81.988] [162.288] [101.979] [114.981] [120.969] [139.813] [163.752] [ 77.769] [142.057] [145.954] [118.876] [ 79.517] [109.025] [104.46 ] [ 97.397] [119.518] [163.678] [162.157] [101.939] [ 66.869] [121.061] [ 64.792] [114.998] [128.394] [141.893] [ 85.222] [118.959] [ 83.656] [119.367] [ 85.001] [163.98 ] [ 95.847] [139.907] [143.429] [124.899] [ 51.351] [163.9 ] [104.978] [145.87 ] [146.991] [134.032] [149.646] [162.145] [ 84.599] [ 72.619] [ 74.299] [204.104] [121.042] [114.975] [141.887] [118.958] [119.443] [163.947] [139.973] [124.897] [ 81.917] [145.871] [134.008] [162.127] [158.867] [102.013] [100.019] [ 95.431] [119.054] [116.001] [ 92.354] [129.967] [148.305] [ 99.978] [134.059] [160.92 ] [128.048] [126.189] [ 95.996] [177.822] [ 99.942] [168.137] [ 96.019] [105.583] [143.997] [105.995] [200.056] [ 95.349] [119.056] [115.994] [ 92.71 ] [129.98 ] [147.913] [ 99.989] [133.956] [160.92 ] [128.007] [126.074] [ 96.018] [177.865] [ 99.947] [168.077] [127.997] [ 99.972] [ 95.485] [118.984] [115.994] [ 92.702] [129.964] [ 73.975] [ 99.96 ] [133.959] [160.927] [128.042] [126.084] [ 96.013] [100.047] [ 95.469] [119.06 ] [116.048] [ 92.613] [130.009] [ 73.989] [100.024] [134.074] [160.901] [128.034] [126.032] [ 96.015] [ 79.985] [199.965] [120.02 ] [143.952] [ 85.953] [ 81.921] [100.029] [ 95.407] [119.004] [115.972] [185.776] [129.987] [147.988] [ 99.99 ] [133.927] [160.884] [128.046] [126.075] [ 96.034] [108.767] [100.028] [ 61.867] [ 95.487] [ 90.743] [118.984] [ 47.607] [116.01 ] [103.247] [ 92.598] [126.254] [129.966] [ 99.358] [147.988] [ 99.948] [100.007] [ 62.327] [133.939] [111.601] [160.989] [ 67.722] [128.052] [ 84.469] [126.083] [ 78.855] [ 96. ] [100.036] [ 95.452] [118.994] [116.009] [ 92.166] [129.973] [147.96 ] [100.074] [133.932] [160.933] [128.024] [126.093] [ 95.953] [152.175] [105.587] [ 99.966] [115.012] [175.662] [112.979] [146.172] [131.646] [167.974] [150.802] [177.874] [143.983] [ 96.052] [156.035] [100.021] [151.953] [105.494] [ 99.968] [115.005] [175.597] [113.013] [146.132] [131.654] [167.914] [ 73.209] [ 89.027] [143.936] [ 96.023] [156.145] [199.9 ] [ 75.994] [105.482] [100.064] [115.012] [ 87.958] [113.021] [146.059] [131.729] [168.087] [151.048] [177.997] [144.013] [ 96.001] [155.972] [171.246] [ 76. ] [ 60.266] [105.607] [ 84.758] [ 99.962] [ 75.683] [114.982] [138.159] [175.974] [ 94.383] [113.002] [ 67.906] [146.118] [ 99.845] [131.607] [118.747] [167.935] [128.144] [150.721] [ 83.734] [ 89.128] [108.258] [143.94 ] [ 77.762] [ 96.02 ] [ 81.209] [156.088] [ 80.865] [199.909]] 2[[129.935] [122.822] [140.03 ] [120.065] [111.58 ] [185.571] [133.97 ] [140.415] [103.009] [170.085] [140.553] [ 80.334] [102.601] [ 90.567] [136.024] [136.801] [139.748] [ 98.011] [120.024] [195.956] [129.935] [122.278] [140.006] [120.068] [111.58 ] [ 87.953] [133.929] [140.419] [103.012] [170.056] [136.832] [ 80.334] [102.601] [ 90.567] [136.024] [136.825] [139.672] [ 98.028] [120.034] [124.979] [128.015] [ 83.703] [119.496] [127.993] [127.995] [117.913] [ 63.345] [ 86.934] [167.963] [ 99.099] [160.979] [ 72.032] [150.006] [ 94.1 ] [167.349] [117.999] [110.992] [127.086] [116.01 ] [124.04 ] [110.005] [127.003] [128.02 ] [129.022] [ 63.345] [ 86.934] [167.963] [ 99.099] [160.979] [ 72.032] [150.006] [ 94.1 ] [167.349] [117.999] [110.992] [127.086] [ 63.345] [ 86.934] [167.963] [ 99.099] [160.979] [ 72.032] [150.006] [ 94.1 ] [167.349] [117.999] [110.992] [127.086] [ 94.1 ] [160.979] [124.04 ] [167.349] [167.963] [110.005] [ 86.934] [ 72.032] [ 63.345] [117.999] [127.086] [ 99.099] [150.006] [110.992] [148.045] [ 99.13 ] [ 99.946] [ 99.027] [ 96.618] [ 94.982] [104.838] [169.502] [100.105] [146.248] [135.995] [ 83.132] [129.117] [ 91.13 ] [144.848] [ 88.515] [ 83.213] [103.293] [ 84.937] [137.164] [ 75.195] [124.061] [ 97.083] [143.024] [155.075] [125.999] [128.005] [131.997] [128.008] [168.247] [122.988] [126.995] [ 78.781] [ 90.007] [ 79.983] [ 78.412] [ 82.149] [104.957] [ 95.068] [ 64.639] [115.036] [116.966] [193.437] [135.771] [146.092] [121.949] [ 84.769] [ 97.515] [ 79.454] [ 86.961] [116.966] [ 90.007] [ 79.983] [ 78.412] [ 82.149] [104.957] [ 95.068] [109.859] [115.036] [193.437] [135.771] [146.092] [121.949] [ 84.769] [ 97.515] [ 79.454] [ 86.961] [ 90.007] [ 79.983] [ 78.412] [ 82.149] [104.957] [ 64.639] [116.966] [193.437] [135.771] [146.092] [121.949] [ 84.769] [136.882] [ 90.007] [ 79.983] [ 78.412] [ 82.149] [104.957] [ 64.639] [193.437] [135.771] [146.092] [121.949] [ 84.769] [ 90.007] [ 79.983] [ 78.412] [ 82.149] [104.957] [ 64.639] [117.035] [193.437] [135.771] [146.092] [121.949] [ 84.769] [ 90.007] [ 79.983] [ 78.412] [ 82.149] [104.957] [ 95.068] [109.859] [115.036] [193.351] [135.743] [146.1 ] [121.985] [155.9 ] [129.963] [ 89.598] [ 86.927] [125.01 ] [ 78.05 ] [ 99.289] [ 98.84 ] [106.376] [ 94.828] [ 91.973] [ 99.993] [166.538] [ 92.253] [152.751] [140.767] [112.791] [ 85.372] [ 93.439] [ 84.715] [ 92.841] [166.201] [106.114] [ 96.462] [124.742] [127.331] [ 86.941] [ 91.977] [175.868] [107.045] [157.992] [124.821] [ 84.547] [151.947] [ 92.161] [ 96.54 ] [ 94.922] [ 98.96 ] [ 93.852] [105.253] [169.673] [197.431] [122.394] [167.875] [122.868] [175.861] [ 91.962] [ 91.977] [ 92.015] [176.042] [ 91.973] [175.891] [107.051] [ 79.006] [124.811] [169.735] [151.922] [ 94.867] [ 96.543] [ 98.949] [ 93.785] [105.785] [169.839] [ 97.855] [121.689] [ 92. ] [166.032] [168.072] [ 92.015] [ 91.973] [175.891] [107.051] [ 79.006] [124.811] [169.735] [151.921] [ 96.542] [ 98.957] [ 93.804] [105.775] [170.137] [195.146] [122.956] [ 91.625] [167.875] [122.868] [175.861] [ 91.962] [ 91.977] [ 92.015] [176.042] [ 91.973] [175.891] [107.051] [ 79.006] [124.811] [169.735] [151.922] [ 94.867] [ 96.543] [ 98.949] [ 93.785] [105.785] [169.839] [ 97.855] [121.689] [ 92. ] [166.032] [168.072] [ 92.013] [ 98.992] [105.923] [ 96.511] [ 93.81 ] [169.51 ] [168.507] [157.919] [ 98.953] [107.034] [175.906] [102.131] [108.161] [122.658] [ 96.521] [105.923] [ 96.511] [ 93.81 ] [169.51 ] [168.507] [157.919] [ 98.953] [107.034] [175.906] [102.131] [108.161] [122.658] [ 96.521] [105.923] [ 96.511] [ 93.81 ] [169.51 ] [168.507] [157.919] [ 98.953] [107.034] [175.906] [102.131] [166.042] [105.906] [ 96.512] [ 93.848] [ 84.931] [169.19 ] [157.951] [ 99.023] [107.039] [175.855] [ 98.074] [166.042] [104.963] [115.81 ] [121.929] [ 96.559] [105.923] [ 96.511] [ 93.81 ] [169.51 ] [168.507] [157.919] [ 98.953] [107.034] [175.906] [102.131] [166.042] [ 99.165] [200.053] [ 91.025] [166.572] [ 74.928] [ 83.61 ] [112.694] [ 76.758] [112.708] [103.451] [116.109] [ 99.982] [152.48 ] [137.24 ] [123.46 ] [ 99.259] [ 99.973] [ 91.039] [166.602] [ 74.934] [ 83.615] [112.439] [ 74.283] [112.726] [ 84.2 ] [116.153] [ 99.947] [133.173] [ 93.615] [173.954] [ 89.889] [ 99.259] [ 99.973] [ 91.039] [166.602] [ 74.934] [ 83.615] [112.439] [ 74.283] [112.726] [ 84.2 ] [116.153] [ 99.947] [133.173] [ 93.615] [173.954] [ 89.861]]