Upload 19 files
#1
by
keesephillips
- opened
- .gitattributes +2 -0
- README.md +50 -0
- assets/music_notes.png +0 -0
- assets/trumpet.png +0 -0
- data/processed/artist_album.csv +3 -0
- data/processed/playlists.csv +3 -0
- data/raw/data/playlists_100.parquet +3 -0
- data/raw/data/playlists_150.parquet +3 -0
- data/raw/data/playlists_200.parquet +3 -0
- data/raw/data/playlists_50.parquet +3 -0
- main.py +152 -0
- model.ipynb +1006 -0
- models/recommender.pt +3 -0
- notebooks/dbscan.ipynb +748 -0
- notebooks/nn_collab_filter.ipynb +748 -0
- requirements.txt +0 -0
- scripts/build_features.py +102 -0
- scripts/make_dataset.py +82 -0
- scripts/model.py +156 -0
- setup.py +14 -0
.gitattributes
CHANGED
|
@@ -35,3 +35,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
recommendation_module_project/data/processed/artist_album.csv filter=lfs diff=lfs merge=lfs -text
|
| 37 |
recommendation_module_project/data/processed/playlists.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
recommendation_module_project/data/processed/artist_album.csv filter=lfs diff=lfs merge=lfs -text
|
| 37 |
recommendation_module_project/data/processed/playlists.csv filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
data/processed/artist_album.csv filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
data/processed/playlists.csv filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AIPI Recommendation Module Project
|
| 2 |
+
## Developer: Keese Phillips
|
| 3 |
+
|
| 4 |
+
## About:
|
| 5 |
+
The purpose of this project is to create recommendations for different albums based on the user's playlists. This will allow the user to discover new music and possible additions to the playlist. The model is trained on a dataset from Spotify which is a combination of one million user playlists of all genders and ages. This was part of an initiative from Spotify for the community to find the best recommendation model. To download the dataset please visit [Spotify Challenge](https://www.aicrowd.com/challenges/spotify-million-playlist-dataset-challenge) and sign up for the challenge.
|
| 6 |
+
|
| 7 |
+
## How to run the project
|
| 8 |
+
|
| 9 |
+
### If you want to run the full pipeline and train the model from scratch
|
| 10 |
+
1. You will need to visit the [challenge site](https://www.aicrowd.com/challenges/spotify-million-playlist-dataset-challenge) and sign up to be able to download the dataset
|
| 11 |
+
2. You will need to install all of the necessary packages to run the setup.py script beforehand
|
| 12 |
+
3. You will then need to run setup.py to create the data pipeline and train the model
|
| 13 |
+
4. You will then need to run the frontend to use the model
|
| 14 |
+
```bash
|
| 15 |
+
pip install -r requirements.txt
|
| 16 |
+
python setup.py
|
| 17 |
+
streamlit run main.py
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
### If you want to just run the frontend
|
| 21 |
+
1. You will need to install all of the necessary packages to run the frontend beforehand
|
| 22 |
+
2. You will then need to run the frontend to use the model
|
| 23 |
+
```bash
|
| 24 |
+
pip install -r requirements.txt
|
| 25 |
+
streamlit run main.py
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
## Project Structure
|
| 29 |
+
> - requirements.txt: list of python libraries to download before running project
|
| 30 |
+
> - setup.py: script to set up project (get data, train model)
|
| 31 |
+
> - main.py: main script/notebook to run streamlit user interface
|
| 32 |
+
> - assets: directory for images used in frontend
|
| 33 |
+
> - scripts: directory for pipeline scripts or utility scripts
|
| 34 |
+
> - make_dataset.py: script to get data
|
| 35 |
+
> - model.py: script to train model and predict
|
| 36 |
+
> - models: directory for trained models
|
| 37 |
+
> - recommender.pt: pytorch trained model for album recommendations
|
| 38 |
+
> - data: directory for project data
|
| 39 |
+
> - raw: directory for raw data from spotify's challenge
|
| 40 |
+
> - processed: directory to store the processed dataframe to use on the frontend
|
| 41 |
+
> - notebooks: directory to store any exploration notebooks used
|
| 42 |
+
> - .gitignore: git ignore file
|
| 43 |
+
|
| 44 |
+
## [Data source](https://www.aicrowd.com/challenges/spotify-million-playlist-dataset-challenge)
|
| 45 |
+
The data used to train the model was provided by Spotify. As per their dataset description:
|
| 46 |
+
> The dataset contains 1,000,000 playlists, including playlist titles and track titles, created by users on the Spotify platform between January 2010 and October 2017.
|
| 47 |
+
|
| 48 |
+
## Contributions
|
| 49 |
+
Brinnae Bent
|
| 50 |
+
Jon Reifschneider
|
assets/music_notes.png
ADDED
|
assets/trumpet.png
ADDED
|
data/processed/artist_album.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:332fcb8cb088acbc5390f00e451b33575dadc63b467e4859dd9e532ef5819f73
|
| 3 |
+
size 106221612
|
data/processed/playlists.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:074643d7a3b7162bee273197767685f42804de07f868492a09e60a00c326f79d
|
| 3 |
+
size 1450033316
|
data/raw/data/playlists_100.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:311ed6d496e6241aed333d23b97ab50aa1c98967096aa6a9ef4b1d6c1ab79b06
|
| 3 |
+
size 95129747
|
data/raw/data/playlists_150.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c808d440e81596958db4e952a0dfe257ec3906368bce84dbc9766a4b9e8e8001
|
| 3 |
+
size 94931327
|
data/raw/data/playlists_200.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1681633bb4e1d631ab0ac59dfd1407c6b8d7d72d1e274a7586eaaf3543adc35
|
| 3 |
+
size 95181294
|
data/raw/data/playlists_50.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de488754f4b9b2b3ce7a0f4dba660e3108b3e2c3ebc4ad9dbdd0dd9dad1a5fe1
|
| 3 |
+
size 95005296
|
main.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Attribution: https://github.com/AIPI540/AIPI540-Deep-Learning-Applications/
|
| 3 |
+
|
| 4 |
+
Jon Reifschneider
|
| 5 |
+
Brinnae Bent
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import streamlit as st
|
| 10 |
+
from PIL import Image
|
| 11 |
+
import numpy as np
|
| 12 |
+
import os
|
| 13 |
+
import numpy as np
|
| 14 |
+
import pandas as pd
|
| 15 |
+
import pandas as pd
|
| 16 |
+
import json
|
| 17 |
+
import matplotlib.pyplot as plt
|
| 18 |
+
|
| 19 |
+
import os
|
| 20 |
+
import urllib.request
|
| 21 |
+
import zipfile
|
| 22 |
+
import json
|
| 23 |
+
import pandas as pd
|
| 24 |
+
import time
|
| 25 |
+
import torch
|
| 26 |
+
import numpy as np
|
| 27 |
+
import pandas as pd
|
| 28 |
+
import torch.nn as nn
|
| 29 |
+
import torch.nn.functional as F
|
| 30 |
+
import torch.optim as optim
|
| 31 |
+
from torch.utils.data import DataLoader, TensorDataset
|
| 32 |
+
from sklearn.model_selection import train_test_split
|
| 33 |
+
import matplotlib.pyplot as plt
|
| 34 |
+
from sklearn.preprocessing import LabelEncoder
|
| 35 |
+
|
| 36 |
+
class NNColabFiltering(nn.Module):
    """Neural collaborative-filtering scorer for (playlist, item) index pairs.

    Each playlist index and each item index is looked up in its own embedding
    table. The two vectors are concatenated, passed through one hidden layer
    with a ReLU, reduced to a single value, and mapped into ``rating_range``
    with a scaled sigmoid.

    Args:
        n_playlists: Number of rows in the playlist ("user") embedding table.
        n_artists: Number of rows in the item embedding table.
        embedding_dim_users: Width of each playlist embedding vector.
        embedding_dim_items: Width of each item embedding vector.
        n_activations: Number of units in the hidden layer.
        rating_range: Two-element sequence ``(low, high)`` bounding the output.
    """

    def __init__(self, n_playlists, n_artists, embedding_dim_users, embedding_dim_items, n_activations, rating_range):
        super().__init__()
        self.user_embeddings = nn.Embedding(
            num_embeddings=n_playlists,
            embedding_dim=embedding_dim_users,
        )
        self.item_embeddings = nn.Embedding(
            num_embeddings=n_artists,
            embedding_dim=embedding_dim_items,
        )
        # The hidden layer consumes the concatenated playlist + item vectors.
        concat_width = embedding_dim_users + embedding_dim_items
        self.fc1 = nn.Linear(concat_width, n_activations)
        self.fc2 = nn.Linear(n_activations, 1)
        self.rating_range = rating_range

    def forward(self, X):
        """Score a minibatch of index pairs.

        Args:
            X: Integer tensor read as ``X[:, 0]`` (playlist index) and
                ``X[:, 1]`` (item index).

        Returns:
            Tensor with one prediction per input row (last dimension of size
            1), scaled into ``rating_range``.
        """
        playlist_vecs = self.user_embeddings(X[:, 0])
        item_vecs = self.item_embeddings(X[:, 1])

        hidden = F.relu(self.fc1(torch.cat([playlist_vecs, item_vecs], dim=1)))
        raw_scores = self.fc2(hidden)

        # Sigmoid output is stretched from (0, 1) onto [low, high].
        low, high = self.rating_range[0], self.rating_range[1]
        return torch.sigmoid(raw_scores) * (high - low) + low
|
| 59 |
+
|
| 60 |
+
def generate_recommendations(artist_album, playlists, model, playlist_id, device, top_n=10, batch_size=1024):
    """Recommend the highest-scoring albums a playlist does not already contain.

    Every row of ``artist_album`` is scored for ``playlist_id`` by ``model``.
    Rows whose ``artist_album_id`` already appears in that playlist are
    dropped, and the ``top_n`` best remaining rows are returned best-first.

    Args:
        artist_album: DataFrame with ``artist_album_id``, ``album_name`` and
            ``artist_name`` columns; each row is one candidate.
        playlists: DataFrame with ``playlist_id`` and ``artist_album_id``
            columns listing what each playlist already contains.
        model: Module called with a long tensor whose columns are
            ``[playlist_id, artist_album_id]``; it must return one score per row.
        playlist_id: Id of the playlist to recommend for.
        device: Device on which the input and prediction tensors are created.
        top_n: Maximum number of recommendations; values <= 0 yield none.
        batch_size: Number of candidates scored per forward pass.

    Returns:
        Tuple ``(albums, artists)`` of equal-length lists, ordered from the
        highest predicted score to the lowest.
    """
    model.eval()

    item_ids = artist_album['artist_album_id'].values
    all_item_ids = torch.tensor(item_ids, dtype=torch.long, device=device)
    user_ids = torch.full((len(all_item_ids),), playlist_id, dtype=torch.long, device=device)

    # One score slot per candidate row, filled batch by batch.
    all_predictions = torch.zeros(len(all_item_ids), device=device)

    with torch.no_grad():
        for start in range(0, len(all_item_ids), batch_size):
            stop = start + batch_size
            input_tensor = torch.stack([user_ids[start:stop], all_item_ids[start:stop]], dim=1)
            # reshape(-1) keeps the result 1-D even when the last batch has a single row.
            all_predictions[start:stop] = model(input_tensor).reshape(-1)

    predictions = all_predictions.cpu().numpy()

    # Row positions of candidates the playlist does not already contain.
    albums_listened = playlists.loc[playlists['playlist_id'] == playlist_id, 'artist_album_id'].unique()
    candidate_rows = np.flatnonzero(~np.isin(item_ids, albums_listened))

    # Rank best-first. Slicing after the reversal makes top_n == 0 return
    # nothing (a "[-top_n:]" slice would return every candidate instead).
    best_first = np.argsort(predictions[candidate_rows])[::-1][:max(top_n, 0)]

    # Select by row position so the output keeps the ranking order and has at
    # most top_n rows; an id-based isin() lookup would fall back to table order.
    ranked = artist_album.iloc[candidate_rows[best_first]]

    return ranked['album_name'].tolist(), ranked['artist_name'].tolist()
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def load_data():
    """Load the processed CSV files used by the frontend.

    Both files are read from ``data/processed`` under the current working
    directory.

    Returns:
        artist_album: DataFrame holding the de-duplicated ``artist_album_id``,
            ``artist_album``, ``artist_name`` and ``album_name`` columns of
            ``artist_album.csv``.
        playlists: DataFrame with the full contents of ``playlists.csv``.
    """
    processed_dir = os.path.join(os.getcwd(), 'data', 'processed')

    artist_album = pd.read_csv(os.path.join(processed_dir, 'artist_album.csv'))
    # Keep only the lookup columns; repeated rows add nothing to the lookup.
    artist_album = artist_album[['artist_album_id', 'artist_album', 'artist_name', 'album_name']].drop_duplicates()

    playlists = pd.read_csv(os.path.join(processed_dir, 'playlists.csv'))

    return artist_album, playlists
|
| 112 |
+
|
| 113 |
+
# Load the processed tables and the trained model at module import time.
artist_album, playlists = load_data()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): torch.load unpickles the file; only load a recommender.pt from a trusted source.
model = torch.load('models/recommender.pt', map_location=device)

if __name__ == '__main__':
    # NOTE(review): the nesting below was reconstructed from a view that had
    # lost its indentation -- confirm which widgets belong inside the sidebar.

    st.header('Spotify Playlists')

    # Two banner images, side by side.
    img1, img2 = st.columns(2)
    for column, image_path in ((img1, 'assets/music_notes.png'), (img2, 'assets/trumpet.png')):
        column.image(Image.open(image_path), use_column_width=True)

    # The playlist picker is rendered in the sidebar.
    with st.sidebar:
        playlist_name = st.selectbox(
            "Playlist Selection",
            list(set(playlists['name'].dropna())),
        )

    # If several rows match the chosen name, the first one decides the id.
    name_matches = playlists['name'] == playlist_name
    playlist_id = playlists['playlist_id'][name_matches].values[0]
    albums, artists = generate_recommendations(artist_album, playlists, model, playlist_id, device)

    # Show the tracks currently in the selected playlist.
    in_playlist = playlists['playlist_id'] == playlist_id
    st.dataframe(data=playlists[['artist_name', 'album_name', 'track_name']][in_playlist])

    st.write(f"*Recommendations for playlist:* {playlists['name'][in_playlist].values[0]}")

    # Two-column listing: artist on the left, album on the right.
    col1, col2 = st.columns(2)
    for column, heading in ((col1, 'Artist'), (col2, 'Album')):
        with column:
            st.write(heading)

    for album, artist in zip(albums, artists):
        with col1:
            st.write(f"**{artist}**")
        with col2:
            st.write(f"**{album}**")
|
| 152 |
+
|
model.ipynb
ADDED
|
@@ -0,0 +1,1006 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {
|
| 7 |
+
"id": "uq9k8YYUKjnp"
|
| 8 |
+
},
|
| 9 |
+
"outputs": [],
|
| 10 |
+
"source": [
|
| 11 |
+
"import os\n",
|
| 12 |
+
"import urllib.request\n",
|
| 13 |
+
"import zipfile\n",
|
| 14 |
+
"import json\n",
|
| 15 |
+
"import pandas as pd\n",
|
| 16 |
+
"import time\n",
|
| 17 |
+
"import torch\n",
|
| 18 |
+
"import numpy as np\n",
|
| 19 |
+
"import pandas as pd\n",
|
| 20 |
+
"import torch.nn as nn\n",
|
| 21 |
+
"import torch.nn.functional as F\n",
|
| 22 |
+
"import torch.optim as optim\n",
|
| 23 |
+
"from torch.utils.data import DataLoader, TensorDataset\n",
|
| 24 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 25 |
+
"import matplotlib.pyplot as plt"
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"cell_type": "code",
|
| 30 |
+
"execution_count": 2,
|
| 31 |
+
"metadata": {
|
| 32 |
+
"id": "L5h3Tsa0LIoo"
|
| 33 |
+
},
|
| 34 |
+
"outputs": [],
|
| 35 |
+
"source": [
|
| 36 |
+
"def unzip_archive(filepath, dir_path):\n",
|
| 37 |
+
" with zipfile.ZipFile(f\"{filepath}\", 'r') as zip_ref:\n",
|
| 38 |
+
" zip_ref.extractall(dir_path)\n",
|
| 39 |
+
"\n",
|
| 40 |
+
"unzip_archive(os.getcwd() + '/data/raw/spotify_million_playlist_dataset.zip', os.getcwd() + '/data/raw/playlists')\n"
|
| 41 |
+
]
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"cell_type": "code",
|
| 45 |
+
"execution_count": 3,
|
| 46 |
+
"metadata": {},
|
| 47 |
+
"outputs": [],
|
| 48 |
+
"source": [
|
| 49 |
+
"import shutil\n",
|
| 50 |
+
"\n",
|
| 51 |
+
"def make_dir(directory):\n",
|
| 52 |
+
" if os.path.exists(directory):\n",
|
| 53 |
+
" shutil.rmtree(directory)\n",
|
| 54 |
+
" os.makedirs(directory)\n",
|
| 55 |
+
" else:\n",
|
| 56 |
+
" os.makedirs(directory)\n",
|
| 57 |
+
" \n",
|
| 58 |
+
"directory = os.getcwd() + '/data/raw/data'\n",
|
| 59 |
+
"make_dir(directory)"
|
| 60 |
+
]
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"cell_type": "code",
|
| 64 |
+
"execution_count": 4,
|
| 65 |
+
"metadata": {},
|
| 66 |
+
"outputs": [],
|
| 67 |
+
"source": [
|
| 68 |
+
"cols = [\n",
|
| 69 |
+
" 'name',\n",
|
| 70 |
+
" 'pid',\n",
|
| 71 |
+
" 'num_followers',\n",
|
| 72 |
+
" 'pos',\n",
|
| 73 |
+
" 'artist_name',\n",
|
| 74 |
+
" 'track_name',\n",
|
| 75 |
+
" 'album_name'\n",
|
| 76 |
+
"]"
|
| 77 |
+
]
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"cell_type": "code",
|
| 81 |
+
"execution_count": 5,
|
| 82 |
+
"metadata": {
|
| 83 |
+
"colab": {
|
| 84 |
+
"base_uri": "https://localhost:8080/"
|
| 85 |
+
},
|
| 86 |
+
"id": "qyCujIu8cDGg",
|
| 87 |
+
"outputId": "0964ace3-2916-49e3-eebf-2e08e61d95d9"
|
| 88 |
+
},
|
| 89 |
+
"outputs": [
|
| 90 |
+
{
|
| 91 |
+
"name": "stdout",
|
| 92 |
+
"output_type": "stream",
|
| 93 |
+
"text": [
|
| 94 |
+
"mpd.slice.188000-188999.json\t100/1000\t10.0%"
|
| 95 |
+
]
|
| 96 |
+
}
|
| 97 |
+
],
|
| 98 |
+
"source": [
|
| 99 |
+
"\n",
|
| 100 |
+
"directory = os.getcwd() + '/data/raw/playlists/data'\n",
|
| 101 |
+
"df = pd.DataFrame()\n",
|
| 102 |
+
"index = 0\n",
|
| 103 |
+
"# Loop through all files in the directory\n",
|
| 104 |
+
"for filename in os.listdir(directory):\n",
|
| 105 |
+
" # Check if the item is a file (not a subdirectory)\n",
|
| 106 |
+
" if os.path.isfile(os.path.join(directory, filename)):\n",
|
| 107 |
+
" if filename.find('.json') != -1 :\n",
|
| 108 |
+
" index += 1\n",
|
| 109 |
+
"\n",
|
| 110 |
+
" # Print the filename or perform operations on the file\n",
|
| 111 |
+
" print(f'\\r{filename}\\t{index}/1000\\t{((index/1000)*100):.1f}%', end='')\n",
|
| 112 |
+
"\n",
|
| 113 |
+
" # If you need the full file path, you can use:\n",
|
| 114 |
+
" full_path = os.path.join(directory, filename)\n",
|
| 115 |
+
"\n",
|
| 116 |
+
" with open(full_path, 'r') as file:\n",
|
| 117 |
+
" json_data = json.load(file)\n",
|
| 118 |
+
"\n",
|
| 119 |
+
" temp = pd.DataFrame(json_data['playlists'])\n",
|
| 120 |
+
" expanded_df = temp.explode('tracks').reset_index(drop=True)\n",
|
| 121 |
+
"\n",
|
| 122 |
+
" # Normalize the JSON data\n",
|
| 123 |
+
" json_normalized = pd.json_normalize(expanded_df['tracks'])\n",
|
| 124 |
+
"\n",
|
| 125 |
+
" # Concatenate the original DataFrame with the normalized JSON data\n",
|
| 126 |
+
" result = pd.concat([expanded_df.drop(columns=['tracks']), json_normalized], axis=1)\n",
|
| 127 |
+
" \n",
|
| 128 |
+
" result = result[cols]\n",
|
| 129 |
+
"\n",
|
| 130 |
+
" df = pd.concat([df, result], axis=0, ignore_index=True)\n",
|
| 131 |
+
" \n",
|
| 132 |
+
" if index % 50 == 0:\n",
|
| 133 |
+
" df.to_parquet(f'{os.getcwd()}/data/raw/data/playlists_{index % 1000}.parquet')\n",
|
| 134 |
+
" del df\n",
|
| 135 |
+
" df = pd.DataFrame()\n",
|
| 136 |
+
" if index % 100 == 0:\n",
|
| 137 |
+
" break"
|
| 138 |
+
]
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"cell_type": "code",
|
| 142 |
+
"execution_count": 6,
|
| 143 |
+
"metadata": {},
|
| 144 |
+
"outputs": [],
|
| 145 |
+
"source": [
|
| 146 |
+
"import pyarrow.parquet as pq\n",
|
| 147 |
+
"\n",
|
| 148 |
+
"def read_parquet_folder(folder_path):\n",
|
| 149 |
+
" dataframes = []\n",
|
| 150 |
+
" for file in os.listdir(folder_path):\n",
|
| 151 |
+
" if file.endswith('.parquet'):\n",
|
| 152 |
+
" file_path = os.path.join(folder_path, file)\n",
|
| 153 |
+
" df = pd.read_parquet(file_path)\n",
|
| 154 |
+
" dataframes.append(df)\n",
|
| 155 |
+
" \n",
|
| 156 |
+
" return pd.concat(dataframes, ignore_index=True)\n",
|
| 157 |
+
"\n",
|
| 158 |
+
"folder_path = os.getcwd() + '/data/raw/data'\n",
|
| 159 |
+
"df = read_parquet_folder(folder_path)"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"cell_type": "code",
|
| 164 |
+
"execution_count": 7,
|
| 165 |
+
"metadata": {},
|
| 166 |
+
"outputs": [],
|
| 167 |
+
"source": [
|
| 168 |
+
"directory = os.getcwd() + '/data/raw/mappings'\n",
|
| 169 |
+
"make_dir(directory)"
|
| 170 |
+
]
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"cell_type": "code",
|
| 174 |
+
"execution_count": 8,
|
| 175 |
+
"metadata": {},
|
| 176 |
+
"outputs": [],
|
| 177 |
+
"source": [
|
| 178 |
+
"def create_ids(df, col, name):\n",
|
| 179 |
+
" # Create a dictionary mapping unique values to IDs\n",
|
| 180 |
+
" value_to_id = {val: i for i, val in enumerate(df[col].unique())}\n",
|
| 181 |
+
"\n",
|
| 182 |
+
" # Create a new column with the IDs\n",
|
| 183 |
+
" df[f'{name}_id'] = df[col].map(value_to_id)\n",
|
| 184 |
+
" df[[f'{name}_id', col]].drop_duplicates().to_csv(os.getcwd() + f'/data/raw/mappings/{name}.csv')\n",
|
| 185 |
+
" # df = df.drop(col, axis=1)\n",
|
| 186 |
+
" return df"
|
| 187 |
+
]
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"cell_type": "code",
|
| 191 |
+
"execution_count": 9,
|
| 192 |
+
"metadata": {},
|
| 193 |
+
"outputs": [],
|
| 194 |
+
"source": [
|
| 195 |
+
"df = create_ids(df, 'artist_name', 'artist')\n",
|
| 196 |
+
"df = create_ids(df, 'pid', 'playlist')\n",
|
| 197 |
+
"df = create_ids(df, 'track_name', 'song')\n",
|
| 198 |
+
"df = create_ids(df, 'album_name', 'album')"
|
| 199 |
+
]
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"cell_type": "code",
|
| 203 |
+
"execution_count": 10,
|
| 204 |
+
"metadata": {},
|
| 205 |
+
"outputs": [],
|
| 206 |
+
"source": [
|
| 207 |
+
"df['artist_count'] = df.groupby(['playlist_id','artist_id'])['song_id'].transform('nunique')\n",
|
| 208 |
+
"df['album_count'] = df.groupby(['playlist_id','artist_id'])['album_id'].transform('nunique')\n",
|
| 209 |
+
"df['song_count'] = df.groupby(['playlist_id','artist_id'])['song_id'].transform('count')"
|
| 210 |
+
]
|
| 211 |
+
},
|
| 212 |
+
{
|
| 213 |
+
"cell_type": "code",
|
| 214 |
+
"execution_count": 11,
|
| 215 |
+
"metadata": {},
|
| 216 |
+
"outputs": [],
|
| 217 |
+
"source": [
|
| 218 |
+
"df['playlist_songs'] = df.groupby(['playlist_id'])['pos'].transform('max')\n",
|
| 219 |
+
"df['playlist_songs'] += 1"
|
| 220 |
+
]
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"cell_type": "code",
|
| 224 |
+
"execution_count": 12,
|
| 225 |
+
"metadata": {},
|
| 226 |
+
"outputs": [],
|
| 227 |
+
"source": [
|
| 228 |
+
"df['artist_percent'] = df['artist_count'] / df['playlist_songs']\n",
|
| 229 |
+
"df['song_percent'] = df['song_count'] / df['playlist_songs']\n",
|
| 230 |
+
"df['album_percent'] = df['album_count'] / df['playlist_songs']"
|
| 231 |
+
]
|
| 232 |
+
},
|
| 233 |
+
{
|
| 234 |
+
"cell_type": "code",
|
| 235 |
+
"execution_count": 13,
|
| 236 |
+
"metadata": {},
|
| 237 |
+
"outputs": [
|
| 238 |
+
{
|
| 239 |
+
"data": {
|
| 240 |
+
"text/html": [
|
| 241 |
+
"<div>\n",
|
| 242 |
+
"<style scoped>\n",
|
| 243 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 244 |
+
" vertical-align: middle;\n",
|
| 245 |
+
" }\n",
|
| 246 |
+
"\n",
|
| 247 |
+
" .dataframe tbody tr th {\n",
|
| 248 |
+
" vertical-align: top;\n",
|
| 249 |
+
" }\n",
|
| 250 |
+
"\n",
|
| 251 |
+
" .dataframe thead th {\n",
|
| 252 |
+
" text-align: right;\n",
|
| 253 |
+
" }\n",
|
| 254 |
+
"</style>\n",
|
| 255 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 256 |
+
" <thead>\n",
|
| 257 |
+
" <tr style=\"text-align: right;\">\n",
|
| 258 |
+
" <th></th>\n",
|
| 259 |
+
" <th>name</th>\n",
|
| 260 |
+
" <th>pid</th>\n",
|
| 261 |
+
" <th>num_followers</th>\n",
|
| 262 |
+
" <th>pos</th>\n",
|
| 263 |
+
" <th>artist_name</th>\n",
|
| 264 |
+
" <th>track_name</th>\n",
|
| 265 |
+
" <th>album_name</th>\n",
|
| 266 |
+
" <th>artist_id</th>\n",
|
| 267 |
+
" <th>playlist_id</th>\n",
|
| 268 |
+
" <th>song_id</th>\n",
|
| 269 |
+
" <th>album_id</th>\n",
|
| 270 |
+
" <th>artist_count</th>\n",
|
| 271 |
+
" <th>album_count</th>\n",
|
| 272 |
+
" <th>song_count</th>\n",
|
| 273 |
+
" <th>playlist_songs</th>\n",
|
| 274 |
+
" <th>artist_percent</th>\n",
|
| 275 |
+
" <th>song_percent</th>\n",
|
| 276 |
+
" <th>album_percent</th>\n",
|
| 277 |
+
" </tr>\n",
|
| 278 |
+
" </thead>\n",
|
| 279 |
+
" <tbody>\n",
|
| 280 |
+
" <tr>\n",
|
| 281 |
+
" <th>212</th>\n",
|
| 282 |
+
" <td>throwbacks</td>\n",
|
| 283 |
+
" <td>143005</td>\n",
|
| 284 |
+
" <td>2</td>\n",
|
| 285 |
+
" <td>0</td>\n",
|
| 286 |
+
" <td>R. Kelly</td>\n",
|
| 287 |
+
" <td>Ignition - Remix</td>\n",
|
| 288 |
+
" <td>Chocolate Factory</td>\n",
|
| 289 |
+
" <td>108</td>\n",
|
| 290 |
+
" <td>5</td>\n",
|
| 291 |
+
" <td>203</td>\n",
|
| 292 |
+
" <td>152</td>\n",
|
| 293 |
+
" <td>1</td>\n",
|
| 294 |
+
" <td>1</td>\n",
|
| 295 |
+
" <td>1</td>\n",
|
| 296 |
+
" <td>193</td>\n",
|
| 297 |
+
" <td>0.005181</td>\n",
|
| 298 |
+
" <td>0.005181</td>\n",
|
| 299 |
+
" <td>0.005181</td>\n",
|
| 300 |
+
" </tr>\n",
|
| 301 |
+
" <tr>\n",
|
| 302 |
+
" <th>213</th>\n",
|
| 303 |
+
" <td>throwbacks</td>\n",
|
| 304 |
+
" <td>143005</td>\n",
|
| 305 |
+
" <td>2</td>\n",
|
| 306 |
+
" <td>1</td>\n",
|
| 307 |
+
" <td>Backstreet Boys</td>\n",
|
| 308 |
+
" <td>I Want It That Way</td>\n",
|
| 309 |
+
" <td>Original Album Classics</td>\n",
|
| 310 |
+
" <td>109</td>\n",
|
| 311 |
+
" <td>5</td>\n",
|
| 312 |
+
" <td>204</td>\n",
|
| 313 |
+
" <td>153</td>\n",
|
| 314 |
+
" <td>1</td>\n",
|
| 315 |
+
" <td>1</td>\n",
|
| 316 |
+
" <td>1</td>\n",
|
| 317 |
+
" <td>193</td>\n",
|
| 318 |
+
" <td>0.005181</td>\n",
|
| 319 |
+
" <td>0.005181</td>\n",
|
| 320 |
+
" <td>0.005181</td>\n",
|
| 321 |
+
" </tr>\n",
|
| 322 |
+
" <tr>\n",
|
| 323 |
+
" <th>214</th>\n",
|
| 324 |
+
" <td>throwbacks</td>\n",
|
| 325 |
+
" <td>143005</td>\n",
|
| 326 |
+
" <td>2</td>\n",
|
| 327 |
+
" <td>2</td>\n",
|
| 328 |
+
" <td>*NSYNC</td>\n",
|
| 329 |
+
" <td>Bye Bye Bye</td>\n",
|
| 330 |
+
" <td>No Strings Attached</td>\n",
|
| 331 |
+
" <td>110</td>\n",
|
| 332 |
+
" <td>5</td>\n",
|
| 333 |
+
" <td>205</td>\n",
|
| 334 |
+
" <td>154</td>\n",
|
| 335 |
+
" <td>1</td>\n",
|
| 336 |
+
" <td>1</td>\n",
|
| 337 |
+
" <td>1</td>\n",
|
| 338 |
+
" <td>193</td>\n",
|
| 339 |
+
" <td>0.005181</td>\n",
|
| 340 |
+
" <td>0.005181</td>\n",
|
| 341 |
+
" <td>0.005181</td>\n",
|
| 342 |
+
" </tr>\n",
|
| 343 |
+
" <tr>\n",
|
| 344 |
+
" <th>215</th>\n",
|
| 345 |
+
" <td>throwbacks</td>\n",
|
| 346 |
+
" <td>143005</td>\n",
|
| 347 |
+
" <td>2</td>\n",
|
| 348 |
+
" <td>3</td>\n",
|
| 349 |
+
" <td>Fountains Of Wayne</td>\n",
|
| 350 |
+
" <td>Stacy's Mom</td>\n",
|
| 351 |
+
" <td>Welcome Interstate Managers</td>\n",
|
| 352 |
+
" <td>111</td>\n",
|
| 353 |
+
" <td>5</td>\n",
|
| 354 |
+
" <td>206</td>\n",
|
| 355 |
+
" <td>155</td>\n",
|
| 356 |
+
" <td>1</td>\n",
|
| 357 |
+
" <td>1</td>\n",
|
| 358 |
+
" <td>1</td>\n",
|
| 359 |
+
" <td>193</td>\n",
|
| 360 |
+
" <td>0.005181</td>\n",
|
| 361 |
+
" <td>0.005181</td>\n",
|
| 362 |
+
" <td>0.005181</td>\n",
|
| 363 |
+
" </tr>\n",
|
| 364 |
+
" <tr>\n",
|
| 365 |
+
" <th>216</th>\n",
|
| 366 |
+
" <td>throwbacks</td>\n",
|
| 367 |
+
" <td>143005</td>\n",
|
| 368 |
+
" <td>2</td>\n",
|
| 369 |
+
" <td>4</td>\n",
|
| 370 |
+
" <td>Bowling For Soup</td>\n",
|
| 371 |
+
" <td>1985</td>\n",
|
| 372 |
+
" <td>A Hangover You Don't Deserve</td>\n",
|
| 373 |
+
" <td>112</td>\n",
|
| 374 |
+
" <td>5</td>\n",
|
| 375 |
+
" <td>207</td>\n",
|
| 376 |
+
" <td>156</td>\n",
|
| 377 |
+
" <td>1</td>\n",
|
| 378 |
+
" <td>1</td>\n",
|
| 379 |
+
" <td>1</td>\n",
|
| 380 |
+
" <td>193</td>\n",
|
| 381 |
+
" <td>0.005181</td>\n",
|
| 382 |
+
" <td>0.005181</td>\n",
|
| 383 |
+
" <td>0.005181</td>\n",
|
| 384 |
+
" </tr>\n",
|
| 385 |
+
" <tr>\n",
|
| 386 |
+
" <th>...</th>\n",
|
| 387 |
+
" <td>...</td>\n",
|
| 388 |
+
" <td>...</td>\n",
|
| 389 |
+
" <td>...</td>\n",
|
| 390 |
+
" <td>...</td>\n",
|
| 391 |
+
" <td>...</td>\n",
|
| 392 |
+
" <td>...</td>\n",
|
| 393 |
+
" <td>...</td>\n",
|
| 394 |
+
" <td>...</td>\n",
|
| 395 |
+
" <td>...</td>\n",
|
| 396 |
+
" <td>...</td>\n",
|
| 397 |
+
" <td>...</td>\n",
|
| 398 |
+
" <td>...</td>\n",
|
| 399 |
+
" <td>...</td>\n",
|
| 400 |
+
" <td>...</td>\n",
|
| 401 |
+
" <td>...</td>\n",
|
| 402 |
+
" <td>...</td>\n",
|
| 403 |
+
" <td>...</td>\n",
|
| 404 |
+
" <td>...</td>\n",
|
| 405 |
+
" </tr>\n",
|
| 406 |
+
" <tr>\n",
|
| 407 |
+
" <th>400</th>\n",
|
| 408 |
+
" <td>throwbacks</td>\n",
|
| 409 |
+
" <td>143005</td>\n",
|
| 410 |
+
" <td>2</td>\n",
|
| 411 |
+
" <td>188</td>\n",
|
| 412 |
+
" <td>JoJo</td>\n",
|
| 413 |
+
" <td>Too Little, Too Late - Radio Version</td>\n",
|
| 414 |
+
" <td>Too Little, Too Late</td>\n",
|
| 415 |
+
" <td>199</td>\n",
|
| 416 |
+
" <td>5</td>\n",
|
| 417 |
+
" <td>390</td>\n",
|
| 418 |
+
" <td>293</td>\n",
|
| 419 |
+
" <td>1</td>\n",
|
| 420 |
+
" <td>1</td>\n",
|
| 421 |
+
" <td>1</td>\n",
|
| 422 |
+
" <td>193</td>\n",
|
| 423 |
+
" <td>0.005181</td>\n",
|
| 424 |
+
" <td>0.005181</td>\n",
|
| 425 |
+
" <td>0.005181</td>\n",
|
| 426 |
+
" </tr>\n",
|
| 427 |
+
" <tr>\n",
|
| 428 |
+
" <th>401</th>\n",
|
| 429 |
+
" <td>throwbacks</td>\n",
|
| 430 |
+
" <td>143005</td>\n",
|
| 431 |
+
" <td>2</td>\n",
|
| 432 |
+
" <td>189</td>\n",
|
| 433 |
+
" <td>Spice Girls</td>\n",
|
| 434 |
+
" <td>Wannabe - Radio Edit</td>\n",
|
| 435 |
+
" <td>Spice</td>\n",
|
| 436 |
+
" <td>200</td>\n",
|
| 437 |
+
" <td>5</td>\n",
|
| 438 |
+
" <td>391</td>\n",
|
| 439 |
+
" <td>294</td>\n",
|
| 440 |
+
" <td>1</td>\n",
|
| 441 |
+
" <td>1</td>\n",
|
| 442 |
+
" <td>1</td>\n",
|
| 443 |
+
" <td>193</td>\n",
|
| 444 |
+
" <td>0.005181</td>\n",
|
| 445 |
+
" <td>0.005181</td>\n",
|
| 446 |
+
" <td>0.005181</td>\n",
|
| 447 |
+
" </tr>\n",
|
| 448 |
+
" <tr>\n",
|
| 449 |
+
" <th>402</th>\n",
|
| 450 |
+
" <td>throwbacks</td>\n",
|
| 451 |
+
" <td>143005</td>\n",
|
| 452 |
+
" <td>2</td>\n",
|
| 453 |
+
" <td>190</td>\n",
|
| 454 |
+
" <td>MiMS</td>\n",
|
| 455 |
+
" <td>This Is Why I'm Hot</td>\n",
|
| 456 |
+
" <td>Music Is My Savior</td>\n",
|
| 457 |
+
" <td>201</td>\n",
|
| 458 |
+
" <td>5</td>\n",
|
| 459 |
+
" <td>392</td>\n",
|
| 460 |
+
" <td>295</td>\n",
|
| 461 |
+
" <td>1</td>\n",
|
| 462 |
+
" <td>1</td>\n",
|
| 463 |
+
" <td>1</td>\n",
|
| 464 |
+
" <td>193</td>\n",
|
| 465 |
+
" <td>0.005181</td>\n",
|
| 466 |
+
" <td>0.005181</td>\n",
|
| 467 |
+
" <td>0.005181</td>\n",
|
| 468 |
+
" </tr>\n",
|
| 469 |
+
" <tr>\n",
|
| 470 |
+
" <th>403</th>\n",
|
| 471 |
+
" <td>throwbacks</td>\n",
|
| 472 |
+
" <td>143005</td>\n",
|
| 473 |
+
" <td>2</td>\n",
|
| 474 |
+
" <td>191</td>\n",
|
| 475 |
+
" <td>Rihanna</td>\n",
|
| 476 |
+
" <td>Disturbia</td>\n",
|
| 477 |
+
" <td>Good Girl Gone Bad</td>\n",
|
| 478 |
+
" <td>115</td>\n",
|
| 479 |
+
" <td>5</td>\n",
|
| 480 |
+
" <td>393</td>\n",
|
| 481 |
+
" <td>296</td>\n",
|
| 482 |
+
" <td>3</td>\n",
|
| 483 |
+
" <td>3</td>\n",
|
| 484 |
+
" <td>3</td>\n",
|
| 485 |
+
" <td>193</td>\n",
|
| 486 |
+
" <td>0.015544</td>\n",
|
| 487 |
+
" <td>0.015544</td>\n",
|
| 488 |
+
" <td>0.015544</td>\n",
|
| 489 |
+
" </tr>\n",
|
| 490 |
+
" <tr>\n",
|
| 491 |
+
" <th>404</th>\n",
|
| 492 |
+
" <td>throwbacks</td>\n",
|
| 493 |
+
" <td>143005</td>\n",
|
| 494 |
+
" <td>2</td>\n",
|
| 495 |
+
" <td>192</td>\n",
|
| 496 |
+
" <td>DEV</td>\n",
|
| 497 |
+
" <td>Bass Down Low</td>\n",
|
| 498 |
+
" <td>The Night The Sun Came Up</td>\n",
|
| 499 |
+
" <td>179</td>\n",
|
| 500 |
+
" <td>5</td>\n",
|
| 501 |
+
" <td>394</td>\n",
|
| 502 |
+
" <td>264</td>\n",
|
| 503 |
+
" <td>2</td>\n",
|
| 504 |
+
" <td>1</td>\n",
|
| 505 |
+
" <td>2</td>\n",
|
| 506 |
+
" <td>193</td>\n",
|
| 507 |
+
" <td>0.010363</td>\n",
|
| 508 |
+
" <td>0.010363</td>\n",
|
| 509 |
+
" <td>0.005181</td>\n",
|
| 510 |
+
" </tr>\n",
|
| 511 |
+
" </tbody>\n",
|
| 512 |
+
"</table>\n",
|
| 513 |
+
"<p>193 rows × 18 columns</p>\n",
|
| 514 |
+
"</div>"
|
| 515 |
+
],
|
| 516 |
+
"text/plain": [
|
| 517 |
+
" name pid num_followers pos artist_name \\\n",
|
| 518 |
+
"212 throwbacks 143005 2 0 R. Kelly \n",
|
| 519 |
+
"213 throwbacks 143005 2 1 Backstreet Boys \n",
|
| 520 |
+
"214 throwbacks 143005 2 2 *NSYNC \n",
|
| 521 |
+
"215 throwbacks 143005 2 3 Fountains Of Wayne \n",
|
| 522 |
+
"216 throwbacks 143005 2 4 Bowling For Soup \n",
|
| 523 |
+
".. ... ... ... ... ... \n",
|
| 524 |
+
"400 throwbacks 143005 2 188 JoJo \n",
|
| 525 |
+
"401 throwbacks 143005 2 189 Spice Girls \n",
|
| 526 |
+
"402 throwbacks 143005 2 190 MiMS \n",
|
| 527 |
+
"403 throwbacks 143005 2 191 Rihanna \n",
|
| 528 |
+
"404 throwbacks 143005 2 192 DEV \n",
|
| 529 |
+
"\n",
|
| 530 |
+
" track_name album_name \\\n",
|
| 531 |
+
"212 Ignition - Remix Chocolate Factory \n",
|
| 532 |
+
"213 I Want It That Way Original Album Classics \n",
|
| 533 |
+
"214 Bye Bye Bye No Strings Attached \n",
|
| 534 |
+
"215 Stacy's Mom Welcome Interstate Managers \n",
|
| 535 |
+
"216 1985 A Hangover You Don't Deserve \n",
|
| 536 |
+
".. ... ... \n",
|
| 537 |
+
"400 Too Little, Too Late - Radio Version Too Little, Too Late \n",
|
| 538 |
+
"401 Wannabe - Radio Edit Spice \n",
|
| 539 |
+
"402 This Is Why I'm Hot Music Is My Savior \n",
|
| 540 |
+
"403 Disturbia Good Girl Gone Bad \n",
|
| 541 |
+
"404 Bass Down Low The Night The Sun Came Up \n",
|
| 542 |
+
"\n",
|
| 543 |
+
" artist_id playlist_id song_id album_id artist_count album_count \\\n",
|
| 544 |
+
"212 108 5 203 152 1 1 \n",
|
| 545 |
+
"213 109 5 204 153 1 1 \n",
|
| 546 |
+
"214 110 5 205 154 1 1 \n",
|
| 547 |
+
"215 111 5 206 155 1 1 \n",
|
| 548 |
+
"216 112 5 207 156 1 1 \n",
|
| 549 |
+
".. ... ... ... ... ... ... \n",
|
| 550 |
+
"400 199 5 390 293 1 1 \n",
|
| 551 |
+
"401 200 5 391 294 1 1 \n",
|
| 552 |
+
"402 201 5 392 295 1 1 \n",
|
| 553 |
+
"403 115 5 393 296 3 3 \n",
|
| 554 |
+
"404 179 5 394 264 2 1 \n",
|
| 555 |
+
"\n",
|
| 556 |
+
" song_count playlist_songs artist_percent song_percent album_percent \n",
|
| 557 |
+
"212 1 193 0.005181 0.005181 0.005181 \n",
|
| 558 |
+
"213 1 193 0.005181 0.005181 0.005181 \n",
|
| 559 |
+
"214 1 193 0.005181 0.005181 0.005181 \n",
|
| 560 |
+
"215 1 193 0.005181 0.005181 0.005181 \n",
|
| 561 |
+
"216 1 193 0.005181 0.005181 0.005181 \n",
|
| 562 |
+
".. ... ... ... ... ... \n",
|
| 563 |
+
"400 1 193 0.005181 0.005181 0.005181 \n",
|
| 564 |
+
"401 1 193 0.005181 0.005181 0.005181 \n",
|
| 565 |
+
"402 1 193 0.005181 0.005181 0.005181 \n",
|
| 566 |
+
"403 3 193 0.015544 0.015544 0.015544 \n",
|
| 567 |
+
"404 2 193 0.010363 0.010363 0.005181 \n",
|
| 568 |
+
"\n",
|
| 569 |
+
"[193 rows x 18 columns]"
|
| 570 |
+
]
|
| 571 |
+
},
|
| 572 |
+
"execution_count": 13,
|
| 573 |
+
"metadata": {},
|
| 574 |
+
"output_type": "execute_result"
|
| 575 |
+
}
|
| 576 |
+
],
|
| 577 |
+
"source": [
|
| 578 |
# Show every track row that belongs to the playlist whose encoded id is 5.
df[df['playlist_id'].eq(5)]
|
| 579 |
+
]
|
| 580 |
+
},
|
| 581 |
+
{
|
| 582 |
+
"cell_type": "code",
|
| 583 |
+
"execution_count": 14,
|
| 584 |
+
"metadata": {},
|
| 585 |
+
"outputs": [
|
| 586 |
+
{
|
| 587 |
+
"data": {
|
| 588 |
+
"text/html": [
|
| 589 |
+
"<div>\n",
|
| 590 |
+
"<style scoped>\n",
|
| 591 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 592 |
+
" vertical-align: middle;\n",
|
| 593 |
+
" }\n",
|
| 594 |
+
"\n",
|
| 595 |
+
" .dataframe tbody tr th {\n",
|
| 596 |
+
" vertical-align: top;\n",
|
| 597 |
+
" }\n",
|
| 598 |
+
"\n",
|
| 599 |
+
" .dataframe thead th {\n",
|
| 600 |
+
" text-align: right;\n",
|
| 601 |
+
" }\n",
|
| 602 |
+
"</style>\n",
|
| 603 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 604 |
+
" <thead>\n",
|
| 605 |
+
" <tr style=\"text-align: right;\">\n",
|
| 606 |
+
" <th></th>\n",
|
| 607 |
+
" <th>playlist_id</th>\n",
|
| 608 |
+
" <th>artist_id</th>\n",
|
| 609 |
+
" <th>artist_percent</th>\n",
|
| 610 |
+
" </tr>\n",
|
| 611 |
+
" </thead>\n",
|
| 612 |
+
" <tbody>\n",
|
| 613 |
+
" <tr>\n",
|
| 614 |
+
" <th>0</th>\n",
|
| 615 |
+
" <td>0</td>\n",
|
| 616 |
+
" <td>0</td>\n",
|
| 617 |
+
" <td>0.571429</td>\n",
|
| 618 |
+
" </tr>\n",
|
| 619 |
+
" <tr>\n",
|
| 620 |
+
" <th>1</th>\n",
|
| 621 |
+
" <td>0</td>\n",
|
| 622 |
+
" <td>0</td>\n",
|
| 623 |
+
" <td>0.571429</td>\n",
|
| 624 |
+
" </tr>\n",
|
| 625 |
+
" <tr>\n",
|
| 626 |
+
" <th>2</th>\n",
|
| 627 |
+
" <td>0</td>\n",
|
| 628 |
+
" <td>0</td>\n",
|
| 629 |
+
" <td>0.571429</td>\n",
|
| 630 |
+
" </tr>\n",
|
| 631 |
+
" <tr>\n",
|
| 632 |
+
" <th>3</th>\n",
|
| 633 |
+
" <td>0</td>\n",
|
| 634 |
+
" <td>0</td>\n",
|
| 635 |
+
" <td>0.571429</td>\n",
|
| 636 |
+
" </tr>\n",
|
| 637 |
+
" <tr>\n",
|
| 638 |
+
" <th>4</th>\n",
|
| 639 |
+
" <td>0</td>\n",
|
| 640 |
+
" <td>0</td>\n",
|
| 641 |
+
" <td>0.571429</td>\n",
|
| 642 |
+
" </tr>\n",
|
| 643 |
+
" </tbody>\n",
|
| 644 |
+
"</table>\n",
|
| 645 |
+
"</div>"
|
| 646 |
+
],
|
| 647 |
+
"text/plain": [
|
| 648 |
+
" playlist_id artist_id artist_percent\n",
|
| 649 |
+
"0 0 0 0.571429\n",
|
| 650 |
+
"1 0 0 0.571429\n",
|
| 651 |
+
"2 0 0 0.571429\n",
|
| 652 |
+
"3 0 0 0.571429\n",
|
| 653 |
+
"4 0 0 0.571429"
|
| 654 |
+
]
|
| 655 |
+
},
|
| 656 |
+
"execution_count": 14,
|
| 657 |
+
"metadata": {},
|
| 658 |
+
"output_type": "execute_result"
|
| 659 |
+
}
|
| 660 |
+
],
|
| 661 |
+
"source": [
|
| 662 |
# Keep only the id columns and the album share; every row is retained.
artists = df.loc[
    :,
    ['playlist_id', 'artist_id', 'album_id', 'album_percent'],
]
# Preview the first rows of the reduced frame.
artists.head()
|
| 664 |
+
]
|
| 665 |
+
},
|
| 666 |
+
{
|
| 667 |
+
"cell_type": "code",
|
| 668 |
+
"execution_count": 15,
|
| 669 |
+
"metadata": {},
|
| 670 |
+
"outputs": [],
|
| 671 |
+
"source": [
|
| 672 |
# Features are the three id columns; the regression target is album_percent.
X = artists.loc[
    :,
    ['playlist_id', 'artist_id', 'album_id'],
]
y = artists.loc[:, 'album_percent']

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.2,
)
|
| 677 |
+
]
|
| 678 |
+
},
|
| 679 |
+
{
|
| 680 |
+
"cell_type": "code",
|
| 681 |
+
"execution_count": 16,
|
| 682 |
+
"metadata": {},
|
| 683 |
+
"outputs": [],
|
| 684 |
+
"source": [
|
| 685 |
def prep_dataloaders(X_train,y_train,X_val,y_val,batch_size):
    """Wrap the training and validation splits in minibatch DataLoaders.

    Features are converted with ``np.array`` and cast to int64 tensors
    (``.long()``); targets are cast to float32 tensors (``.float()``).

    Args:
        X_train: Training features (anything ``np.array`` accepts).
        y_train: Training targets.
        X_val: Validation features.
        y_val: Validation targets.
        batch_size: Number of rows per minibatch, used for both loaders.

    Returns:
        A ``(trainloader, valloader)`` tuple. Only the training loader
        shuffles; the validation loader keeps the input order.
    """
    def _to_dataset(features, targets):
        # Pair each integer feature row with its float target.
        feature_tensor = torch.from_numpy(np.array(features)).long()
        target_tensor = torch.from_numpy(np.array(targets)).float()
        return TensorDataset(feature_tensor, target_tensor)

    train_ds = _to_dataset(X_train, y_train)
    val_ds = _to_dataset(X_val, y_val)

    make_loader = torch.utils.data.DataLoader
    trainloader = make_loader(train_ds, batch_size=batch_size, shuffle=True)
    valloader = make_loader(val_ds, batch_size=batch_size, shuffle=False)

    return trainloader, valloader
|
| 697 |
+
"\n",
|
| 698 |
# One minibatch size is shared by the training and validation loaders.
batchsize = 64
trainloader, valloader = prep_dataloaders(
    X_train,
    y_train,
    X_val,
    y_val,
    batch_size=batchsize,
)
|
| 700 |
+
]
|
| 701 |
+
},
|
| 702 |
+
{
|
| 703 |
+
"cell_type": "code",
|
| 704 |
+
"execution_count": 17,
|
| 705 |
+
"metadata": {},
|
| 706 |
+
"outputs": [],
|
| 707 |
+
"source": [
|
| 708 |
class NNColabFiltering(nn.Module):
    """Embedding-based regressor over (user id, item id) pairs.

    Column 0 of the input is looked up in ``user_embeddings`` and column 1
    in ``item_embeddings``; any further input columns are ignored. The two
    embeddings are concatenated, passed through one hidden ReLU layer, and
    the single output is squashed into ``rating_range`` with a sigmoid.
    """

    def __init__(self, n_playlists, n_artists, embedding_dim_users, embedding_dim_items, n_activations, rating_range):
        """Create the embedding tables and the two linear layers.

        Args:
            n_playlists: Number of rows in the user embedding table.
            n_artists: Number of rows in the item embedding table.
            embedding_dim_users: Width of each user embedding.
            embedding_dim_items: Width of each item embedding.
            n_activations: Number of units in the hidden layer.
            rating_range: Two-element sequence ``[low, high]`` giving the
                output range of the predictions.
        """
        super().__init__()
        self.user_embeddings = nn.Embedding(n_playlists, embedding_dim_users)
        self.item_embeddings = nn.Embedding(n_artists, embedding_dim_items)
        # The hidden layer consumes the concatenated user+item embedding.
        concat_width = embedding_dim_users + embedding_dim_items
        self.fc1 = nn.Linear(concat_width, n_activations)
        self.fc2 = nn.Linear(n_activations, 1)
        self.rating_range = rating_range

    def forward(self, X):
        """Return predictions of shape ``(batch, 1)`` for integer id rows ``X``."""
        user_vecs = self.user_embeddings(X[:, 0])
        item_vecs = self.item_embeddings(X[:, 1])
        joined = torch.cat((user_vecs, item_vecs), dim=1)

        hidden = F.relu(self.fc1(joined))
        raw = self.fc2(hidden)

        # Map the sigmoid's (0, 1) output onto [low, high].
        low = self.rating_range[0]
        high = self.rating_range[1]
        return torch.sigmoid(raw) * (high - low) + low
|
| 731 |
+
]
|
| 732 |
+
},
|
| 733 |
+
{
|
| 734 |
+
"cell_type": "code",
|
| 735 |
+
"execution_count": 19,
|
| 736 |
+
"metadata": {},
|
| 737 |
+
"outputs": [],
|
| 738 |
+
"source": [
|
| 739 |
def train_model(model, criterion, optimizer, dataloaders, device, num_epochs=5, scheduler=None):
    """Train ``model`` and evaluate it on the validation data every epoch.

    Args:
        model: The ``nn.Module`` to optimise; it is moved to ``device``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        dataloaders: Mapping with ``'train'`` and ``'val'`` DataLoaders that
            yield ``(inputs, labels)`` batches.
        device: Device the model and every batch are moved to.
        num_epochs: Number of passes over both loaders.
        scheduler: Optional learning-rate scheduler, stepped once after each
            training phase.

    Returns:
        Dict ``{'train': [...], 'val': [...]}`` with one value per epoch: the
        batch-size-weighted mean of ``sqrt(batch loss)`` (the per-batch RMSE
        when ``criterion`` is a mean-squared error).
    """
    model = model.to(device)  # Send model to GPU if available
    since = time.time()

    costpaths = {'train': [], 'val': []}

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            loader = dataloaders[phase]
            n_batches = len(loader)
            running_loss = 0.0

            # Count batches from 1 so the progress fraction reaches 100%
            # on the final batch.
            for index, (inputs, labels) in enumerate(loader, start=1):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the weight gradients
                optimizer.zero_grad()

                # Track gradients only during the training phase
                with torch.set_grad_enabled(is_train):
                    # Call the module itself (not .forward) so that any
                    # registered hooks are run.
                    outputs = model(inputs).view(-1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        # Backpropagate, then update the weights
                        loss.backward()
                        optimizer.step()

                # Weight sqrt(batch loss) by the batch size so the epoch
                # value is a per-sample average.
                running_loss += np.sqrt(loss.item()) * labels.size(0)
                print(f'\r{running_loss} {index} {index / n_batches:.2%}', end='')

            # Terminate the carriage-return progress line so the epoch
            # summary starts on its own line.
            if n_batches:
                print()

            # Step along the learning-rate scheduler after training only
            if is_train and scheduler is not None:
                scheduler.step()

            # Average loss over all samples seen in this phase
            epoch_loss = running_loss / len(loader.dataset)
            costpaths[phase].append(epoch_loss)
            print('{} loss: {:.4f}'.format(phase, epoch_loss))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return costpaths
|
| 799 |
+
]
|
| 800 |
+
},
|
| 801 |
+
{
|
| 802 |
+
"cell_type": "code",
|
| 803 |
+
"execution_count": null,
|
| 804 |
+
"metadata": {},
|
| 805 |
+
"outputs": [
|
| 806 |
+
{
|
| 807 |
+
"name": "stdout",
|
| 808 |
+
"output_type": "stream",
|
| 809 |
+
"text": [
|
| 810 |
+
"Epoch 0/2\n",
|
| 811 |
+
"----------\n",
|
| 812 |
+
"910724978601.7391 123493 100.00%\n",
|
| 813 |
+
"train loss: 115229.4395\n",
|
| 814 |
+
"227700857865.127 30873 100.00%\n",
|
| 815 |
+
"val loss: 115239.3512\n",
|
| 816 |
+
"Epoch 1/2\n",
|
| 817 |
+
"----------\n",
|
| 818 |
+
"910727409277.4519 123493 100.00%\n",
|
| 819 |
+
"train loss: 115229.7471\n",
|
| 820 |
+
"227700857865.127 30873 100.00%\n",
|
| 821 |
+
"val loss: 115239.3512\n",
|
| 822 |
+
"Epoch 2/2\n",
|
| 823 |
+
"----------\n",
|
| 824 |
+
"910734475316.9005 123493 100.00%\n",
|
| 825 |
+
"train loss: 115230.6411\n",
|
| 826 |
+
"227700857865.127 30873 100.00%\n",
|
| 827 |
+
"val loss: 115239.3512\n",
|
| 828 |
+
"Training complete in 71m 54s\n"
|
| 829 |
+
]
|
| 830 |
+
}
|
| 831 |
+
],
|
| 832 |
+
"source": [
|
| 833 |
dataloaders = {'train': trainloader, 'val': valloader}

# Embedding tables are indexed directly by id, so each needs max(id) + 1 rows.
n_playlists = X.loc[:, 'playlist_id'].max() + 1
n_artists = X.loc[:, 'artist_id'].max() + 1
n_albums = X.loc[:, 'album_id'].max() + 1

# NOTE(review): the model embeds playlist_id and artist_id only (input
# columns 0 and 1); album_id is passed in X but not consumed, although the
# target is album_percent -- confirm this is the intended item column.
model = NNColabFiltering(
    n_playlists,
    n_artists,
    embedding_dim_users=50,
    embedding_dim_items=50,
    n_activations=100,
    # The target `album_percent` is a fraction of the playlist, so the
    # prediction range must be [0, 1]. Scaling to [0, n_albums] put the
    # predictions on the order of the album count and the loss never moved.
    rating_range=[0., 1.],
)
criterion = nn.MSELoss()
lr = 0.001
n_epochs = 10
wd = 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

costpaths = train_model(model, criterion, optimizer, dataloaders, device, n_epochs, scheduler=None)
|
| 853 |
+
]
|
| 854 |
+
},
|
| 855 |
+
{
|
| 856 |
+
"cell_type": "code",
|
| 857 |
+
"execution_count": null,
|
| 858 |
+
"metadata": {},
|
| 859 |
+
"outputs": [
|
| 860 |
+
{
|
| 861 |
+
"data": {
|
| 862 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAABNoAAAHWCAYAAAChceSWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAACCgElEQVR4nOzde1zUVf7H8fdwGxCZQRBEBJWyxCsK3ii7WAaVmRdctcxL2sVW3dTd1txKrbYs27bsl+m2a2oXy7xmWpppaiXeQLwmmnlDBDRkRlGuM78/XGeX9RIq+h3g9Xw85rE753vmzOfMmB7efM/3a3I6nU4BAAAAAAAAuCoeRhcAAAAAAAAAVAUEbQAAAAAAAEAFIGgDAAAAAAAAKgBBGwAAAAAAAFABCNoAAAAAAACACkDQBgAAAAAAAFQAgjYAAAAAAACgAhC0AQAAAAAAABWAoA0AAAAAAACoAARtAFBBGjZsqEGDBhldBgAAAK6hmTNnymQy6cCBA0aXAsANEbQBqFbWrVunCRMmKC8vz+hSAAAAAABVjJfRBQDA9bRu3Tq9+OKLGjRokAIDAyt07PT0dHl48PsLAAAAAKiu+IkQAC7A4XCooKDgsl5jNpvl7e19jSoCAAAAALg7gjYA1caECRP0zDPPSJKioqJkMplc19cwmUwaPny4PvnkEzVr1kxms1nLli2TJP3tb3/TLbfcouDgYPn5+SkuLk7z5s07b/z/vUbbuet3/Pjjjxo9erRCQkLk7++vHj166NixY9dlzgAAANXdvHnzZDKZtGbNmvOO/eMf/5DJZNKOHTu0bds2DRo0SDfccIN8fX0VFhamwYMH69dffzWgagCVFVtHAVQbPXv21J49e/Tpp5/qrbfeUu3atSVJISEhkqRVq1bp888/1/Dhw1W7dm01bNhQkjR58mQ9+OCD6tevn4qKivTZZ5/pd7/7nZYsWaIuXbr85vuOGDFCtWrV0vjx43XgwAG9/fbbGj58uObMmXPN5goAAICzunTpopo1a+rzzz/XHXfcUebYnDlz1KxZMzVv3lxvvvmmfvnlFz366KMKCwvTzp079f7772vnzp1av369TCaTQTMAUJkQtAGoNlq2bKnY2Fh9+umn6t69uytIOyc9PV3bt29X06ZNy7Tv2bNHfn5+rufDhw9XbGys/v73v5craAsODtY333zjWpw5HA698847stlsslqtVz8xAAAAXJSfn5+6du2qefPm6Z133pGnp6ckKSsrS2vWrNGECRMkSb///e/1xz/+scxrO3TooIceekg//PCDbrvttutdOoBKiK2jAPBvd9xxx3khm6QyIduJEydks9l02223KTU1tVzjPvHEE2V+A3rbbbeptLRUBw8evPqiAQAA8Jv69OmjnJwcrV692tU2b948ORwO9enTR1LZNV9BQYGOHz+uDh06SFK5130AQNBWgV555RXdcsstqlGjRrnvZrhgwQIlJCQoODhYJpNJaWlp5/W58847XdeSOvcYOnSo6/jWrVv10EMPKTIyUn5+fmrSpIkmT5582fX/1vsAVV1UVNQF25csWaIOHTrI19dXQUFBCgkJ0dSpU2Wz2co1bv369cs8r1WrlqSzoR0AAACuvXvvvVdWq7XMpTvmzJmjVq1a6eabb5Yk5ebm6umnn1adOnXk5+enkJAQ1/qwvOs+ACBou0x33nmnZs6cecFjRUVF+t3vfqennnqq3OPl5+erY8eOev311y/Z7/HHH9fRo0ddj0mTJrmOpaSkKDQ0VB9//LF27typ5557TmPHjtW7775b7jrK8z5AVfffv8U85/vvv9eDDz4oX19fvffee/rqq6+0YsUKPfzww3I6neUa99z2hP9V3tcDAADg6pjNZnXv3l0LFy5USUmJjhw5oh9//NF1Npsk9e7dW//85z81dOhQLViwQN98843r5lgOh8Oo0gFUMlyjrQK9+OKLknTRIO5C+vfvL0k6cODAJfvVqF
FDYWFhFzw2ePDgMs9vuOEGJScna8GCBRo+fLir/YsvvtCLL76oXbt2KTw8XAMHDtRzzz0nL6///DG41PsAVcHlXsR2/vz58vX11fLly2U2m13tM2bMqOjSAAAAcA316dNHs2bN0sqVK/XTTz/J6XS6grYTJ05o5cqVevHFFzVu3DjXa/bu3WtUuQAqKc5oqyQ++eQT1a5dW82bN9fYsWN1+vTpS/a32WwKCgpyPf/+++81YMAAPf3009q1a5f+8Y9/aObMmXrllVeu6n2Aysbf31+SlJeXV67+np6eMplMKi0tdbUdOHBAixYtugbVAQAA4Frp3LmzgoKCNGfOHM2ZM0ft2rVzbQ09twPhf3ccvP3229e7TACVHGe0VQIPP/ywGjRooPDwcG3btk1jxoxRenq6FixYcMH+69at05w5c7R06VJX24svvqhnn31WAwcOlHT2rLeXX35Zf/7znzV+/Pgreh+gMoqLi5MkPffcc+rbt6+8vb3VtWvXi/bv0qWL/v73v+vee+/Vww8/rJycHE2ZMkWNGjXStm3brlfZAAAAuEre3t7q2bOnPvvsM+Xn5+tvf/ub65jFYtHtt9+uSZMmqbi4WPXq1dM333yj/fv3G1gxgMqIoO03vPrqq3r11Vddz8+cOaP169eX2ZK5a9eu8y52XpGeeOIJ1/9v0aKF6tatq7vvvlv79u3TjTfeWKbvjh071K1bN40fP14JCQmu9q1bt+rHH38scwZbaWmpCgoKdPr0adWoUeOy3georNq2bauXX35Z06ZN07Jly+RwOC65gLrrrrs0ffp0vfbaaxo5cqSioqL0+uuv68CBAwRtAAAAlUyfPn30r3/9SyaTSb179y5zbPbs2RoxYoSmTJkip9OphIQEff311woPDzeoWgCVkcnJ1bgvKTc3V7m5ua7n/fr1U1JSknr27Olqa9iwYZnrnM2cOVMjR44s99Y06exWtKioKG3ZskWtWrW6ZN/8/HzVrFlTy5YtU2Jioqt9165d6tSpkx577LHztoT6+fnpxRdfLFP3OTfccIM8PM7fRXyx9wEAAAAAAMD5OKPtNwQFBZW51pmfn59CQ0PVqFEjw2pKS0uTJNWtW9fVtnPnTt11110aOHDgeSGbJMXGxio9Pf2y6r7Q+wAAAAAAAODCCNoq0KFDh5Sbm6tDhw6ptLTUFVQ1atRINWvWlCRFR0dr4sSJ6tGjhyS5+mdmZkqS0tPTJUlhYWEKCwvTvn37NHv2bN1///0KDg7Wtm3bNGrUKN1+++1q2bKlpLPbRe+66y4lJiZq9OjRysrKknT2gp4hISGSpHHjxumBBx5Q/fr11atXL3l4eGjr1q3asWOH/vrXv5brfQAAAAAAAHBx3HW0Ao0bN06tW7fW+PHjderUKbVu3VqtW7fW5s2bXX3S09Nls9lczxcvXqzWrVurS5cukqS+ffuqdevWmjZtmiTJx8dH3377rRISEhQdHa0//vGPSkpK0pdffukaY968eTp27Jg+/vhj1a1b1/Vo27atq09iYqKWLFmib775Rm3btlWHDh301ltvqUGDBuV+HwAAAAAAAFwc12gDAAAAAAAAKgBntAEAAAAAAAAVgKANAAAAAAAAqADcDOECHA6HMjMzFRAQIJPJZHQ5AACgknA6nTp58qTCw8Pl4cHvM90R6zwAAHAlyrvOI2i7gMzMTEVGRhpdBgAAqKQOHz6siIgIo8vABbDOAwAAV+O31nkEbRcQEBAg6eyHZ7FYDK4GAABUFna7XZGRka61BNwP6zwAAHAlyrvOI2i7gHPbCCwWCwswAABw2diS6L5Y5wEAgKvxW+s8Lh4CAAAAAAAAVACCNgAAAAAAAKACELQBAAAAAAAAFYCgDQAAAAAAAKgABG0AAAAAAABABSBoAwAAAAAAACoAQRsAAAAAAABQAQjaAAAAAAAAgApA0AYAAAAAAABUAII2AAAAAAAAoAIQtAEAAAAAAAAVgKANAAAAAAAAqAAEbQ
AAoNqwnS42ugQAAABUYQRtAACgWlj/y6/q+Poqfb39qNGlAAAAoIoiaAMAAFXejiM2PTZrs04WlmhR2hE5nU6jSwIAAEAVRNAGAACqtF+OndLADzbqVGGJ2kcFaXLf1jKZTEaXBQAAgCqIoA0AAFRZWbYC9Z++Ub/mF6lZuEX/HNhGvt6eRpcFAACAKoqgDQAAVEl5p4vUf/oGHck7o6ja/po1uJ0svt5GlwUAAIAqjKANAABUOfmFJRo0Y5P25pxSHYtZHw5up9o1zUaXBQAAgCqOoA0AAFQpRSUODf04RWmH82T189ZHQ9orMqiG0WUBAACgGiBoAwAAVUapw6nRn6fp+73HVcPHUzMebaub6wQYXRYAAACqCYI2AABQJTidTo37YoeWbDsqb0+Tpj0Sp9j6tYwuCwAAANWIoUHb1KlT1bJlS1ksFlksFsXHx+vrr792HS8oKNCwYcMUHBysmjVrKikpSdnZ2b857k8//aQHH3xQVqtV/v7+atu2rQ4dOnQtpwIAAAz29xV79MmGQzKZpLf6tNLtN4cYXRIAAACqGUODtoiICL322mtKSUnR5s2bddddd6lbt27auXOnJGnUqFH68ssvNXfuXK1Zs0aZmZnq2bPnJcfct2+fOnbsqOjoaK1evVrbtm3TCy+8IF9f3+sxJQAAYIDpP+zX/636WZL0crfmeqBluMEVAQAAoDoyOZ1Op9FF/LegoCC98cYb6tWrl0JCQjR79mz16tVLkrR79241adJEycnJ6tChwwVf37dvX3l7e+ujjz664hrsdrusVqtsNpssFssVjwMAAK69BakZGv35VknSnxJu1vC7bjKsFtYQ7o/vCAAAXInyriHc5hptpaWl+uyzz5Sfn6/4+HilpKSouLhYnTt3dvWJjo5W/fr1lZycfMExHA6Hli5dqptvvlmJiYkKDQ1V+/bttWjRoku+d2Fhoex2e5kHAABwf9/uytYz87ZJkgbfGqVhnRoZXBEAAACqM8ODtu3bt6tmzZoym80aOnSoFi5cqKZNmyorK0s+Pj4KDAws079OnTrKysq64Fg5OTk6deqUXnvtNd1777365ptv1KNHD/Xs2VNr1qy5aA0TJ06U1Wp1PSIjIytyigAA4BrYuD9Xw2anqtThVM/W9fR8lyYymUxGlwUAAIBqzMvoAho3bqy0tDTZbDbNmzdPAwcOvGQodikOh0OS1K1bN40aNUqS1KpVK61bt07Tpk3THXfcccHXjR07VqNHj3Y9t9vthG0AALixnZk2DZm5SYUlDt0dHarXe7WUhwchGwAAAIxleNDm4+OjRo3ObvOIi4vTpk2bNHnyZPXp00dFRUXKy8src1Zbdna2wsLCLjhW7dq15eXlpaZNm5Zpb9KkiX744YeL1mA2m2U2m69+MgAA4Jo7cDxfAz/YpJOFJWrXMEhT+sXK29Pwk/QBAAAA47eO/i+Hw6HCwkLFxcXJ29tbK1eudB1LT0/XoUOHFB8ff8HX+vj4qG3btkpPTy/TvmfPHjVo0OCa1g0AAK69bHuBHpm+QcdPFapJXYv+ObCNfL09jS4LAAAAkGTwGW1jx47Vfffdp/r16+vkyZOaPXu2Vq9ereXLl8tqtWrIkCEaPXq0goKCZLFYNGLECMXHx5e542h0dLQmTpyoHj16SJKeeeYZ9enTR7fffrs6deqkZcuW6csvv9Tq1asNmiUAAKgIeaeLNGD6RmWcOKOGwTX04eB2svp5G10WAAAA4GJo0JaTk6MBAwbo6NGjslqtatmypZYvX6577rlHkvTWW2/Jw8NDSUlJKiwsVGJiot57770yY6Snp8tms7me9+jRQ9OmTdPEiRP1hz/8QY0bN9b8+fPVsWPH6zo3AABQcU4XlWjwzE1Kzz6p0ACzPhrSXiEBXPYBAAAA7sXkdDqdRhfhbux2u6xWq2w2mywWi9HlAABQrRWVOPT4h5u1Zs8xWXy9NHfoLWocFmB0WRfEGsL98R0BAIArUd41hN
tdow0AAOAch8OpP87dqjV7jsnP21MzHm3ntiEbAAAAQNAGAADcktPp1IQvd+rLrZny8jBp6iOximtQy+iyAAAAgIsiaAMAAG7p7W/36sPkgzKZpDd7x+jOxqFGlwQAAABcEkEbAABwOzN/3K/JK/dKkl56sJm6tapncEUAAADAbyNoAwAAbmXRliOa8OUuSdKozjerf3xDYwsCAAAAyomgDQAAuI3vdufoT3O3SpIG3dJQf7i7kcEVAQAAAOVH0AYAANzCpgO5euqTFJU4nOreKlzjHmgqk8lkdFkAAABAuRG0AQAAw/101K7BMzepoNihu6JD9cbvYuThQcgGAACAyoWgDQAAGOrgr/ka8MFGnSwoUZsGtTTl4Vh5e7JEuV7Wrl2rrl27Kjw8XCaTSYsWLXIdKy4u1pgxY9SiRQv5+/srPDxcAwYMUGZmZpkxcnNz1a9fP1ksFgUGBmrIkCE6depUmT7btm3TbbfdJl9fX0VGRmrSpEnn1TJ37lxFR0fL19dXLVq00FdffVXmuNPp1Lhx41S3bl35+fmpc+fO2rt3b8V9GAAAAFeJVSwAADBMjr1A/adv1LGThYoOC9D0QW3l5+NpdFnVSn5+vmJiYjRlypTzjp0+fVqpqal64YUXlJqaqgULFig9PV0PPvhgmX79+vXTzp07tWLFCi1ZskRr167VE0884Tput9uVkJCgBg0aKCUlRW+88YYmTJig999/39Vn3bp1euihhzRkyBBt2bJF3bt3V/fu3bVjxw5Xn0mTJumdd97RtGnTtGHDBvn7+ysxMVEFBQXX4JMBAAC4fCan0+k0ugh3Y7fbZbVaZbPZZLFYjC4HAIAqyXa6WH3eT9burJOqH1RD84bGK9Tia3RZV6WyryFMJpMWLlyo7t27X7TPpk2b1K5dOx08eFD169fXTz/9pKZNm2rTpk1q06aNJGnZsmW6//77lZGRofDwcE2dOlXPPfecsrKy5OPjI0l69tlntWjRIu3evVuS1KdPH+Xn52vJkiWu9+rQoYNatWqladOmyel0Kjw8XH/84x/1pz/9SZJks9lUp04dzZw5U3379i3XHCv7dwQAAIxR3jUEZ7QBAIDr7kxRqYbM2qTdWScVEmDWx0PaV/qQrbqw2WwymUwKDAyUJCUnJyswMNAVsklS586d5eHhoQ0bNrj63H777a6QTZISExOVnp6uEydOuPp07ty5zHslJiYqOTlZkrR//35lZWWV6WO1WtW+fXtXnwspLCyU3W4v8wAAALhWCNoAAMB1VVzq0O8/SdHmgycU4OulDwe3U/3gGkaXhXIoKCjQmDFj9NBDD7l+k5uVlaXQ0NAy/by8vBQUFKSsrCxXnzp16pTpc+75b/X57+P//boL9bmQiRMnymq1uh6RkZGXNWcAAIDLQdAGAACuG4fDqWfmbtV36cfk6+2hGYPaqkldtu9VBsXFxerdu7ecTqemTp1qdDnlNnbsWNlsNtfj8OHDRpcEAACqMC+jCwAAANWD0+nUS0t2aVFaprw8TJraL05tGgYZXRbK4VzIdvDgQa1atarMdUnCwsKUk5NTpn9JSYlyc3MVFhbm6pOdnV2mz7nnv9Xnv4+fa6tbt26ZPq1atbpo7WazWWaz+XKmCwAAcMU4ow0AAFwX76z8WTPXHZAkvdk7Rp2iQy/9AriFcyHb3r179e233yo4OLjM8fj4eOXl5SklJcXVtmrVKjkcDrVv397VZ+3atSouLnb1WbFihRo3bqxatWq5+qxcubLM2CtWrFB8fLwkKSoqSmFhYWX62O12bdiwwdUHAADAaARtAADgmvsw+YDe+naPJGlC16bq1qqewRXhnFOnTiktLU1paWmSzt50IC0tTYcOHVJxcbF69eqlzZs365NPPlFpaamysrKUlZWloqIiSVKTJk1077336vHHH9fGjRv1448/avjw4erbt6/Cw8MlSQ8//LB8fHw0ZMgQ7dy5U3PmzNHkyZM1evRoVx1PP/20li
1bpjfffFO7d+/WhAkTtHnzZg0fPlzS2Tuijhw5Un/961+1ePFibd++XQMGDFB4ePgl75IKAABwPZmcTqfT6CLcDbd9BwCg4nyRdkQj56TJ6ZSevvsmjbrnZqNLumYq4xpi9erV6tSp03ntAwcO1IQJExQVFXXB13333Xe68847JUm5ubkaPny4vvzyS3l4eCgpKUnvvPOOatas6eq/bds2DRs2TJs2bVLt2rU1YsQIjRkzpsyYc+fO1fPPP68DBw7opptu0qRJk3T//fe7jjudTo0fP17vv/++8vLy1LFjR7333nu6+eby/5mqjN8RAAAwXnnXEARtF8ACDACAirE6PUePzdqsEodTA+Ib6MUHm8lkMhld1jXDGsL98R0BAIArUd41BFtHAQDANZFyMFdDP05RicOpB2PCNaFr1Q7ZAAAAAII2AABQ4XZn2fXojE0qKHbojptD9LffxcjDg5ANAAAAVRtBGwAAqFCHc09rwPSNsheUKLZ+oKY+EisfL5YcAAAAqPpY9QIAgApz7GShHpm+QTknC9W4ToA+GNRWNXy8jC4LAAAAuC4I2gAAQIWwnSnWgA826uCvpxUZ5KcPh7RTYA0fo8sCAAAArhuCNgAAcNXOFJXq8Vmb9dNRu2rXNOujwe1Vx+JrdFkAAADAdUXQBgAArkpxqUPDZ6dq44FcBfh66cPB7dSwtr/RZQEAAADXHUEbAAC4Yg6HU3+et00rd+fI7OWh6QPbqmm4xeiyAAAAAEMQtAEAgCvidDr18tJdWrjliDw9THqvX6zaRQUZXRYAAABgGII2AABwRaZ897Nm/HhAkvRGr5a6u0kdYwsCAAAADEbQBgAALtvH6w/qb9/skSSNe6CpesZGGFwRAAAAYDyCNgAAcFmWbMvUC1/skCSNuKuRBneMMrgiAAAAwD0QtAEAgHJbu+eYRs1Jk9Mp9WtfX6PvudnokgAAAAC3QdAGAADKJfXQCT35UYqKS516oGVdvdStuUwmk9FlAQAAAG6DoA0AAPymPdkn9eiMTTpTXKrbbqqtv/duJU8PQjYAAADgvxG0AQCASzqce1r9p2+Q7UyxWtcP1D/6x8nHiyUEAAAA8L9YJQMAgIs6drJQ/advULa9UDfXqakZg9qqho+X0WUBAAAAbomgDQAAXJC9oFiDZmzUgV9Pq16gnz4c3F6BNXyMLgsAAABwWwRtAADgPAXFpXps1mbtzLQr2N9HHz/WXmFWX6PLAgAAANwaQRsAACijpNSh4bO3aOP+XAWYvTRrcDtF1fY3uiwAAADA7RG0AQAAF4fDqTHzt+vbn7Ll4+Whfw5so+b1rEaXBQAAAFQKBG0AAECS5HQ69epXP2l+aoY8PUya8nCsOtwQbHRZAAAAQKVB0AYAACRJU9fs079+2C9JmpTUUvc0rWNwRQAAAEDlQtAGAAA0e8MhTVqWLkl6vksTJcVFGFwRAAAAUPkQtAEAUM19tf2onlu0XZI0rNONeuy2GwyuCAAAAKicCNoAAKjGvt97TE9/tkVOp/RQu/r6U0Jjo0sCAAAAKi2CNgAAqqm0w3l68qMUFZc6dX+LMP21e3OZTCajywIAAAAqLYI2AACqoZ9zTmrQjI06XVSqjo1q660+reTpQcgGAAAAXA2CNgAAqpmME6f1yL82Ku90sWIiA/WP/nEye3kaXRYAAABQ6RkatE2dOlUtW7aUxWKRxWJRfHy8vv76a9fxgoICDRs2TMHBwapZs6aSkpKUnZ1d7vGHDh0qk8mkt99++xpUDwBA5fPrqUINmL5RWfYCNQqtqRmD2srf7GV0WQAAAECVYGjQFhERoddee00pKSnavHmz7rrrLnXr1k07d+6UJI0aNUpffvml5s6dqzVr1igzM1M9e/Ys19gLFy7U+vXrFR4efi2nAABApXGyoFiDZmzSL8fzVS/QTx8Naacgfx+jywIAAACqDEN/hd21a9cyz1955RVNnTpV69evV0REhKZPn67Zs2frrrvuki
TNmDFDTZo00fr169WhQ4eLjnvkyBGNGDFCy5cvV5cuXX6zjsLCQhUWFrqe2+32K5wRAADuqaC4VE98mKLtR2wK9vfRR0Paqa7Vz+iyAAAAgCrFba7RVlpaqs8++0z5+fmKj49XSkqKiouL1blzZ1ef6Oho1a9fX8nJyRcdx+FwqH///nrmmWfUrFmzcr33xIkTZbVaXY/IyMirng8AAO6ipNShP3y6Rcm//KqaZi/NfLSdbgipaXRZAAAAQJVjeNC2fft21axZU2azWUOHDtXChQvVtGlTZWVlycfHR4GBgWX616lTR1lZWRcd7/XXX5eXl5f+8Ic/lLuGsWPHymazuR6HDx++0ukAAOBWnE6nxi7Yrm92ZcvHy0P/HNBGLSKsRpcFAAAAVEmGX/24cePGSktLk81m07x58zRw4ECtWbPmisZKSUnR5MmTlZqaKpPJVO7Xmc1mmc3mK3pPAADc2Wtf79bclAx5mKT/e6i14m8MNrokAAAAoMoy/Iw2Hx8fNWrUSHFxcZo4caJiYmI0efJkhYWFqaioSHl5eWX6Z2dnKyws7IJjff/998rJyVH9+vXl5eUlLy8vHTx4UH/84x/VsGHDaz8ZAADcyLQ1+/SPtb9Ikl5LaqnEZhf+9xMAAABAxTA8aPtfDodDhYWFiouLk7e3t1auXOk6lp6erkOHDik+Pv6Cr+3fv7+2bdumtLQ01yM8PFzPPPOMli9ffr2mAACA4T7beEivfb1bkvSX+6PVuw3XHwUAAACuNUO3jo4dO1b33Xef6tevr5MnT2r27NlavXq1li9fLqvVqiFDhmj06NEKCgqSxWLRiBEjFB8fX+aOo9HR0Zo4caJ69Oih4OBgBQeX3RLj7e2tsLAwNW7c+HpPDwAAQyzbcVR/WbhdkjT0jhv1xO03GlwRAAAAUD0YGrTl5ORowIABOnr0qKxWq1q2bKnly5frnnvukSS99dZb8vDwUFJSkgoLC5WYmKj33nuvzBjp6emy2WxGlA8AgNtZ9/Nx/eHTNDmcUt+2kRpzL79oAgAAAK4Xk9PpdBpdhLux2+2yWq2y2WyyWCxGlwMAQLlsy8jTQ++vV35Rqe5tFqYp/WLl6VH+mwPh6rGGcH98RwAA4EqUdw3hdtdoAwAAl+/nnFMaNGOT8otKdWujYE1+qBUhGwAAAHCdEbQBAFDJHck7owHTNyg3v0gtI6z6R/82Mnt5Gl0WAAAAUO0QtAEAUIn9eqpQ/advUKatQDeE+Gvmo+1U02zoJVgBAACAaougDQCASupUYYkenblJvxzLV7jVVx8Paa8gfx+jywIAAACqLYI2AAAqocKSUj3x4WZty7CpVg1vfTikvcID/YwuCwAAAKjWCNoAAKhkSh1OPf1pmtbt+1X+Pp6a+Wg7NQqtaXRZAAAAQLVH0AYAQCXidDr13MLtWrYzSz6eHnp/QBvFRAYaXRYAAAAAEbQBAFCpTFqers82HZaHSXrnoVa6tVFto0sCAAAA8G8EbQAAVBLvr92nqav3SZIm9myhe5vXNbgiAAAAAP+NoA0AgErg882H9epXuyVJz94XrT5t6xtcEaqKtWvXqmvXrgoPD5fJZNKiRYvKHF+wYIESEhIUHBwsk8mktLS088bIyspS//79FRYWJn9/f8XGxmr+/Pll+uTm5qpfv36yWCwKDAzUkCFDdOrUqTJ9tm3bpttuu02+vr6KjIzUpEmTznuvuXPnKjo6Wr6+vmrRooW++uqrq/4MAAAAKgpBGwAAbm75ziw9O3+bJOnJ22/Q0DtuNLgiVCX5+fmKiYnRlClTLnq8Y8eOev311y86xoABA5Senq7Fixdr+/bt6tmzp3r37q0tW7a4+vTr1087d+7UihUrtGTJEq1du1ZPPPGE67jdbldCQoIaNGiglJQUvfHGG5owYYLef/99V59169bpoYce0pAhQ7RlyxZ1795d3bt3144dOyrgkwAAALh6JqfT6TS6CHdjt9tltVpls9lksViMLg
cAUI2t23dcg2ZsUlGJQ73bROj1pJYymUxGl4WLqOxrCJPJpIULF6p79+7nHTtw4ICioqK0ZcsWtWrVqsyxmjVraurUqerfv7+rLTg4WK+//roee+wx/fTTT2ratKk2bdqkNm3aSJKWLVum+++/XxkZGQoPD9fUqVP13HPPKSsrSz4+PpKkZ599VosWLdLu3WfP5uzTp4/y8/O1ZMkS1/t06NBBrVq10rRp08o1x8r+HQEAAGOUdw3BGW0AALip7Rk2PfFhiopKHEpoWkev9mhByAa3dMstt2jOnDnKzc2Vw+HQZ599poKCAt15552SpOTkZAUGBrpCNknq3LmzPDw8tGHDBlef22+/3RWySVJiYqLS09N14sQJV5/OnTuXee/ExEQlJydftLbCwkLZ7fYyDwAAgGuFoA0AADe079gpDZyxUacKS9ThhiC981BreXnyzzbc0+eff67i4mIFBwfLbDbrySef1MKFC9WoUSNJZ6/hFhoaWuY1Xl5eCgoKUlZWlqtPnTp1yvQ59/y3+pw7fiETJ06U1Wp1PSIjI69usgAAAJfAih0AADdz1HZGA6ZvVG5+kZrXs+ifA9rI19vT6LKAi3rhhReUl5enb7/9Vps3b9bo0aPVu3dvbd++3ejSNHbsWNlsNtfj8OHDRpcEAACqMC+jCwAAAP9xIr9I/adv1JG8M7qhtr9mPtpOAb7eRpcFXNS+ffv07rvvaseOHWrWrJkkKSYmRt9//72mTJmiadOmKSwsTDk5OWVeV1JSotzcXIWFhUmSwsLClJ2dXabPuee/1efc8Qsxm80ym81XN0kAAIBy4ow2AADcRH5hiQbN3KSfc06prtVXHw5pp9o1CQjg3k6fPi1J8vAou6z09PSUw+GQJMXHxysvL08pKSmu46tWrZLD4VD79u1dfdauXavi4mJXnxUrVqhx48aqVauWq8/KlSvLvM+KFSsUHx9f8RMDAAC4AgRtAAC4gcKSUg39OEVbD+epVg1vfTSknSJq1TC6LFQDp06dUlpamtLS0iRJ+/fvV1pamg4dOiRJys3NVVpamnbt2iVJSk9PV1pamuu6aNHR0WrUqJGefPJJbdy4Ufv27dObb76pFStWuO5e2qRJE9177716/PHHtXHjRv34448aPny4+vbtq/DwcEnSww8/LB8fHw0ZMkQ7d+7UnDlzNHnyZI0ePdpV69NPP61ly5bpzTff1O7duzVhwgRt3rxZw4cPv06fFgAAwKURtAEAYLBSh1Oj52zV93uPq4aPp2Y82k6NQgOMLgvVxObNm9W6dWu1bt1akjR69Gi1bt1a48aNkyQtXrxYrVu3VpcuXSRJffv2VevWrTVt2jRJkre3t7766iuFhISoa9euatmypT788EPNmjVL999/v+t9PvnkE0VHR+vuu+/W/fffr44dO+r99993Hbdarfrmm2+0f/9+xcXF6Y9//KPGjRunJ554wtXnlltu0ezZs/X+++8rJiZG8+bN06JFi9S8efNr/jkBAACUh8npdDqNLsLd2O12Wa1W2Ww2WSwWo8sBAFRhTqdTf1m4Q59uPCRvT5NmDGqnjjfVNrosXCHWEO6P7wgAAFyJ8q4hOKMNAAAD/e2bdH268ZBMJmly39aEbAAAAEAlRtAGAIBB/vX9L5ry3T5J0ivdW+j+FnUNrggAAADA1SBoAwDAAPNSMvTXpT9Jkp5JbKyH29c3uCIAAAAAV4ugDQCA62zFrmyNmb9NkvRYxyj9/s4bDa4IAAAAQEUgaAMA4Dpa/8uvGjY7VaUOp5JiI/SX+5vIZDIZXRYAAACACkDQBgDAdbLjiE2Pz9qsohKHOjepo9eTWsjDg5ANAAAAqCoI2gAAuA72H8/XoBkbdbKwRO2jgvTuw63l5ck/wwAAAEBVwgofAIBrLMtWoEf+tUHHTxWpWbhF/xzYRr7enkaXBQAAAKCCEbQBAHAN5Z0u0oAPNuhI3hlF1fbXrMHtZPH1NrosAAAAANcAQRsAAN
dIfmGJBs3YpD3Zp1THYtaHg9updk2z0WUBAAAAuEYI2gAAuAaKShwa+nGK0g7nyernrY+GtFdkUA2jywIAAABwDRG0AQBQwUodTo3+PE3f7z0uP29PzXi0rW6uE2B0WQAAAACuMYI2AAAqkNPp1LgvdmjJtqPy9jRpWv84xdavZXRZAAAAAK4DgjYAACrQWyv26JMNh2QySX/v3Up33BxidEkAAAAArhOCNgAAKsgHP+zXO6t+liS93K25usaEG1wRAAAAgOuJoA0AgAqwcEuGXlqyS5L0p4Sb9UiHBgZXBAAAAOB6I2gDAOAqrdqdrT/N3SZJGnxrlIZ1amRwRQAAAACMQNAGAMBV2Lg/V099nKpSh1M9W9fT812ayGQyGV0WAAAAAAMQtAEAcIV2Zto0ZOYmFZY4dHd0qF7v1VIeHoRsAAAAQHVF0AYAwBU4cDxfAz/YpJOFJWrXMEhT+sXK25N/VgEAAIDqjJ8IAAC4TNn2Aj0yfYOOnypUk7oW/XNgG/l6expdFgAAAACDEbQBAHAZbKeLNWD6RmWcOKMGwTU0a3BbWf28jS4LAAAAgBsgaAMAoJxOF5Vo8KxNSs8+qdAAsz4e0l6hAb5GlwUAAADATRC0AQBQDkUlDj31capSDp6QxddLHw1pr8igGkaXBQAAAMCNELQBAPAbHA6n/jR3q9bsOSY/b0/NeLSdGocFGF0WAAAAADdD0AYAwCU4nU69+OVOLd6aKS8Pk6Y+Equ4BrWMLgsAAACAGyJoAwDgEt7+dq9mJR+UySS92TtGdzYONbokAAAAAG7K0KBt6tSpatmypSwWiywWi+Lj4/X111+7jhcUFGjYsGEKDg5WzZo1lZSUpOzs7IuOV1xcrDFjxqhFixby9/dXeHi4BgwYoMzMzOsxHQBAFTPzx/2avHKvJOmlB5upW6t6BlcEAAAAwJ0ZGrRFRETotddeU0pKijZv3qy77rpL3bp1086dOyVJo0aN0pdffqm5c+dqzZo1yszMVM+ePS863unTp5WamqoXXnhBqampWrBggdLT0/Xggw9erykBAKqIRVuOaMKXuyRJozrfrP7xDY0tCAAAAIDbMzmdTqfRRfy3oKAgvfHGG+rVq5dCQkI0e/Zs9erVS5K0e/duNWnSRMnJyerQoUO5xtu0aZPatWungwcPqn79+uV6jd1ul9Vqlc1mk8ViueK5AAAqp+925+jxDzerxOHUoFsaanzXpjKZTEaXhUqANYT74zsCAABXorxrCLe5Rltpaak+++wz5efnKz4+XikpKSouLlbnzp1dfaKjo1W/fn0lJyeXe1ybzSaTyaTAwMCL9iksLJTdbi/zAABUT5sP5OqpT1JU4nCqW6twjXuAkA0AAABA+RgetG3fvl01a9aU2WzW0KFDtXDhQjVt2lRZWVny8fE5LyCrU6eOsrKyyjV2QUGBxowZo4ceeuiSaePEiRNltVpdj8jIyKuZEgCgkvrpqF2DZ25SQbFDnRqH6G+/i5GHByEbAAAAgPIxPGhr3Lix0tLStGHDBj311FMaOHCgdu3addXjFhcXq3fv3nI6nZo6deol+44dO1Y2m831OHz48FW/PwCgcjn062kN+GCj7AUlatOglt7rFydvT8P/mQQAAABQiXgZXYCPj48aNWokSYqLi9OmTZs0efJk9enTR0VFRcrLyytzVlt2drbCwsIuOea5kO3gwYNatWrVb15/w2w2y2w2X/VcAACVU87JAj0yfYOOnSxUdFiApg9qKz8fT6PLAgAAAFDJuN2v6h0OhwoLCxUXFydvb2+tXLnSdSw9PV2HDh1SfHz8RV9/LmTbu3evvv32WwUHB1+PsgEAlZTtTLEGTN+oQ7mnVT+ohj4c3E5WP2+jywIAAABQCRl6RtvYsWN13333qX79+jp58qRmz56t1atXa/ny5bJarRoyZIhGjx6toKAgWSwWjRgxQvHx8WXuOBodHa2JEyeqR48eKi4uVq9evZSamqolS5aotL
TUdT23oKAg+fj4GDVVAIAbOlNUqiEzN2l31kmFBJj18ZD2CrX4Gl0WAAAAgErK0KAtJydHAwYM0NGjR2W1WtWyZUstX75c99xzjyTprbfekoeHh5KSklRYWKjExES99957ZcZIT0+XzWaTJB05ckSLFy+WJLVq1apMv++++0533nnnNZ8TAKByKC516PefpGjzwRMK8PXSh4PbqX5wDaPLAgAAAFCJmZxOp9PoItyN3W6X1WqVzWb7zeu7AQAqH4fDqdGfp2lRWqZ8vT300ZD2atswyOiyUAWwhnB/fEcAAOBKlHcN4XbXaAMA4FpyOp16ackuLUrLlJeHSVP7xRGyAQAAAKgQBG0AgGrl/1b9rJnrDkiS/va7GHWKDjW2IAAAAABVBkEbAKDa+Cj5gP6+Yo8kaULXpureup7BFQEAAACoSgjaAADVwuKtmRq3eKck6em7b9KgW6MMrggAAABAVUPQBgCo8lan52j0nDQ5ndKA+AYa2fkmo0sCAAAAUAURtAEAqrSUgyf01MepKnE49WBMuCZ0bSaTyWR0WQAAAACqIII2AECVlZ51UoNnbtKZ4lLdcXOI/va7GHl4ELIBAAAAuDYI2gAAVdLh3NPqP32DbGeKFVs/UFMfiZWPF//sAQAAALh2+IkDAFDlHDtZqEemb1DOyUI1rhOgDwa1VQ0fL6PLAgAAAFDFEbQBAKoU25liDfhgow7+eloRtfz04ZB2CqzhY3RZAAAAAKoBgjYAQJVRUFyqx2dt1k9H7apd00cfD2mvOhZfo8sCAAAAUE0QtAEAqoTiUoeGz07VxgO5CjB7adbgdmpY29/osgAAAABUIwRtAIBKz+Fwasy8bfr2pxyZvTw0fVBbNQu3Gl0WUCmsXbtWXbt2VXh4uEwmkxYtWlTm+IIFC5SQkKDg4GCZTCalpaVdcJzk5GTddddd8vf3l8Vi0e23364zZ864jufm5qpfv36yWCwKDAzUkCFDdOrUqTJjbNu2Tbfddpt8fX0VGRmpSZMmnfc+c+fOVXR0tHx9fdWiRQt99dVXV/0ZAAAAVBSCNgBApeZ0OvXXpT9pwZYj8vQw6b1+sWoXFWR0WUClkZ+fr5iYGE2ZMuWixzt27KjXX3/9omMkJyfr3nvvVUJCgjZu3KhNmzZp+PDh8vD4z1KzX79+2rlzp1asWKElS5Zo7dq1euKJJ1zH7Xa7EhIS1KBBA6WkpOiNN97QhAkT9P7777v6rFu3Tg899JCGDBmiLVu2qHv37urevbt27NhRAZ8EAADA1TM5nU6n0UW4G7vdLqvVKpvNJovFYnQ5AIBLmPLdz3pjebok6e+9Y9QzNsLgilCdVfY1hMlk0sKFC9W9e/fzjh04cEBRUVHasmWLWrVqVeZYhw4ddM899+jll1++4Lg//fSTmjZtqk2bNqlNmzaSpGXLlun+++9XRkaGwsPDNXXqVD333HPKysqSj8/ZG5g8++yzWrRokXbv3i1J6tOnj/Lz87VkyZIy792qVStNmzatXHOs7N8RAAAwRnnXEJzRBgCotD5ef9AVso17oCkhG2CAnJwcbdiwQaGhobrllltUp04d3XHHHfrhhx9cfZKTkxUYGOgK2SSpc+fO8vDw0IYNG1x9br/9dlfIJkmJiYlKT0/XiRMnXH06d+5c5v0TExOVnJx80foKCwtlt9vLPAAAAK4VgjYAQKW0ZFumXvji7HaxEXc10uCOUQZXBFRPv/zyiyRpwoQJevzxx7Vs2TLFxsbq7rvv1t69eyVJWVlZCg0NLfM6Ly8vBQUFKSsry9WnTp06Zfqce/5bfc4dv5CJEyfKarW6HpGRkVcxWwAAgEsjaAMAVDpr9xzTqDlpcjqlfu3ra/Q9NxtdElBtORwOSdKTTz6pRx99VK1bt9Zbb72lxo0b64MPPjC4Omns2LGy2Wyux+HDh40uCQAAVGFeRhcAAMDlSD10Qk9+lKLiUqe6tKyrl7o1l8lkMrosoNqqW7euJKlp06Zl2p
s0aaJDhw5JksLCwpSTk1PmeElJiXJzcxUWFubqk52dXabPuee/1efc8Qsxm80ym82XOy0AAIArwhltAIBKY0/2SQ2euUlnikt120219VbvVvL0IGQDjNSwYUOFh4crPT29TPuePXvUoEEDSVJ8fLzy8vKUkpLiOr5q1So5HA61b9/e1Wft2rUqLi529VmxYoUaN26sWrVqufqsXLmyzPusWLFC8fHx12RuAAAAl4sz2gAAlcLh3NPqP32D8k4Xq3X9QE17JE4+Xvy+CLhap06d0s8//+x6vn//fqWlpSkoKEj169dXbm6uDh06pMzMTElyBWphYWEKCwuTyWTSM888o/HjxysmJkatWrXSrFmztHv3bs2bN0/S2bPb7r33Xj3++OOaNm2aiouLNXz4cPXt21fh4eGSpIcfflgvvviihgwZojFjxmjHjh2aPHmy3nrrLVdtTz/9tO644w69+eab6tKliz777DNt3rxZ77///vX6uAAAAC7J5HQ6nUYX4W647TsAuJfjpwr1u2nJ2n88XzfXqanPn4xXYA2f334hcJ1VxjXE6tWr1alTp/PaBw4cqJkzZ2rmzJl69NFHzzs+fvx4TZgwwfX8tdde05QpU5Sbm6uYmBhNmjRJHTt2dB3Pzc3V8OHD9eWXX8rDw0NJSUl65513VLNmTVefbdu2adiwYdq0aZNq166tESNGaMyYMWXed+7cuXr++ed14MAB3XTTTZo0aZLuv//+cs+3Mn5HAADAeOVdQxC0XQALMABwHycLitX3/fXamWlXvUA/zX/qFoVZfY0uC7gg1hDuj+8IAABcifKuIdhzAwBwWwXFpXps1mbtzLQr2N9HHz/WnpANAAAAgNsiaAMAuKWSUoeGz96iDftzFWD20qzB7RRV29/osgAAAADgogjaAABux+Fwasz87fr2p2z5eHnonwPbqHk9q9FlAQAAAMAlEbQBANyK0+nUq1/9pPmpGfL0MGnKw7HqcEOw0WUBAAAAwG8iaAMAuJWpa/bpXz/slyS9ntRS9zStY3BFAAAAAFA+VxS0HT58WBkZGa7nGzdu1MiRI/X+++9XWGEAgOrn042HNGlZuiTp+S5N1CsuwuCKAPfFegwAAMD9XFHQ9vDDD+u7776TJGVlZemee+7Rxo0b9dxzz+mll16q0AIBANXDV9uP6rmF2yVJwzrdqMduu8HgigD3xnoMAADA/VxR0LZjxw61a9dOkvT555+refPmWrdunT755BPNnDmzIusDAFQDP+w9rpGfpcnhlB5qV19/SmhsdEmA22M9BgAA4H6uKGgrLi6W2WyWJH377bd68MEHJUnR0dE6evRoxVUHAKjy0g7n6YmPNquo1KH7W4Tpr92by2QyGV0W4PZYjwEAALifKwramjVrpmnTpun777/XihUrdO+990qSMjMzFRzMneEAAOXzc85JPTpjo04Xlapjo9p6q08reXoQsgHlwXoMAADA/VxR0Pb666/rH//4h+6880499NBDiomJkSQtXrzYtYUBAIBLOZJ3Rv2nb9SJ08WKiQzUP/rHyezlaXRZQKXBegwAAMD9mJxOp/NKXlhaWiq73a5atWq52g4cOKAaNWooNDS0wgo0gt1ul9Vqlc1mk8ViMbocAKhyfj1VqN9NS9Yvx/PVKLSmPn8yXkH+PkaXBVy1672GqMrrsWuFdR4AALgS5V1DXNEZbWfOnFFhYaFrUXfw4EG9/fbbSk9PZ1EHALikkwXFGjRjk345nq96gX76aEg7QjbgCrAeAwAAcD9XFLR169ZNH374oSQpLy9P7du315tvvqnu3btr6tSpFVogAKDqKCgu1RMfpmj7EZuC/H304ZB2qmv1M7osoFJiPQYAAOB+vK7kRampqXrrrbckSfPmzVOdOnW0ZcsWzZ8/X+PGjdNTTz1VoUUCACq/klKH/vDpFiX/8qtqmr0069F2ujGkptFlAZUW6zH343Q6daa41OgyAACo1vy8PWUyGXeDtSsK2k
6fPq2AgABJ0jfffKOePXvKw8NDHTp00MGDByu0QABA5ed0OvWXhdv1za5s+Xh56J8D2qhFhNXosoBKjfWY+zlTXKqm45YbXQYAANXarpcSVcPniuKuCnFFW0cbNWqkRYsW6fDhw1q+fLkSEhIkSTk5OVxUFgBwnteW7dbnmzPkYZL+76HWir8x2OiSgEqP9RgAAID7uaKIb9y4cXr44Yc1atQo3XXXXYqPj5d09reprVu3rtACAQCV27Q1+/SPNb9Ikl5LaqnEZmEGVwRUDazH3I+ft6d2vZRodBkAAFRrft6ehr6/yel0Oq/khVlZWTp69KhiYmLk4XH2xLiNGzfKYrEoOjq6Qou83rjtOwBUjDmbDmnM/O2SpL/cH60nbr/R4IqAa+t6ryGq8nrsWmGdBwAArkR51xBXvGk1LCxMYWFhysjIkCRFRESoXbt2VzocAKCKWbbjqMYuOBuyDb3jRkI24BpgPQYAAOBerugabQ6HQy+99JKsVqsaNGigBg0aKDAwUC+//LIcDkdF1wgAqGTW/Xxcf/g0TQ6n1KdNpMbc29jokoAqh/UYAACA+7miM9qee+45TZ8+Xa+99ppuvfVWSdIPP/ygCRMmqKCgQK+88kqFFgkAqDy2ZeTp8Q83q6jUoXubhemVHs0Nvb02UFWxHgMAAHA/V3SNtvDwcE2bNk0PPvhgmfYvvvhCv//973XkyJEKK9AIXLsDAK7Mzzmn1PsfycrNL9ItNwbrg0Ft5WvwxUiB6+l6riGq+nrsWmGdBwAArkR51xBXtHU0Nzf3ghfYjY6OVm5u7pUMCQCo5DLzzmjA9A3KzS9Sywir3h/QhpANuIZYjwEAALifKwraYmJi9O67757X/u6776ply5blHmfq1Klq2bKlLBaLLBaL4uPj9fXXX7uOFxQUaNiwYQoODlbNmjWVlJSk7OzsS47pdDo1btw41a1bV35+furcubP27t1b/skBAC5bbn6R+k/foExbgW4I8dfMR9uppvmK77cDoBwqaj0GAACAinNFPwVNmjRJXbp00bfffqv4+HhJUnJysg4fPqyvvvqq3ONERETotdde00033SSn06lZs2apW7du2rJli5o1a6ZRo0Zp6dKlmjt3rqxWq4YPH66ePXvqxx9/vGRt77zzjmbNmqWoqCi98MILSkxM1K5du+Tr63sl0wUAXMKpwhI9OmOj9h3LV7jVVx8Paa8gfx+jywKqvIpajwEAAKDiXNEZbXfccYf27NmjHj16KC8vT3l5eerZs6d27typjz76qNzjdO3aVffff79uuukm3XzzzXrllVdUs2ZNrV+/XjabTdOnT9ff//533XXXXYqLi9OMGTO0bt06rV+//oLjOZ1Ovf3223r++efVrVs3tWzZUh9++KEyMzO1aNGiK5kqAOASCktK9eRHm7U1w6ZaNbz14ZD2Cg/0M7osoFqoqPUYAAAAKs4V3QzhYrZu3arY2FiVlpZe9mtLS0s1d+5cDRw4UFu2bFFWVpbuvvtunThxQoGBga5+DRo00MiRIzVq1Kjzxvjll1904403asuWLWrVqpWr/Y477lCrVq00efLkC753YWGhCgsLXc/tdrsiIyO5SC4AXEKpw6nhs1P19Y4s+ft4avbjHRQTGWh0WYCh3OFC+1ezHqsO3OE7AgAAlc81vRlCRdq+fbtq1qwps9msoUOHauHChWratKmysrLk4+NTJmSTpDp16igrK+uCY51rr1OnTrlfI0kTJ06U1Wp1PSIjI69uUgBQxTmdTj23cLu+3pElH08PvT+gDSEbAAAAgGrP8KCtcePGSktL04YNG/TUU09p4MCB2rVr13WtYezYsbLZbK7H4cOHr+v7A0BlM2l5uj7bdFgeJumdh1rp1ka1jS4JAAAAAAxn+C3hfHx81KhRI0lSXFycNm3apMmTJ6tPnz4qKipSXl5embPasrOzFRYWdsGxzrVnZ2erbt26ZV7z31tJ/5fZbJbZbL76yQBANfD+2n
2aunqfJOnVHi10b/O6v/EKAAAAAKgeLito69mz5yWP5+XlXU0tkiSHw6HCwkLFxcXJ29tbK1euVFJSkiQpPT1dhw4dct1Z639FRUUpLCxMK1eudAVrdrvddbYcAODqfL75sF79arckacy90erbrr7BFQHVz/VYjwEAAODKXFbQZrVaf/P4gAEDyj3e2LFjdd9996l+/fo6efKkZs+erdWrV2v58uWyWq0aMmSIRo8eraCgIFksFo0YMULx8fHq0KGDa4zo6GhNnDhRPXr0kMlk0siRI/XXv/5VN910k6KiovTCCy8oPDxc3bt3v5ypAgD+x/KdWXp2/jZJ0hO336Chd9xgcEVA9VTR6zEAAABUnMsK2mbMmFGhb56Tk6MBAwbo6NGjslqtatmypZYvX6577rlHkvTWW2/Jw8NDSUlJKiwsVGJiot57770yY6Snp8tms7me//nPf1Z+fr6eeOIJ5eXlqWPHjlq2bJl8fX0rtHYAqE6S9/2qEZ9ukcMp9W4TobH3RctkMhldFlAtVfR6DAAAABXH5HQ6nUYX4W647TsA/MeOIzb1fX+9ThWWKKFpHb3XL1ZenobfSwdwS6wh3B/fEQAAuBLlXUPwkxIA4KJ+OXZKAz/YqFOFJepwQ5Deeag1IRsAAAAAXAQ/LQEALuio7Yz6T9+oX/OL1LyeRf8c0Ea+3p5GlwUAAAAAbougDQBwnhP5RRowfaOO5J3RDbX9NfPRdgrw9Ta6LAAAAABwawRtAIAy8gtLNGjmJu3NOaUwi68+HNJOtWuajS4LAAAAANweQRsAwKWwpFRDP07R1sN5CqzhrY+GtFNErRpGlwUAAAAAlQJBGwBAklTqcGr0nK36fu9x1fDx1IxBbXVTnQCjywIAAACASoOgDQAgp9OpF77YoaXbj8rb06R/9I9T6/q1jC4LAAAAACoVgjYAgN78Zo9mbzgkk0ma3Le1brspxOiSAAAAAKDSIWgDgGruX9//one/+1mS9Er3Frq/RV2DKwIAAACAyomgDQCqsfkpGfrr0p8kSc8kNtbD7esbXBEAAAAAVF4EbQBQTX27K1t/nr9NkvRYxyj9/s4bDa4IAAAAACo3gjYAqIY2/PKrhs1OVanDqaTYCP3l/iYymUxGlwUAAAAAlRpBGwBUMzuO2PTYrM0qLHGoc5M6ej2phTw8CNkAAAAA4GoRtAFANbL/eL4Gzdiok4UlahcVpHcfbi0vT/4pAAAAAICKwE9XAFBNZNkK9Mi/Nuj4qSI1C7foXwPbyNfb0+iyAAAAAKDKIGgDgGog73SRBnywQUfyziiqtr9mDW4ni6+30WUBcANr165V165dFR4eLpPJpEWLFpU5vmDBAiUkJCg4OFgmk0lpaWkXHcvpdOq+++674DiHDh1Sly5dVKNGDYWGhuqZZ55RSUlJmT6rV69WbGyszGazGjVqpJkzZ573HlOmTFHDhg3l6+ur9u3ba+PGjVc4cwAAgIpH0AYAVdzpohI9OnOT9mSfUh2LWR8ObqfaNc1GlwXATeTn5ysmJkZTpky56PGOHTvq9ddf/82x3n777QveWKW0tFRdunRRUVGR1q1bp1mzZmnmzJkaN26cq8/+/fvVpUsXderUSWlpaRo5cqQee+wxLV++3NVnzpw5Gj16tMaPH6/U1FTFxMQoMTFROTk5VzBzAACAimdyOp1Oo4twN3a7XVarVTabTRaLxehyAOCKFZU49NiHm7V2zzFZ/bw1d2i8bq4TYHRZQJVV2dcQJpNJCxcuVPfu3c87duDAAUVFRWnLli1q1arVecfT0tL0wAMPaPPmzapbt26Zcb7++ms98MADyszMVJ06dSRJ06ZN05gxY3Ts2DH5+PhozJgxWrp0qXbs2OEas2/fvsrLy9OyZcskSe3bt1fbtm317rvvSpIcDociIyM1YsQIPfvss+WaY2X/jgAAgDHKu4bgjDYAqKJKHU6N/jxNa/cck5+3p2Y82paQDcA1cfr0aT388MOaMm
WKwsLCzjuenJysFi1auEI2SUpMTJTdbtfOnTtdfTp37lzmdYmJiUpOTpYkFRUVKSUlpUwfDw8Pde7c2dXnQgoLC2W328s8AAAArhWCNgCogpxOpyYs3qkl247K29Okaf3jFFu/ltFlAaiiRo0apVtuuUXdunW74PGsrKwyIZsk1/OsrKxL9rHb7Tpz5oyOHz+u0tLSC/Y5N8aFTJw4UVar1fWIjIy87PkBAACUF0EbAFRBb63Yo4/WH5TJJP29dyvdcXOI0SUBqKIWL16sVatW6e233za6lAsaO3asbDab63H48GGjSwIAAFUYQRsAVDEf/LBf76z6WZL0Urfm6hoTbnBFAKqyVatWad++fQoMDJSXl5e8vLwkSUlJSbrzzjslSWFhYcrOzi7zunPPz201vVgfi8UiPz8/1a5dW56enhfsc6HtqueYzWZZLJYyDwAAgGuFoA0AqpCFWzL00pJdkqQ/3nOz+ndoYHBFAKq6Z599Vtu2bVNaWprrIUlvvfWWZsyYIUmKj4/X9u3by9wddMWKFbJYLGratKmrz8qVK8uMvWLFCsXHx0uSfHx8FBcXV6aPw+HQypUrXX0AAACM5mV0AQCAirFqd7b+NHebJOnRWxtq+F2NDK4IQGVw6tQp/fzzz67n+/fvV1pamoKCglS/fn3l5ubq0KFDyszMlCSlp6dLOnsG2n8//lf9+vUVFRUlSUpISFDTpk3Vv39/TZo0SVlZWXr++ec1bNgwmc1mSdLQoUP17rvv6s9//rMGDx6sVatW6fPPP9fSpUtdY44ePVoDBw5UmzZt1K5dO7399tvKz8/Xo48+es0+HwAAgMtB0AYAVcDG/bl66uNUlTqc6tm6nl7o0lQmk8nosgBUAps3b1anTp1cz0ePHi1JGjhwoGbOnKnFixeXCbL69u0rSRo/frwmTJhQrvfw9PTUkiVL9NRTTyk+Pl7+/v4aOHCgXnrpJVefqKgoLV26VKNGjdLkyZMVERGhf/3rX0pMTHT16dOnj44dO6Zx48YpKytLrVq10rJly867QQIAAIBRTE6n02l0Ee7GbrfLarXKZrNxHQ8Abm9Xpl193k/WyYIS3R0dqmn94+TtyZUBACOwhnB/fEcAAOBKlHcNwU9iAFCJHfw1XwM+2KiTBSVq1zBIU/rFErIBAAAAgEH4aQwAKqkce4Eemb5Bx08Vqkldi/45sI18vT2NLgsAAAAAqi2CNgCohGynizXgg406nHtGDYJraNbgtrL6eRtdFgAAAABUawRtAFDJnC4q0eBZm7Q766RCA8z6eEh7hQb4Gl0WAAAAAFR7BG0AUIkUlTj01MepSjl4QhZfL304pJ0ig2oYXRYAAAAAQARtAFBpOBxO/WnuVq3Zc0y+3h6a8WhbRYdxxzwAAAAAcBcEbQBQCTidTr345U4t3popLw+Tpj0Sp7gGQUaXBQAAAAD4LwRtAFAJTF65V7OSD8pkkt7sHaM7G4caXRIAAAAA4H8QtAGAm5u17oDe/navJOmlB5upW6t6BlcEAAAAALgQgjYAcGNfpB3R+MU7JUmjOt+s/vENjS0IAAAAAHBRBG0A4Ka+S8/RHz/fKkkadEtD/eHuRgZXBAAAAAC4FII2AHBDKQdz9dTHKSpxONWtVbjGPdBUJpPJ6LIAAAAAAJdA0AYAbuano3Y9OmOTCoodurNxiP72uxh5eBCyAQAAAIC7I2gDADdy6NfTGvDBRtkLStSmQS1N7Rcnb0/+qgYAAACAyoCf3gDATeScLNAj0zfo2MlCRYcFaPrAtvLz8TS6LAAAAABAORG0AYAbsJ0p1oDpG3Uo97TqB9XQh4PbyVrD2+iyAAAAAACXgaANAAx2pqhUj83apN1ZJxUSYNbHQ9or1OJrdFkAAAAAgMtE0AYABioudWjY7FRtOnBCAb5e+nBwO9UPrmF0WQAAAACAK0DQBgAGcTic+vO8bVq1O0e+3h76YFBbNalrMbosAAAAAMAVImgDAAM4nU69tG
SXFm45Ii8Pk6b2i1PbhkFGlwUAAAAAuAoEbQBggHdX/ayZ6w5Ikv72uxh1ig41tiAAAAAAwFUzNGibOHGi2rZtq4CAAIWGhqp79+5KT08v02ffvn3q0aOHQkJCZLFY1Lt3b2VnZ19y3NLSUr3wwguKioqSn5+fbrzxRr388styOp3XcjoAUC4frT+oN1fskSSN79pU3VvXM7giAAAAAEBFMDRoW7NmjYYNG6b169drxYoVKi4uVkJCgvLz8yVJ+fn5SkhIkMlk0qpVq/Tjjz+qqKhIXbt2lcPhuOi4r7/+uqZOnap3331XP/30k15//XVNmjRJ//d//3e9pgYAF7R4a6bGfbFDkvSHu2/So7dGGVwRAAAAAKCieBn55suWLSvzfObMmQoNDVVKSopuv/12/fjjjzpw4IC2bNkii+XsBcJnzZqlWrVqadWqVercufMFx123bp26deumLl26SJIaNmyoTz/9VBs3bry2EwKAS1idnqPRc9LkdEr9OzTQqM43GV0SAAAAAKACudU12mw2myQpKOjsBcELCwtlMplkNptdfXx9feXh4aEffvjhouPccsstWrlypfbsObs1a+vWrfrhhx903333XbB/YWGh7HZ7mQcAVKSUgyf01MepKnE41TUmXC8+2Ewmk8nosgAAAAAAFchtgjaHw6GRI0fq1ltvVfPmzSVJHTp0kL+/v8aMGaPTp08rPz9ff/rTn1RaWqqjR49edKxnn31Wffv2VXR0tLy9vdW6dWuNHDlS/fr1u2D/iRMnymq1uh6RkZHXZI4Aqqf0rJMaPHOTzhSX6o6bQ/Tm72Lk4UHIBgAAAABVjdsEbcOGDdOOHTv02WefudpCQkI0d+5cffnll6pZs6asVqvy8vIUGxsrD4+Ll/7555/rk08+0ezZs5WamqpZs2bpb3/7m2bNmnXB/mPHjpXNZnM9Dh8+XOHzA1A9Hc49rf7TN8h2plix9QM19ZFY+Xi5zV+9AAAAAIAKZOg12s4ZPny4lixZorVr1yoiIqLMsYSEBO3bt0/Hjx+Xl5eXAgMDFRYWphtuuOGi4z3zzDOus9okqUWLFjp48KAmTpyogQMHntffbDaX2Z4KABXh2MlC9Z++QTknC9W4ToA+GNRWNXzc4q9dAAAAAMA1YOhPfE6nUyNGjNDChQu1evVqRUVd/O57tWvXliStWrVKOTk5evDBBy/a9/Tp0+ed8ebp6XnJO5UCQEWyFxRr4AcbdeDX04qo5acPh7RTYA0fo8sCAAAAAFxDhgZtw4YN0+zZs/XFF18oICBAWVlZkiSr1So/Pz9J0owZM9SkSROFhIQoOTlZTz/9tEaNGqXGjRu7xrn77rvVo0cPDR8+XJLUtWtXvfLKK6pfv76aNWumLVu26O9//7sGDx58/ScJoNopKC7VY7M2a9dRu2rX9NHHQ9qrjsXX6LIAAAAAANeYoUHb1KlTJUl33nlnmfYZM2Zo0KBBkqT09HSNHTtWubm5atiwoZ577jmNGjWqTP9zW0vP+b//+z+98MIL+v3vf6+cnByFh4frySef1Lhx467pfACguNSh4bNTtXF/rgLMXpo1uJ0a1vY3uiwAAAAAwHVgcjqdTqOLcDd2u11Wq1U2m00Wi8XocgBUEg6HU3+au1ULthyR2ctDHw5up/Y3BBtdFoDriDWE++M7AgAAV6K8awhufQcAFcDpdOqvS3/Sgi1H5Olh0pSHYwnZAAAAAKCaIWgDgArw3up9+uDH/ZKkN3q1VOemdQyuCAAAAABwvRG0AcBV+mTDQb2xPF2SNO6BpuoZG2FwRQAAAAAAIxC0AcBVWLrtqJ5ftEOSNOKuRhrcMcrgigAAAAAARiFoA4Ar9P3eYxo5Z4ucTqlf+/oafc/NRpcEAAAAADAQQRsAXIEth07oyY9SVFzqVJeWdfVSt+YymUxGlwUAAAAAMBBBGwBcpr3ZJ/XozE06XVSq226qrbd6t5KnByEbAAAAAFR3BG0AcBl2Z9nVf/
pG5Z0uVqvIQE17JE4+XvxVCgAAAACQvIwuAADcne10sRZvy9T8lAylHc6TJN0UWlMzBrWVv5m/RgEAAAAAZ/ETIgBcQEmpQ2v3HtP8lCNasStbRaUOSZKnh0l3RYfq5W7NVcvfx+AqAQAAAADuhKANAP5LetZJzUs5rEVpmTp2stDVHh0WoF5xEerWqp5CAswGVggAAAAAcFcEbQCqvdz8Ii1OO6J5qRnaccTuag/y91G3VuFKio1Qs3ALdxUFAAAAAFwSQRuAaqm41KHvdudofmqGVu3OUXGpU5Lk9e+tob3iInRn41BudAAAAAAAKDeCNgDVys5Mm+alZGhxWqZ+zS9ytTevZ1FSbIQejAlXcE22hgIAAAAALh9BG4Aq79jJQn2RdkTzUjK0O+ukq712TbN6tA5XUlyEosMsBlYIAAAAAKgKCNoAVEmFJaVa9dPZraHfpR9TqePs1lAfTw/d07SOkuLq6fabQuTlydZQAAAAAEDFIGgDUGU4nU5ty7BpfmqGFm/NVN7pYtexmMhA9YqLUNeWdRVYw8fAKgEAAAAAVRVBG4BKL9teoIVbjmh+Sob25pxytdexmNWjdYR6xdVTo9AAAysEAAAAAFQH7JkCUCkVFJfqy62ZGjRjo+InrtRrX+/W3pxTMnt56MGYcM0a3E7rnr1bz94XTcgGAJewdu1ade3aVeHh4TKZTFq0aFGZ4wsWLFBCQoKCg4NlMpmUlpZW5nhubq5GjBihxo0by8/PT/Xr19cf/vAH2Wy2Mv0OHTqkLl26qEaNGgoNDdUzzzyjkpKSMn1Wr16t2NhYmc1mNWrUSDNnzjyv3ilTpqhhw4by9fVV+/bttXHjxor4GAAAACoEZ7QBqDScTqdSD+VpfmqGlmzNlL3gPz+gxTWopV5xEerSsq4svt4GVgkAlUt+fr5iYmI0ePBg9ezZ84LHO3bsqN69e+vxxx8/73hmZqYyMzP1t7/9TU2bNtXBgwc1dOhQZWZmat68eZKk0tJSdenSRWFhYVq3bp2OHj2qAQMGyNvbW6+++qokaf/+/erSpYuGDh2qTz75RCtXrtRjjz2munXrKjExUZI0Z84cjR49WtOmTVP79u319ttvKzExUenp6QoNDb2GnxIAAED5mJxOp9PoItyN3W6X1WqVzWaTxcKdCAGjZeadcW0N/eV4vqs93OqrnrER6hlbTzeE1DSwQgA4q7KvIUwmkxYuXKju3bufd+zAgQOKiorSli1b1KpVq0uOM3fuXD3yyCPKz8+Xl5eXvv76az3wwAPKzMxUnTp1JEnTpk3TmDFjdOzYMfn4+GjMmDFaunSpduzY4Rqnb9++ysvL07JlyyRJ7du3V9u2bfXuu+9KkhwOhyIjIzVixAg9++yz5ZpjZf+OAACAMcq7huCMNgBu6UxRqZbvzNK8lAz9uO+4zv1KwM/bU/c1D1NSXITibwiWh4fJ2EIBAOc5twD18jq71ExOTlaLFi1cIZskJSYm6qmnntLOnTvVunVrJScnq3PnzmXGSUxM1MiRIyVJRUVFSklJ0dixY13HPTw81LlzZyUnJ1+0lsLCQhUWFrqe2+32ipgiAADABRG0AXAbTqdTmw6c0PyUDC3dflSnCv+zNbRdVJB6xUXo/hZ1VdPMX10A4K6OHz+ul19+WU888YSrLSsrq0zIJsn1PCsr65J97Ha7zpw5oxMnTqi0tPSCfXbv3n3ReiZOnKgXX3zxquYEAABQXvy0CsBwh3NPa0HqES3YkqGDv552tUcG+aln6wglxUaofnANAysEAJSH3W5Xly5d1LRpU02YMMHociRJY8eO1ejRo13P7Xa7IiMjDawIAABUZQRtAAyRX1iir3dkaV7KYa3/JdfV7u/jqftb1FWvuAi1bRjE1lAAqCROnjype++9VwEBAVq4cKG8vf9zY5qwsLDz7g6anZ3tOnbuf8+1/Xcfi8UiPz8/eXp6ytPT84J9zo1xIWazWWaz+armBgAAUF4EbQCuG4
fDqfX7f9X8lCP6esdRnS4qlSSZTNItNwYrKTZC9zYPUw0f/moCgMrEbrcrMTFRZrNZixcvlq+vb5nj8fHxeuWVV5STk+O6O+iKFStksVjUtGlTV5+vvvqqzOtWrFih+Ph4SZKPj4/i4uK0cuVK180aHA6HVq5cqeHDh1/jGQIAAJQPP80CuOYO/pqv+SkZmp96REfyzrjaGwbXUK+4CPWIjVC9QD8DKwSA6uvUqVP6+eefXc/379+vtLQ0BQUFqX79+srNzdWhQ4eUmZkpSUpPT5d09gy0sLAw2e12JSQk6PTp0/r4449lt9tdNxwICQmRp6enEhIS1LRpU/Xv31+TJk1SVlaWnn/+eQ0bNsx1ttnQoUP17rvv6s9//rMGDx6sVatW6fPPP9fSpUtdtY0ePVoDBw5UmzZt1K5dO7399tvKz8/Xo48+er0+LgAAgEsiaANwTZwsKNbSbUc1PzVDmw6ccLUHmL30QMzZraGx9WvJZGJrKAAYafPmzerUqZPr+bnrmQ0cOFAzZ87U4sWLywRZffv2lSSNHz9eEyZMUGpqqjZs2CBJatSoUZmx9+/fr4YNG8rT01NLlizRU089pfj4ePn7+2vgwIF66aWXXH2joqK0dOlSjRo1SpMnT1ZERIT+9a9/KTEx0dWnT58+OnbsmMaNG6esrCy1atVKy5YtO+8GCQAAAEYxOZ1Op9FFuBu73S6r1eq6NT2A8il1OLVu33HNS8nQ8p1ZKih2SJI8TFLHm0KUFFtPic3C5OvtaXClAHBtsIZwf3xHAADgSpR3DcEZbQCu2r5jpzQ/JUMLtxzRUVuBq/3GEH/1iotUj9b1FGb1vcQIAAAAAABUfgRtAK6I7XSxvtyWqfmpGdpyKM/VbvXzVteYuuoVF6mYCCtbQwEAAAAA1QZBG4ByKyl16Pufz24NXbErW0UlZ7eGenqYdMfNIeoVF6G7m4TK7MXWUAAAAABA9UPQBuA37ck+qfkpGVqw5YiOnSx0tTeuE6BecRHq1jpcoQFsDQUAAAAAVG8EbQAu6ER+kRZvPbs1dFuGzdVeq4a3urWqp15xEWoWbmFrKAAAAAAA/0bQBsCluNShNenHNC8lQyt3Z6u49OxNib08TOoUHapecRHq1DhUPl4eBlcKAAAAAID7IWgDoF2Zds1PzdAXaUd0/FSRq71ZuEVJsRHq1ipcwTXNBlYIAAAAAID7I2gDqqnjpwr1RVqm5qdkaNdRu6u9dk0fdW9VT0lxEWpS12JghQAAAAAAVC4EbUA1UlTi0Krd2ZqXckSr03NU4ji7NdTH00N3Nzm7NfT2m0Pk7cnWUAAAAAAALhdBG1DFOZ1O7Thi17yUw1q8NVMnThe7jsVEWJUUF6GuLcNVy9/HwCoBAAAAAKj8CNqAKirHXqBFaUc0LyVDe7JPudpDA8zqEVtPvWIjdFOdAAMrBAAAAACgaiFoA6qQguJSfftTtuanZGjNnmP6985Q+Xh5KLFZmJJi66ljo9ryYmsoAAAAAAAVjqANqOScTqfSDudpXkqGvtyaKXtBietYbP1A9YqLVJeWdWX18zawSgAAAAAAqj6CNqCSOmo7o4Vbzm4N/eVYvqu9rtVXPWPrqWdshG4MqWlghQAAAAAAVC8EbUAlcqaoVN/sytK8lAz98PNxOf+9NdTX20P3Na+rpNgIxd8YLE8Pk7GFAgAAAABQDRG0AW7O6XQq5eAJzUvJ0NJtR3Wy8D9bQ9s1DFKvuAjd1yJMAb5sDQUAAAAAwEgEbYCbyjhxWgtSj2hBaoYO/Hra1R5Ry089YyOUFFtPDYL9DawQAAAAAAD8N4I2wI2cLirR19vPbg1N/uVXV3sNH0/d3+Ls1tD2UUHyYGsoAAAAAABuh6ANMJjD4dTGA7mal5Khr7cfVX5RqetY/A3B6hUXoXubh8nfzH+uAAAAAAC4Mw8j33zixIlq27atAgICFBoaqu7duys9Pb1Mn3379qlHjx4KCQmRxWJR7969lZ2d/Z
tjHzlyRI888oiCg4Pl5+enFi1aaPPmzddqKsBlO/hrvv6+Yo9uf+M79X1/vealZCi/qFQNgmto9D036/s/d9KnT3RQUlwEIRsAAAAAAJWAoT+9r1mzRsOGDVPbtm1VUlKiv/zlL0pISNCuXbvk7++v/Px8JSQkKCYmRqtWrZIkvfDCC+ratavWr18vD48L54QnTpzQrbfeqk6dOunrr79WSEiI9u7dq1q1al3P6QHnOVVYoq+2HdW8lAxtPJDraq9p9tIDLeuqV1yE4hrUksnE1lAAAAAAACobQ4O2ZcuWlXk+c+ZMhYaGKiUlRbfffrt+/PFHHThwQFu2bJHFYpEkzZo1S7Vq1dKqVavUuXPnC477+uuvKzIyUjNmzHC1RUVFXbSOwsJCFRYWup7b7farmRZQRqnDqeR9v2p+aoa+3nFUBcUOSZLJJHVsVFu94iKU0DRMfj6eBlcKAAAAAACuhlvtR7PZbJKkoKAgSWcDMJPJJLPZ7Orj6+srDw8P/fDDDxcN2hYvXqzExET97ne/05o1a1SvXj39/ve/1+OPP37B/hMnTtSLL75YwbNBdffLsVOan5qhhalHlGkrcLXfEOKvpNgI9Yytp7pWPwMrBAAAAAAAFcltgjaHw6GRI0fq1ltvVfPmzSVJHTp0kL+/v8aMGaNXX31VTqdTzz77rEpLS3X06NGLjvXLL79o6tSpGj16tP7yl79o06ZN+sMf/iAfHx8NHDjwvP5jx47V6NGjXc/tdrsiIyMrfpKo8mxnirV021HNSzms1EN5rnaLr5e6xoSrV1yEWkUGsjUUAAAAAIAqyG2CtmHDhmnHjh364YcfXG0hISGaO3eunnrqKb3zzjvy8PDQQw89pNjY2Iten006G9q1adNGr776qiSpdevW2rFjh6ZNm3bBoM1sNpc5aw64HKUOp77fe0zzU49o+c4sFZWc3RrqYZLuuDlESXER6tykjny92RoKAAAAAEBV5hZB2/Dhw7VkyRKtXbtWERERZY4lJCRo3759On78uLy8vBQYGKiwsDDdcMMNFx2vbt26atq0aZm2Jk2aaP78+dekflRPe7NPal5qhhZtOaJs+3+u8XdznZrqFReh7q3qKdTia2CFAAAAAADgejI0aHM6nRoxYoQWLlyo1atXX/KGBbVr15YkrVq1Sjk5OXrwwQcv2vfWW29Venp6mbY9e/aoQYMGFVM4qq2800VavDVT81MytDXD5moPrOGtbjHh6hUXqeb1LGwNBQAAAACgGjI0aBs2bJhmz56tL774QgEBAcrKypIkWa1W+fmdvUj8jBkz1KRJE4WEhCg5OVlPP/20Ro0apcaNG7vGufvuu9WjRw8NHz5ckjRq1CjdcsstevXVV9W7d29t3LhR77//vt5///3rP0lUesWlDq3dc0zzUjK08qccFZWe3Rrq5WHSnY1D1SuunjpFh8rsxdZQAAAAAACqM0ODtqlTp0qS7rzzzjLtM2bM0KBBgyRJ6enpGjt2rHJzc9WwYUM999xzGjVqVJn+57aWntO2bVstXLhQY8eO1UsvvaSoqCi9/fbb6tev3zWdD6qWn47aNT8lQ4vSjuj4qSJXe5O6FvWKi1C3VuGqXZNr+wEAAAAAgLNMTqfTaXQR7sZut8tqtcpms8lisRhdDq6jX08V6ou0TM1PzdDOTLurPdjfR91a1VNSXD01C7caWCEAwJ2xhnB/fEcAAOBKlHcN4RY3QwCMVFTi0HfpOZqXkqHvdueoxHE2e/b2NOnu6DrqFRehOxqHyNvz4ne6BQAAAAAAIGhDteR0OrUz0655KRlavDVTufn/2RraMsKqpNgIPRgTrlr+PgZWCQAAAAAAKhOCNlQrOScL9MWWs1tDd2eddLWHBJjVs3U9JcVF6OY6AQZWCAAAAAAAKiuCNlR5hSWlWvnT2a2ha/YcU+m/t4b6eHnonqZnt4be1qi2vNgaCgAAAAAArgJBG6okp9OprRk2zf/31lDbmWLXsdb1A5
UUG6GuLcNlreFtYJUAAAAAAKAqIWhDlZJlK9DCLUc0L+Ww9h3Ld7WHWXzVM/bs1tAbQ2oaWCEAAAAAAKiqCNpQ6RUUl+qbXdmal5KhH/Ye0793hsrs5aF7m4epV1yEbrmxtjw9TMYWCgAAAAAAqjSCNlRKTqdTqYdOaF5KhpZsO6qTBSWuY20b1lJSbITub1lXFl+2hgIAAAAAgOuDoA2VypG8M1qYmqH5qUe0//h/tobWC/RTUmw99YyNUMPa/gZWCAAAAAAAqiuCNri900UlWrYjS/NTM7Ru369y/ntraA0fT93XvK6S4uqpQ1SwPNgaCgAAAAAADETQBrfkcDi16UCu5qVk6KvtR5VfVOo61uGGIPWKi9R9zcPkb+aPMAAAAAAAcA+kFHArh3NPa35qhuanZuhw7hlXe/2gGkqKjVDP2HqKDKphYIUAAAAAAAAXRtAGw50qLNFX249qfkqGNuzPdbXXNHupS4u6SoqLUNuGtWQysTUUAAAAAAC4L4I2GMLhcGr9L79qXkqGvt6RpTPFZ7eGmkzSrTfWVq+4CCU2C5Ofj6fBlQIAAAAAAJQPQRuuqwPH8zU/NUMLUo/oSN5/tobeUNtfSXER6tG6nsID/QysEAAAAAAA4MoQtOGasxcUa+m2s1tDNx884WoP8PVS15hw9YqLUOvIQLaGAgAAAACASo2gDddEqcOpH34+rvkpGVq+M0uFJQ5JkodJuu2mEPWKi9A9TevI15utoQAAAAAAoGogaEOF+jnnpOalHNGiLUeUZS9wtd8UWtO1NbSOxdfACgEAAAAAAK4NgjZcNdvpYi3elql5KRnaejjP1R5Yw1sP/ntraIt6VraGAgAAAACAKs3D6AJQOZWUOrRqd7aGfZKqtq98qxcW7dDWw3ny9DCpc5NQTe0Xqw1/uVsvdWuulhFcfw0AAHe1du1ade3aVeHh4TKZTFq0aFGZ4wsWLFBCQoKCg4NlMpmUlpZ23hgFBQUaNmyYgoODVbNmTSUlJSk7O7tMn0OHDqlLly6qUaOGQkND9cwzz6ikpKRMn9WrVys2NlZms1mNGjXSzJkzz3uvKVOmqGHDhvL19VX79u21cePGq/0IAAAAKgxntOGy7M6ya35KhhalZerYyUJXe3RYgHrFRahbq3oKCTAbWCEAALgc+fn5iomJ0eDBg9WzZ88LHu/YsaN69+6txx9//IJjjBo1SkuXLtXcuXNltVo1fPhw9ezZUz/++KMkqbS0VF26dFFYWJjWrVuno0ePasCAAfL29tarr74qSdq/f7+6dOmioUOH6pNPPtHKlSv12GOPqW7dukpMTJQkzZkzR6NHj9a0adPUvn17vf3220pMTFR6erpCQ0Ov0ScEAABQfian0+k0ugh3Y7fbZbVaZbPZZLFYjC7HcLn5RVqcdkTzUjO044jd1R7k76Nurc5uDW0WbjWwQgAA3ENlX0OYTCYtXLhQ3bt3P+/YgQMHFBUVpS1btqhVq1audpvNppCQEM2ePVu9evWSJO3evVtNmjRRcnKyOnTooK+//loPPPCAMjMzVadOHUnStGnTNGbMGB07dkw+Pj4aM2aMli5dqh07drjG7tu3r/Ly8rRs2TJJUvv27dW2bVu9++67kiSHw6HIyEiNGDFCzz77bLnmWNm/IwAAYIzyriE4ow0XVFzq0He7czQ/NUOrdueouPRsHuvtadJd0aFKio3QnY1D5ePF7mMAAKqzlJQUFRcXq3Pnzq626Oho1a9f3xW0JScnq0WLFq6QTZISExP11FNPaefOnWrdurWSk5PLjHGuz8iRIyVJRUVFSklJ0dixY13HPTw81LlzZyUnJ1+0vsLCQhUW/ucsfLvdftG+AAAAV4ugDWXszLRpXkqGFqdl6tf8Ild783oW9YqN0IOt6inI38fACgEAgDvJysqSj4+PAgMDy7TXqVNHWVlZrj7/HbKdO37u2KX62O12nTlzRidOnFBpaekF++zevfui9U2cOFEvvvjiFc
0NAADgchG0QcdOFuqLtCOal5Kh3VknXe21a5rVo3W4kuIiFB3G1goAAFD5jB07VqNHj3Y9t9vtioyMNLAiAABQlRG0VVOFJaVa9VOO5qVkaPWeYyp1nN0a6uPpoXua1lFSXD3dflOIvDzZGgoAAC4uLCxMRUVFysvLK3NWW3Z2tsLCwlx9/vfuoOfuSvrfff73TqXZ2dmyWCzy8/OTp6enPD09L9jn3BgXYjabZTZzoyYAAHB9ELRVI06nU9sybJqfmqHFWzOVd7rYdSwmMlC94iLUtWVdBdZgaygAACifuLg4eXt7a+XKlUpKSpIkpaen69ChQ4qPj5ckxcfH65VXXlFOTo7r7qArVqyQxWJR06ZNXX2++uqrMmOvWLHCNYaPj4/i4uK0cuVK180aHA6HVq5cqeHDh1+PqQIAAPwmgrZqINteoIVbjmh+Sob25pxytdexmNUzNkJJsfXUKDTAwAoBAIBRTp06pZ9//tn1fP/+/UpLS1NQUJDq16+v3NxcHTp0SJmZmZLOhmjS2TPQwsLCZLVaNWTIEI0ePVpBQUGyWCwaMWKE4uPj1aFDB0lSQkKCmjZtqv79+2vSpEnKysrS888/r2HDhrnONhs6dKjeffdd/fnPf9bgwYO1atUqff7551q6dKmrttGjR2vgwIFq06aN2rVrp7ffflv5+fl69NFHr9fHBQAAcEkEbVVUQXGpVuzK1ryUDH2/95j+vTNUZi8PJTYLU6+4CN3aqLY8PUzGFgoAAAy1efNmderUyfX83PXMBg4cqJkzZ2rx4sVlgqy+fftKksaPH68JEyZIkt566y15eHgoKSlJhYWFSkxM1Hvvved6jaenp5YsWaKnnnpK8fHx8vf318CBA/XSSy+5+kRFRWnp0qUaNWqUJk+erIiICP3rX/9SYmKiq0+fPn107NgxjRs3TllZWWrVqpWWLVt23g0SAAAAjGJyOp1Oo4twN3a7XVarVTabTRZL5bkJgNPpVOqhPM1PzdCXWzN1sqDEdaxNg1pKiotQl5Z1ZfH1NrBKAACqrsq6hqhO+I4AAMCVKO8agjPaqoDMvDOuraG/HM93tYdbfZUUF6GesRGKqu1vYIUAAAAAAABVH0FbJXWmqFTLd2ZpXkqGftx3XOfOS/Tz9tR9zc9uDe1wQ7A82BoKAAAAAABwXRC0VSJOp1ObDpzQ/JQMLd1+VKcK/7M1tH1UkJLiInR/i7qqaeZrBQAAAAAAuN5IZCqBw7mntSD1iBZsydDBX0+72iOD/JQUG6Gk2AhFBtUwsEIAAAAAAAAQtLmp/MISfbX9qOanZmj9L7mudn8fT93foq56xUWobcMgtoYCAAAAAAC4CYI2N+JwOLV+/6+an3JEX+84qtNFpZIkk0m65cZgJcVG6N7mYarhw9cGAAAAAADgbkhs3MCB4/lakJqh+alHdCTvjKs9qra/kmLrqUdshOoF+hlYIQAAAAAAAH4LQZtBThYUa+m2s1tDNx044WoPMHvpgZhw9Yqrp9j6tWQysTUUAAAAAACgMiBou85OF5Vo7ILtWr4zSwXFDkmSh0nqeFOIesVFKKFpHfl6expcJQAAAAAAAC4XQdt15uftqR1HbCoodqhRaE0lxUaoR+t6CrP6Gl0aAAAAAAAArgJB23VmMpk0vmszWfy8FRNhZWsoAAAAAABAFUHQZoDbbw4xugQAAAAAAABUMA+jCwAAAAAAAACqAoI2AAAAAAAAoAIQtAEAAAAAAAAVgKANAAAAAAAAqAAEbQAAAAAAAEAFMDRomzhxotq2bauAgACFhoaqe/fuSk9PL9Nn37596tGjh0JCQmSxWNS7d29lZ2eX+z1ee+01mUwmjRw5soKrBwAAAAAAAP7D0KBtzZo1GjZsmNavX68VK1aouLhYCQkJys/PlyTl5+crISFBJpNJq1at0o8//qiioiJ17dpVDofjN8fftGmT/vGPf6hly5bXeioAAAAAAACo5ryMfPNly5aVeT5z5kyFho
YqJSVFt99+u3788UcdOHBAW7ZskcVikSTNmjVLtWrV0qpVq9S5c+eLjn3q1Cn169dP//znP/XXv/71ms4DAAAAAAAAcKtrtNlsNklSUFCQJKmwsFAmk0lms9nVx9fXVx4eHvrhhx8uOdawYcPUpUuXS4Zx5xQWFsput5d5AAAAAAAAAJfDbYI2h8OhkSNH6tZbb1Xz5s0lSR06dJC/v7/GjBmj06dPKz8/X3/6059UWlqqo0ePXnSszz77TKmpqZo4cWK53nvixImyWq2uR2RkZIXMCQAAAAAAANWH2wRtw4YN044dO/TZZ5+52kJCQjR37lx9+eWXqlmzpqxWq/Ly8hQbGysPjwuXfvjwYT399NP65JNP5OvrW673Hjt2rGw2m+tx+PDhCpkTAAAAAAAAqg9Dr9F2zvDhw7VkyRKtXbtWERERZY4lJCRo3759On78uLy8vBQYGKiwsDDdcMMNFxwrJSVFOTk5io2NdbWVlpZq7dq1evfdd1VYWChPT88yrzGbzWW2pwIAAAAAAACXy9Cgzel0asSIEVq4cKFWr16tqKioi/atXbu2JGnVqlXKycnRgw8+eMF+d999t7Zv316m7dFHH1V0dLTGjBlzXsgGAAAAAAAAVARDg7Zhw4Zp9uzZ+uKLLxQQEKCsrCxJktVqlZ+fnyRpxowZatKkiUJCQpScnKynn35ao0aNUuPGjV3j3H333erRo4eGDx+ugIAA1zXezvH391dwcPB57QAAAAAAAEBFMTRomzp1qiTpzjvvLNM+Y8YMDRo0SJKUnp6usWPHKjc3Vw0bNtRzzz2nUaNGlel/bmtpRXE6nZLE3UcBAMBlObd2OLeWgPthnQcAAK5Eedd5JicrwfNkZGRw51EAAHDFDh8+fN51Z+EeWOcBAICr8VvrPIK2C3A4HMrMzFRAQIBMJlOFj2+32xUZGanDhw/LYrFU+PjuhvlWbcy3amO+VVt1m6907efsdDp18uRJhYeHX/QO6TAW67yKxXyrNuZbtVW3+UrVb87Mt2KVd53nFncddTceHh7X5bfQFoulWvxhP4f5Vm3Mt2pjvlVbdZuvdG3nbLVar8m4qBis864N5lu1Md+qrbrNV6p+c2a+Fac86zx+1QoAAAAAAABUAII2AAAAAAAAoAIQtBnAbDZr/PjxMpvNRpdyXTDfqo35Vm3Mt2qrbvOVqueccX1Vtz9jzLdqY75VW3Wbr1T95sx8jcHNEAAAAAAAAIAKwBltAAAAAAAAQAUgaAMAAAAAAAAqAEEbAAAAAAAAUAEI2gAAAAAAAIAKQNBWQaZMmaKGDRvK19dX7du318aNGy/Zf+7cuYqOjpavr69atGihr776qsxxp9OpcePGqW7duvLz81Pnzp21d+/eazmFy3I58/3nP/+p2267TbVq1VKtWrXUuXPn8/oPGjRIJpOpzOPee++91tMot8uZ78yZM8+bi6+vb5k+Ven7vfPOO8+br8lkUpcuXVx93Pn7Xbt2rbp27arw8HCZTCYtWrToN1+zevVqxcbGymw2q1GjRpo5c+Z5fS7374Tr5XLnu2DBAt1zzz0KCQmRxWJRfHy8li9fXqbPhAkTzvt+o6Ojr+Esyu9y57t69eoL/nnOysoq06+qfL8X+m/TZDKpWbNmrj7u+v1OnDhRbdu2VUBAgEJDQ9W9e3elp6f/5usq+7+/MAbrPNZ557DOY50nVZ11AOs81nnu+v1W9nUeQVsFmDNnjkaPHq3x48crNTVVMTExSkxMVE5OzgX7r1u3Tg899JCGDBmiLVu2qHv37urevbt27Njh6jNp0iS98847mjZtmjZs2CB/f38lJiaqoKDgek3roi53vqtXr9ZDDz2k7777TsnJyYqMjFRCQoKOHDlSpt+9996ro0ePuh6ffvrp9ZjOb7rc+UqSxWIpM5eDBw+WOV6Vvt8FCxaUmeuOHTvk6emp3/3ud2X6uev3m5+fr5iYGE2ZMqVc/ffv368uXbqoU6dOSktL08
iRI/XYY4+VWZRcyZ+Z6+Vy57t27Vrdc889+uqrr5SSkqJOnTqpa9eu2rJlS5l+zZo1K/P9/vDDD9ei/Mt2ufM9Jz09vcx8QkNDXceq0vc7efLkMvM8fPiwgoKCzvvv1x2/3zVr1mjYsGFav369VqxYoeLiYiUkJCg/P/+ir6ns//7CGKzzWOf9L9Z5rPOqyjqAdR7rPMk9v99Kv85z4qq1a9fOOWzYMNfz0tJSZ3h4uHPixIkX7N+7d29nly5dyrS1b9/e+eSTTzqdTqfT4XA4w8LCnG+88YbreF5entNsNjs//fTTazCDy3O58/1fJSUlzoCAAOesWbNcbQMHDnR269atokutEJc73xkzZjitVutFx6vq3+9bb73lDAgIcJ46dcrV5s7f73+T5Fy4cOEl+/z5z392NmvWrExbnz59nImJia7nV/sZXi/lme+FNG3a1Pniiy+6no8fP94ZExNTcYVdI+WZ73fffeeU5Dxx4sRF+1Tl7/f/27v/mCrL/4/jr8OPg8BSMBKOZQSmzEyyMhlq04JS7A9tNmVDRi1zmjrdsuVWTp1rw83pH81Rbqj9MBnqFJcTDQz/YJrNn2jo1KjmDE1bCf6qed7fP/xwvt7iL47Hcw6H52M7g3Od69xcF+9zn/vl5c25N23aZC6Xy3799VdfW2ep77lz50yS7dq16459OvvxF6FBziPn3YycR86L5BxgRs6L5PqS84L3/swZbQ/o33//1b59+5Sfn+9ri4qKUn5+vnbv3n3b5+zevdvRX5JGjx7t69/U1KTm5mZHnx49eignJ+eO2wwWf+Z7q8uXL+u///5Tz549He11dXXq1auXsrKyNH36dF24cCGgY/eHv/NtbW1Venq6+vTpo3Hjxuno0aO+xyK9vuXl5SosLFRiYqKjPRzr64977b+B+B2GM6/Xq5aWlnb774kTJ9S7d29lZmaqqKhIv//+e4hGGBiDBw+Wx+PRa6+9pvr6el97pNe3vLxc+fn5Sk9Pd7R3hvr+888/ktTutXmzznz8RWiQ88h5t0POI+dFag4g50V2fcl5wXt/ZqHtAZ0/f17Xr19Xamqqoz01NbXd33q3aW5uvmv/tq8d2Waw+DPfW3300Ufq3bu34wU+ZswYffXVV6qtrdWSJUu0a9cuFRQU6Pr16wEdf0f5M9+srCytWrVKVVVV+uabb+T1ejVs2DCdPn1aUmTXd+/evTpy5IimTJniaA/X+vrjTvvvxYsXdeXKlYDsI+Fs6dKlam1t1cSJE31tOTk5WrNmjaqrq1VWVqampia9/PLLamlpCeFI/ePxePT5559r48aN2rhxo/r06aNRo0Zp//79kgLzHhiuzpw5o23btrXbfztDfb1er+bMmaPhw4fr2WefvWO/znz8RWiQ824g5/0/ch45L1JzgETOi+T6kvOC+/4cE9CtAfdQWlqqiooK1dXVOT44trCw0Pf9oEGDlJ2drb59+6qurk55eXmhGKrfcnNzlZub67s/bNgwDRgwQF988YUWL14cwpE9fOXl5Ro0aJCGDh3qaI+k+nZl3377rRYtWqSqqirHZ1kUFBT4vs/OzlZOTo7S09NVWVmpd999NxRD9VtWVpaysrJ894cNG6ZTp05p+fLl+vrrr0M4sofvyy+/VFJSksaPH+9o7wz1nTFjho4cORIWnykCdGXkPHJeZ69vV0bOI+eFa307Y87jjLYHlJKSoujoaJ09e9bRfvbsWaWlpd32OWlpaXft3/a1I9sMFn/m22bp0qUqLS3Vjh07lJ2dfde+mZmZSklJ0cmTJx94zA/iQebbJjY2Vs8//7xvLpFa30uXLqmiouK+3pDDpb7+uNP+2717d8XHxwfkNROOKioqNGXKFFVWVrY7JftWSUlJ6t+/f6es7+0MHTrUN5dIra+ZadWqVSouLpbb7b5r33Cr78yZM/Xdd9/phx9+0BNPPHHXvp35+IvQIOfdQM67M3Jee+FSX3+Q88h5kVhfcl7wj78stD
0gt9utF198UbW1tb42r9er2tpax/923Sw3N9fRX5K+//57X/+MjAylpaU5+ly8eFE//vjjHbcZLP7MV7pxdY/FixerurpaQ4YMuefPOX36tC5cuCCPxxOQcfvL3/ne7Pr162poaPDNJRLrK924lPK1a9c0efLke/6ccKmvP+61/wbiNRNu1q1bp3feeUfr1q3TG2+8cc/+ra2tOnXqVKes7+0cPHjQN5dIrK9048pOJ0+evK9/QIVLfc1MM2fO1KZNm7Rz505lZGTc8zmd+fiL0CDnkfPuhZzXXrjU1x/kPHJepNVXIueF5Pgb0EsrdFEVFRUWFxdna9assZ9//tmmTp1qSUlJ1tzcbGZmxcXFNm/ePF//+vp6i4mJsaVLl1pjY6MtWLDAYmNjraGhwdentLTUkpKSrKqqyg4fPmzjxo2zjIwMu3LlStDnd6uOzre0tNTcbrdt2LDB/vjjD9+tpaXFzMxaWlps7ty5tnv3bmtqarKamhp74YUXrF+/fnb16tWQzPFmHZ3vokWLbPv27Xbq1Cnbt2+fFRYWWrdu3ezo0aO+PpFU3zYjRoywSZMmtWsP9/q2tLTYgQMH7MCBAybJli1bZgcOHLDffvvNzMzmzZtnxcXFvv6//PKLJSQk2IcffmiNjY22YsUKi46Oturqal+fe/0OQ6mj8127dq3FxMTYihUrHPvv33//7evzwQcfWF1dnTU1NVl9fb3l5+dbSkqKnTt3Lujzu1VH57t8+XLbvHmznThxwhoaGmz27NkWFRVlNTU1vj6RVN82kydPtpycnNtuM1zrO336dOvRo4fV1dU5XpuXL1/29Ym04y9Cg5xHziPnkfPIeeGXA8zIeeS88D3+stAWIJ999pk9+eST5na7bejQobZnzx7fYyNHjrSSkhJH/8rKSuvfv7+53W4bOHCgbd261fG41+u1+fPnW2pqqsXFxVleXp4dP348GFO5Lx2Zb3p6uklqd1uwYIGZmV2+fNlef/11e+yxxyw2NtbS09PtvffeC4s3szYdme+cOXN8fVNTU23s2LG2f/9+x/Yiqb5mZseOHTNJtmPHjnbbCvf6tl3m+9Zb2xxLSkps5MiR7Z4zePBgc7vdlpmZaatXr2633bv9DkOpo/MdOXLkXfub3bjsvcfjMbfbbY8//rhNmjTJTp48GdyJ3UFH57tkyRLr27evdevWzXr27GmjRo2ynTt3tttupNTX7MZlzePj423lypW33Wa41vd285Tk2B8j8fiL0CDnkfPakPOcwr2+5DxyHjmPnHezYL0/u/43CQAAAAAAAAAPgM9oAwAAAAAAAAKAhTYAAAAAAAAgAFhoAwAAAAAAAAKAhTYAAAAAAAAgAFhoAwAAAAAAAAKAhTYAAAAAAAAgAFhoAwAAAAAAAAKAhTYAAAAAAAAgAFhoA4AgcLlc2rx5c6iHAQAAgIeArAegDQttACLe22+/LZfL1e42ZsyYUA8NAAAAD4isByCcxIR6AAAQDGPGjNHq1asdbXFxcSEaDQAAAAKJrAcgXHBGG4AuIS4uTmlpaY5bcnKypBun+peVlamgoEDx8fHKzMzUhg0bHM9vaGjQq6++qvj4eD366KOaOnWqWltbHX1WrVqlgQMHKi4uTh6PRzNnznQ8fv78eb355ptKSEhQv379tGXLloc7aQAAgC6CrAcgXLDQBgCS5s+frwkTJujQoUMqKipSYWGhGhsbJUmXLl3S6NGjlZycrJ9++knr169XTU2NI1yVlZVpxowZmjp1qhoaGrRlyxY9/fTTjp+xaNEiTZw4UYcPH9bYsWNVVFSkv/76K6jzBAAA6IrIegCCxgAgwpWUlFh0dLQlJiY6bp9++qmZmUmyadOmOZ6Tk5Nj06dPNzOzlStXWnJysrW2tvoe37p1q0VFRVlzc7OZmfXu3ds+/vjjO45Bkn3yySe++62trSbJtm3bFrB5AgAAdEVkPQDhhM9oA9AlvPLKKyorK3O09ezZ0/d9bm6u47
Hc3FwdPHhQktTY2KjnnntOiYmJvseHDx8ur9er48ePy+Vy6cyZM8rLy7vrGLKzs33fJyYmqnv37jp37py/UwIAAMD/kPUAhAsW2gB0CYmJie1O7w+U+Pj4++oXGxvruO9yueT1eh/GkAAAALoUsh6AcMFntAGApD179rS7P2DAAEnSgAEDdOjQIV26dMn3eH19vaKiopSVlaVHHnlETz31lGpra4M6ZgAAANwfsh6AYOGMNgBdwrVr19Tc3Oxoi4mJUUpKiiRp/fr1GjJkiEaMGKG1a9dq7969Ki8vlyQVFRVpwYIFKikp0cKFC/Xnn39q1qxZKi4uVmpqqiRp4cKFmjZtmnr16qWCggK1tLSovr5es2bNCu5EAQAAuiCyHoBwwUIbgC6hurpaHo/H0ZaVlaVjx45JunGVqIqKCr3//vvyeDxat26dnnnmGUlSQkKCtm/frtmzZ+ull15SQkKCJkyYoGXLlvm2VVJSoqtXr2r58uWaO3euUlJS9NZbbwVvggAAAF0YWQ9AuHCZmYV6EAAQSi6XS5s2bdL48eNDPRQAAAAEGFkPQDDxGW0AAAAAAABAALDQBgAAAAAAAAQAfzoKAAAAAAAABABntAEAAAAAAAABwEIbAAAAAAAAEAAstAEAAAAAAAABwEIbAAAAAAAAEAAstAEAAAAAAAABwEIbAAAAAAAAEAAstAEAAAAAAAABwEIbAAAAAAAAEAD/B7HEIDyYWbXeAAAAAElFTkSuQmCC",
|
| 863 |
+
"text/plain": [
|
| 864 |
+
"<Figure size 1500x500 with 2 Axes>"
|
| 865 |
+
]
|
| 866 |
+
},
|
| 867 |
+
"metadata": {},
|
| 868 |
+
"output_type": "display_data"
|
| 869 |
+
}
|
| 870 |
+
],
|
| 871 |
+
"source": [
|
| 872 |
+
"# Plot the cost over training and validation sets\n",
|
| 873 |
+
"fig,ax = plt.subplots(1,2,figsize=(15,5))\n",
|
| 874 |
+
"for i,key in enumerate(costpaths.keys()):\n",
|
| 875 |
+
" ax_sub=ax[i%3]\n",
|
| 876 |
+
" ax_sub.plot(costpaths[key])\n",
|
| 877 |
+
" ax_sub.set_title(key)\n",
|
| 878 |
+
" ax_sub.set_xlabel('Epoch')\n",
|
| 879 |
+
" ax_sub.set_ylabel('Loss')\n",
|
| 880 |
+
"plt.show()"
|
| 881 |
+
]
|
| 882 |
+
},
|
| 883 |
+
{
|
| 884 |
+
"cell_type": "code",
|
| 885 |
+
"execution_count": 22,
|
| 886 |
+
"metadata": {},
|
| 887 |
+
"outputs": [],
|
| 888 |
+
"source": [
|
| 889 |
+
"# Save the entire model\n",
|
| 890 |
+
"torch.save(model, os.getcwd() + '/models/recommender.pt')"
|
| 891 |
+
]
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"cell_type": "code",
|
| 895 |
+
"execution_count": 24,
|
| 896 |
+
"metadata": {},
|
| 897 |
+
"outputs": [],
|
| 898 |
+
"source": [
|
| 899 |
+
"def generate_recommendations(artist_album, playlists, model, playlist_id, device, top_n=10, batch_size=1024):\n",
|
| 900 |
+
" model.eval()\n",
|
| 901 |
+
"\n",
|
| 902 |
+
"\n",
|
| 903 |
+
" all_movie_ids = torch.tensor(artist_album['artist_album_id'].values, dtype=torch.long, device=device)\n",
|
| 904 |
+
" user_ids = torch.full((len(all_movie_ids),), playlist_id, dtype=torch.long, device=device)\n",
|
| 905 |
+
"\n",
|
| 906 |
+
" # Initialize tensor to store all predictions\n",
|
| 907 |
+
" all_predictions = torch.zeros(len(all_movie_ids), device=device)\n",
|
| 908 |
+
"\n",
|
| 909 |
+
" # Generate predictions in batches\n",
|
| 910 |
+
" with torch.no_grad():\n",
|
| 911 |
+
" for i in range(0, len(all_movie_ids), batch_size):\n",
|
| 912 |
+
" batch_user_ids = user_ids[i:i+batch_size]\n",
|
| 913 |
+
" batch_movie_ids = all_movie_ids[i:i+batch_size]\n",
|
| 914 |
+
"\n",
|
| 915 |
+
" input_tensor = torch.stack([batch_user_ids, batch_movie_ids], dim=1)\n",
|
| 916 |
+
" batch_predictions = model(input_tensor).squeeze()\n",
|
| 917 |
+
" all_predictions[i:i+batch_size] = batch_predictions\n",
|
| 918 |
+
"\n",
|
| 919 |
+
" # Convert to numpy for easier handling\n",
|
| 920 |
+
" predictions = all_predictions.cpu().numpy()\n",
|
| 921 |
+
"\n",
|
| 922 |
+
" albums_listened = set(playlists.loc[playlists['playlist_id'] == playlist_id, 'artist_album_id'].tolist())\n",
|
| 923 |
+
"\n",
|
| 924 |
+
" unlistened_mask = np.isin(artist_album['artist_album_id'].values, list(albums_listened), invert=True)\n",
|
| 925 |
+
"\n",
|
| 926 |
+
" # Get top N recommendations\n",
|
| 927 |
+
" top_indices = np.argsort(predictions[unlistened_mask])[-top_n:][::-1]\n",
|
| 928 |
+
" recs = artist_album['artist_album_id'].values[unlistened_mask][top_indices]\n",
|
| 929 |
+
"\n",
|
| 930 |
+
" recs_names = artist_album.loc[artist_album['artist_album_id'].isin(recs)]\n",
|
| 931 |
+
" album, artist = recs_names['album_name'].values, recs_names['artist_name'].values\n",
|
| 932 |
+
"\n",
|
| 933 |
+
" return album.tolist(), artist.tolist() "
|
| 934 |
+
]
|
| 935 |
+
},
|
| 936 |
+
{
|
| 937 |
+
"cell_type": "code",
|
| 938 |
+
"execution_count": null,
|
| 939 |
+
"metadata": {},
|
| 940 |
+
"outputs": [
|
| 941 |
+
{
|
| 942 |
+
"name": "stdout",
|
| 943 |
+
"output_type": "stream",
|
| 944 |
+
"text": [
|
| 945 |
+
"Precision: 5.0609978643478826e-06\n",
|
| 946 |
+
"Recall: 5.0609978643478826e-06\n"
|
| 947 |
+
]
|
| 948 |
+
}
|
| 949 |
+
],
|
| 950 |
+
"source": [
|
| 951 |
+
"from torchmetrics import Precision, Recall\n",
|
| 952 |
+
"\n",
|
| 953 |
+
"precision = Precision(task=\"multiclass\", num_classes=num_classes).to(device) \n",
|
| 954 |
+
"recall = Recall(task=\"multiclass\", num_classes=num_classes).to(device) \n",
|
| 955 |
+
"\n",
|
| 956 |
+
"\n",
|
| 957 |
+
"model.eval()\n",
|
| 958 |
+
"with torch.no_grad():\n",
|
| 959 |
+
" for batch in dataloaders['val']:\n",
|
| 960 |
+
" inputs, targets = batch\n",
|
| 961 |
+
" inputs = inputs.to(device)\n",
|
| 962 |
+
" targets = targets.to(device)\n",
|
| 963 |
+
"\n",
|
| 964 |
+
" outputs = model(inputs)\n",
|
| 965 |
+
"\n",
|
| 966 |
+
" # For binary classification\n",
|
| 967 |
+
" preds = torch.argmax(outputs, dim=1)\n",
|
| 968 |
+
"\n",
|
| 969 |
+
" # Update metrics\n",
|
| 970 |
+
" precision(preds, targets)\n",
|
| 971 |
+
" recall(preds, targets)\n",
|
| 972 |
+
"\n",
|
| 973 |
+
"# Compute final metrics\n",
|
| 974 |
+
"final_precision = precision.compute()\n",
|
| 975 |
+
"final_recall = recall.compute()\n",
|
| 976 |
+
"\n",
|
| 977 |
+
"print(f\"Precision: {final_precision}\")\n",
|
| 978 |
+
"print(f\"Recall: {final_recall}\")"
|
| 979 |
+
]
|
| 980 |
+
}
|
| 981 |
+
],
|
| 982 |
+
"metadata": {
|
| 983 |
+
"colab": {
|
| 984 |
+
"machine_shape": "hm",
|
| 985 |
+
"provenance": []
|
| 986 |
+
},
|
| 987 |
+
"kernelspec": {
|
| 988 |
+
"display_name": "Python 3",
|
| 989 |
+
"name": "python3"
|
| 990 |
+
},
|
| 991 |
+
"language_info": {
|
| 992 |
+
"codemirror_mode": {
|
| 993 |
+
"name": "ipython",
|
| 994 |
+
"version": 3
|
| 995 |
+
},
|
| 996 |
+
"file_extension": ".py",
|
| 997 |
+
"mimetype": "text/x-python",
|
| 998 |
+
"name": "python",
|
| 999 |
+
"nbconvert_exporter": "python",
|
| 1000 |
+
"pygments_lexer": "ipython3",
|
| 1001 |
+
"version": "3.9.19"
|
| 1002 |
+
}
|
| 1003 |
+
},
|
| 1004 |
+
"nbformat": 4,
|
| 1005 |
+
"nbformat_minor": 0
|
| 1006 |
+
}
|
models/recommender.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7c8e2b8fc581d039c84e3e0c9983b17a6fa328563c253a03b139d49dc87f3e9
|
| 3 |
+
size 120583728
|
notebooks/dbscan.ipynb
ADDED
|
@@ -0,0 +1,748 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 0,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"colab": {
|
| 6 |
+
"provenance": [],
|
| 7 |
+
"machine_shape": "hm"
|
| 8 |
+
},
|
| 9 |
+
"kernelspec": {
|
| 10 |
+
"name": "python3",
|
| 11 |
+
"display_name": "Python 3"
|
| 12 |
+
},
|
| 13 |
+
"language_info": {
|
| 14 |
+
"name": "python"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"cells": [
|
| 18 |
+
{
|
| 19 |
+
"cell_type": "code",
|
| 20 |
+
"source": [
|
| 21 |
+
"import os\n",
|
| 22 |
+
"import urllib.request\n",
|
| 23 |
+
"import zipfile\n",
|
| 24 |
+
"import json\n",
|
| 25 |
+
"import pandas as pd\n",
|
| 26 |
+
"import time\n",
|
| 27 |
+
"import torch\n",
|
| 28 |
+
"import numpy as np\n",
|
| 29 |
+
"import pandas as pd\n",
|
| 30 |
+
"import torch.nn as nn\n",
|
| 31 |
+
"import torch.nn.functional as F\n",
|
| 32 |
+
"import torch.optim as optim\n",
|
| 33 |
+
"from torch.utils.data import DataLoader, TensorDataset\n",
|
| 34 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 35 |
+
"import matplotlib.pyplot as plt\n",
|
| 36 |
+
"from sklearn.preprocessing import LabelEncoder"
|
| 37 |
+
],
|
| 38 |
+
"metadata": {
|
| 39 |
+
"id": "KHnddFeW5hwh"
|
| 40 |
+
},
|
| 41 |
+
"execution_count": null,
|
| 42 |
+
"outputs": []
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"cell_type": "code",
|
| 46 |
+
"source": [
|
| 47 |
+
"from google.colab import drive\n",
|
| 48 |
+
"drive.mount('/content/drive')"
|
| 49 |
+
],
|
| 50 |
+
"metadata": {
|
| 51 |
+
"id": "l7pGG_d85lzH"
|
| 52 |
+
},
|
| 53 |
+
"execution_count": null,
|
| 54 |
+
"outputs": []
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"source": [
|
| 59 |
+
"# prompt: copy a file from another directory to current directory in python code and create folders if needed\n",
|
| 60 |
+
"\n",
|
| 61 |
+
"import shutil\n",
|
| 62 |
+
"import os\n",
|
| 63 |
+
"\n",
|
| 64 |
+
"def copy_file(src, dst):\n",
|
| 65 |
+
" \"\"\"\n",
|
| 66 |
+
" Copies a file from src to dst, creating any necessary directories.\n",
|
| 67 |
+
"\n",
|
| 68 |
+
" Args:\n",
|
| 69 |
+
" src: The path to the source file.\n",
|
| 70 |
+
" dst: The path to the destination file.\n",
|
| 71 |
+
" \"\"\"\n",
|
| 72 |
+
" # Create the destination directory if it doesn't exist.\n",
|
| 73 |
+
" dst_dir = os.path.dirname(dst)\n",
|
| 74 |
+
" if not os.path.exists(dst_dir):\n",
|
| 75 |
+
" os.makedirs(dst_dir)\n",
|
| 76 |
+
"\n",
|
| 77 |
+
" # Copy the file.\n",
|
| 78 |
+
" shutil.copy2(src, dst)\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"copy_file('/content/drive/MyDrive/rec_data/spotify_million_playlist_dataset.zip', os.getcwd() + '/data/raw/spotify_million_playlist_dataset.zip')"
|
| 81 |
+
],
|
| 82 |
+
"metadata": {
|
| 83 |
+
"id": "dL8TIlH55qSc"
|
| 84 |
+
},
|
| 85 |
+
"execution_count": 3,
|
| 86 |
+
"outputs": []
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"cell_type": "code",
|
| 90 |
+
"source": [
|
| 91 |
+
"def unzip_archive(filepath, dir_path):\n",
|
| 92 |
+
" with zipfile.ZipFile(f\"{filepath}\", 'r') as zip_ref:\n",
|
| 93 |
+
" zip_ref.extractall(dir_path)\n",
|
| 94 |
+
"\n",
|
| 95 |
+
"unzip_archive(os.getcwd() + '/data/raw/spotify_million_playlist_dataset.zip', os.getcwd() + '/data/raw/playlists')\n"
|
| 96 |
+
],
|
| 97 |
+
"metadata": {
|
| 98 |
+
"id": "LLy-YA775snY"
|
| 99 |
+
},
|
| 100 |
+
"execution_count": null,
|
| 101 |
+
"outputs": []
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"cell_type": "code",
|
| 105 |
+
"source": [
|
| 106 |
+
"import shutil\n",
|
| 107 |
+
"\n",
|
| 108 |
+
"def make_dir(directory):\n",
|
| 109 |
+
" if os.path.exists(directory):\n",
|
| 110 |
+
" shutil.rmtree(directory)\n",
|
| 111 |
+
" os.makedirs(directory)\n",
|
| 112 |
+
" else:\n",
|
| 113 |
+
" os.makedirs(directory)"
|
| 114 |
+
],
|
| 115 |
+
"metadata": {
|
| 116 |
+
"id": "YtO0seclE1Pb"
|
| 117 |
+
},
|
| 118 |
+
"execution_count": null,
|
| 119 |
+
"outputs": []
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"cell_type": "code",
|
| 123 |
+
"source": [
|
| 124 |
+
"\n",
|
| 125 |
+
"\n",
|
| 126 |
+
"directory = os.getcwd() + '/data/raw/data'\n",
|
| 127 |
+
"make_dir(directory)"
|
| 128 |
+
],
|
| 129 |
+
"metadata": {
|
| 130 |
+
"id": "UeqDk3_65vTt"
|
| 131 |
+
},
|
| 132 |
+
"execution_count": null,
|
| 133 |
+
"outputs": []
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"cell_type": "code",
|
| 137 |
+
"source": [
|
| 138 |
+
"cols = [\n",
|
| 139 |
+
" 'name',\n",
|
| 140 |
+
" 'pid',\n",
|
| 141 |
+
" 'num_followers',\n",
|
| 142 |
+
" 'pos',\n",
|
| 143 |
+
" 'artist_name',\n",
|
| 144 |
+
" 'track_name',\n",
|
| 145 |
+
" 'album_name'\n",
|
| 146 |
+
"]"
|
| 147 |
+
],
|
| 148 |
+
"metadata": {
|
| 149 |
+
"id": "zMTup29b5wtO"
|
| 150 |
+
},
|
| 151 |
+
"execution_count": null,
|
| 152 |
+
"outputs": []
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"cell_type": "code",
|
| 156 |
+
"source": [
|
| 157 |
+
"directory = os.getcwd() + '/data/raw/playlists/data'\n",
|
| 158 |
+
"df = pd.DataFrame()\n",
|
| 159 |
+
"index = 0\n",
|
| 160 |
+
"# Loop through all files in the directory\n",
|
| 161 |
+
"for filename in os.listdir(directory):\n",
|
| 162 |
+
" # Check if the item is a file (not a subdirectory)\n",
|
| 163 |
+
" if os.path.isfile(os.path.join(directory, filename)):\n",
|
| 164 |
+
" if filename.find('.json') != -1 :\n",
|
| 165 |
+
" index += 1\n",
|
| 166 |
+
"\n",
|
| 167 |
+
" # Print the filename or perform operations on the file\n",
|
| 168 |
+
" print(f'\\r{filename}\\t{index}/1000\\t{((index/1000)*100):.1f}%', end='')\n",
|
| 169 |
+
"\n",
|
| 170 |
+
" # If you need the full file path, you can use:\n",
|
| 171 |
+
" full_path = os.path.join(directory, filename)\n",
|
| 172 |
+
"\n",
|
| 173 |
+
" with open(full_path, 'r') as file:\n",
|
| 174 |
+
" json_data = json.load(file)\n",
|
| 175 |
+
"\n",
|
| 176 |
+
" temp = pd.DataFrame(json_data['playlists'])\n",
|
| 177 |
+
" expanded_df = temp.explode('tracks').reset_index(drop=True)\n",
|
| 178 |
+
"\n",
|
| 179 |
+
" # Normalize the JSON data\n",
|
| 180 |
+
" json_normalized = pd.json_normalize(expanded_df['tracks'])\n",
|
| 181 |
+
"\n",
|
| 182 |
+
" # Concatenate the original DataFrame with the normalized JSON data\n",
|
| 183 |
+
" result = pd.concat([expanded_df.drop(columns=['tracks']), json_normalized], axis=1)\n",
|
| 184 |
+
"\n",
|
| 185 |
+
" result = result[cols]\n",
|
| 186 |
+
"\n",
|
| 187 |
+
" df = pd.concat([df, result], axis=0, ignore_index=True)\n",
|
| 188 |
+
"\n",
|
| 189 |
+
" if index % 50 == 0:\n",
|
| 190 |
+
" df.to_parquet(f'{os.getcwd()}/data/raw/data/playlists_{index % 1000}.parquet')\n",
|
| 191 |
+
" del df\n",
|
| 192 |
+
" df = pd.DataFrame()\n",
|
| 193 |
+
" if index % 100 == 0:\n",
|
| 194 |
+
" break"
|
| 195 |
+
],
|
| 196 |
+
"metadata": {
|
| 197 |
+
"colab": {
|
| 198 |
+
"base_uri": "https://localhost:8080/"
|
| 199 |
+
},
|
| 200 |
+
"id": "h6jQO9HT5zsG",
|
| 201 |
+
"outputId": "ec229c95-c29b-4622-bccf-0fc0bb69f9ba"
|
| 202 |
+
},
|
| 203 |
+
"execution_count": null,
|
| 204 |
+
"outputs": [
|
| 205 |
+
{
|
| 206 |
+
"output_type": "stream",
|
| 207 |
+
"name": "stdout",
|
| 208 |
+
"text": [
|
| 209 |
+
"mpd.slice.727000-727999.json\t100/1000\t10.0%"
|
| 210 |
+
]
|
| 211 |
+
}
|
| 212 |
+
]
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"cell_type": "code",
|
| 216 |
+
"source": [
|
| 217 |
+
"import pyarrow.parquet as pq\n",
|
| 218 |
+
"\n",
|
| 219 |
+
"def read_parquet_folder(folder_path):\n",
|
| 220 |
+
" dataframes = []\n",
|
| 221 |
+
" for file in os.listdir(folder_path):\n",
|
| 222 |
+
" if file.endswith('.parquet'):\n",
|
| 223 |
+
" file_path = os.path.join(folder_path, file)\n",
|
| 224 |
+
" df = pd.read_parquet(file_path)\n",
|
| 225 |
+
" dataframes.append(df)\n",
|
| 226 |
+
"\n",
|
| 227 |
+
" return pd.concat(dataframes, ignore_index=True)\n",
|
| 228 |
+
"\n",
|
| 229 |
+
"folder_path = os.getcwd() + '/data/raw/data'\n",
|
| 230 |
+
"df = read_parquet_folder(folder_path)"
|
| 231 |
+
],
|
| 232 |
+
"metadata": {
|
| 233 |
+
"id": "PngL0QHq516u"
|
| 234 |
+
},
|
| 235 |
+
"execution_count": null,
|
| 236 |
+
"outputs": []
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
"cell_type": "code",
|
| 240 |
+
"source": [
|
| 241 |
+
"directory = os.getcwd() + '/data/raw/mappings'\n",
|
| 242 |
+
"make_dir(directory)"
|
| 243 |
+
],
|
| 244 |
+
"metadata": {
|
| 245 |
+
"id": "hdLpjr2153b_"
|
| 246 |
+
},
|
| 247 |
+
"execution_count": null,
|
| 248 |
+
"outputs": []
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"cell_type": "code",
|
| 252 |
+
"source": [
|
| 253 |
+
"def create_ids(df, col, name):\n",
|
| 254 |
+
" # Create a dictionary mapping unique values to IDs\n",
|
| 255 |
+
" value_to_id = {val: i for i, val in enumerate(df[col].unique())}\n",
|
| 256 |
+
"\n",
|
| 257 |
+
" # Create a new column with the IDs\n",
|
| 258 |
+
" df[f'{name}_id'] = df[col].map(value_to_id)\n",
|
| 259 |
+
" df[[f'{name}_id', col]].drop_duplicates().to_csv(os.getcwd() + f'/data/raw/mappings/{name}.csv')\n",
|
| 260 |
+
"\n",
|
| 261 |
+
" return df"
|
| 262 |
+
],
|
| 263 |
+
"metadata": {
|
| 264 |
+
"id": "peZyue6t57Mz"
|
| 265 |
+
},
|
| 266 |
+
"execution_count": null,
|
| 267 |
+
"outputs": []
|
| 268 |
+
},
|
| 269 |
+
{
|
| 270 |
+
"cell_type": "code",
|
| 271 |
+
"source": [
|
| 272 |
+
"df = create_ids(df, 'artist_name', 'artist')\n",
|
| 273 |
+
"df = create_ids(df, 'pid', 'playlist')\n",
|
| 274 |
+
"# df = create_ids(df, 'track_name', 'track')\n",
|
| 275 |
+
"df = create_ids(df, 'album_name', 'album')"
|
| 276 |
+
],
|
| 277 |
+
"metadata": {
|
| 278 |
+
"id": "p68WNyaf58rS"
|
| 279 |
+
},
|
| 280 |
+
"execution_count": null,
|
| 281 |
+
"outputs": []
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"cell_type": "code",
|
| 285 |
+
"source": [
|
| 286 |
+
"df['song_count'] = df.groupby(['pid','artist_name','album_name'])['track_name'].transform('nunique')\n",
|
| 287 |
+
"\n",
|
| 288 |
+
"df['playlist_songs'] = df.groupby(['pid'])['pos'].transform('max')\n",
|
| 289 |
+
"df['playlist_songs'] += 1"
|
| 290 |
+
],
|
| 291 |
+
"metadata": {
|
| 292 |
+
"id": "aSBKxRFa5-O_"
|
| 293 |
+
},
|
| 294 |
+
"execution_count": null,
|
| 295 |
+
"outputs": []
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"cell_type": "code",
|
| 299 |
+
"source": [
|
| 300 |
+
"df['artist_album'] = df[['artist_name', 'album_name']].agg('::'.join, axis=1)\n",
|
| 301 |
+
"\n",
|
| 302 |
+
"# Step 2: Create a dictionary mapping unique combined values to IDs\n",
|
| 303 |
+
"value_to_id = {val: i for i, val in enumerate(df['artist_album'].unique())}\n",
|
| 304 |
+
"\n",
|
| 305 |
+
"# Step 3: Map these IDs back to the DataFrame\n",
|
| 306 |
+
"df['artist_album_id'] = df['artist_album'].map(value_to_id)\n",
|
| 307 |
+
"\n",
|
| 308 |
+
"df[[f'artist_album_id', 'artist_album', 'artist_name', 'album_name', 'track_name']].drop_duplicates().to_csv(os.getcwd() + f'/data/raw/mappings/artist_album.csv')\n"
|
| 309 |
+
],
|
| 310 |
+
"metadata": {
|
| 311 |
+
"id": "4WqHH-pn5_nL"
|
| 312 |
+
},
|
| 313 |
+
"execution_count": null,
|
| 314 |
+
"outputs": []
|
| 315 |
+
},
|
| 316 |
+
{
|
| 317 |
+
"cell_type": "code",
|
| 318 |
+
"source": [
|
| 319 |
+
"# df = df.groupby(['playlist_id','artist_album','artist_album_id','playlist_songs']).agg({\n",
|
| 320 |
+
"# 'song_count': 'sum',\n",
|
| 321 |
+
"# 'track_name': '|'.join,\n",
|
| 322 |
+
"# 'track_name': '|'.join,\n",
|
| 323 |
+
"# }).reset_index()\n",
|
| 324 |
+
"df['song_count'] = df.groupby(['playlist_id','artist_album_id'])['song_count'].transform('sum')\n",
|
| 325 |
+
"\n",
|
| 326 |
+
"# Encode the genres data\n",
|
| 327 |
+
"encoder = LabelEncoder()\n",
|
| 328 |
+
"encoder.fit(df['track_name'])\n",
|
| 329 |
+
"df['track_id'] = encoder.transform(df['track_name'])"
|
| 330 |
+
],
|
| 331 |
+
"metadata": {
|
| 332 |
+
"id": "V1bhU5rW6BSY"
|
| 333 |
+
},
|
| 334 |
+
"execution_count": null,
|
| 335 |
+
"outputs": []
|
| 336 |
+
},
|
| 337 |
+
{
|
| 338 |
+
"cell_type": "code",
|
| 339 |
+
"source": [
|
| 340 |
+
"# df['artist_percent'] = df['artist_count'] / df['playlist_songs']\n",
|
| 341 |
+
"df['song_percent'] = df['song_count'] / df['playlist_songs']\n",
|
| 342 |
+
"# df['album_percent'] = df['album_count'] / df['playlist_songs']"
|
| 343 |
+
],
|
| 344 |
+
"metadata": {
|
| 345 |
+
"id": "l6sUWKYC6DCw"
|
| 346 |
+
},
|
| 347 |
+
"execution_count": null,
|
| 348 |
+
"outputs": []
|
| 349 |
+
},
|
| 350 |
+
{
|
| 351 |
+
"cell_type": "code",
|
| 352 |
+
"source": [
|
| 353 |
+
"import numpy as np\n",
|
| 354 |
+
"\n",
|
| 355 |
+
"# Assuming you have a DataFrame 'df' with a column 'column_name'\n",
|
| 356 |
+
"df['song_percent'] = 1 / (1 + np.exp(-df['song_percent']))"
|
| 357 |
+
],
|
| 358 |
+
"metadata": {
|
| 359 |
+
"id": "XxC0WnlL6EWz"
|
| 360 |
+
},
|
| 361 |
+
"execution_count": null,
|
| 362 |
+
"outputs": []
|
| 363 |
+
},
|
| 364 |
+
{
|
| 365 |
+
"cell_type": "code",
|
| 366 |
+
"source": [
|
| 367 |
+
"artists = df.loc[:,['playlist_id','artist_id','album_id']].drop_duplicates()\n",
|
| 368 |
+
"artists.head()"
|
| 369 |
+
],
|
| 370 |
+
"metadata": {
|
| 371 |
+
"colab": {
|
| 372 |
+
"base_uri": "https://localhost:8080/",
|
| 373 |
+
"height": 206
|
| 374 |
+
},
|
| 375 |
+
"id": "kbxBcQiX6F2v",
|
| 376 |
+
"outputId": "eb1fe0b1-83df-4a31-9110-5c904ad14af9"
|
| 377 |
+
},
|
| 378 |
+
"execution_count": null,
|
| 379 |
+
"outputs": [
|
| 380 |
+
{
|
| 381 |
+
"output_type": "execute_result",
|
| 382 |
+
"data": {
|
| 383 |
+
"text/plain": [
|
| 384 |
+
" playlist_id artist_id album_id\n",
|
| 385 |
+
"0 0 0 0\n",
|
| 386 |
+
"1 0 1 1\n",
|
| 387 |
+
"2 0 2 2\n",
|
| 388 |
+
"3 0 3 3\n",
|
| 389 |
+
"4 0 4 4"
|
| 390 |
+
],
|
| 391 |
+
"text/html": [
|
| 392 |
+
"\n",
|
| 393 |
+
" <div id=\"df-cedfd0c3-1f93-4a45-b95c-5d58bbf23f45\" class=\"colab-df-container\">\n",
|
| 394 |
+
" <div>\n",
|
| 395 |
+
"<style scoped>\n",
|
| 396 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 397 |
+
" vertical-align: middle;\n",
|
| 398 |
+
" }\n",
|
| 399 |
+
"\n",
|
| 400 |
+
" .dataframe tbody tr th {\n",
|
| 401 |
+
" vertical-align: top;\n",
|
| 402 |
+
" }\n",
|
| 403 |
+
"\n",
|
| 404 |
+
" .dataframe thead th {\n",
|
| 405 |
+
" text-align: right;\n",
|
| 406 |
+
" }\n",
|
| 407 |
+
"</style>\n",
|
| 408 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 409 |
+
" <thead>\n",
|
| 410 |
+
" <tr style=\"text-align: right;\">\n",
|
| 411 |
+
" <th></th>\n",
|
| 412 |
+
" <th>playlist_id</th>\n",
|
| 413 |
+
" <th>artist_id</th>\n",
|
| 414 |
+
" <th>album_id</th>\n",
|
| 415 |
+
" </tr>\n",
|
| 416 |
+
" </thead>\n",
|
| 417 |
+
" <tbody>\n",
|
| 418 |
+
" <tr>\n",
|
| 419 |
+
" <th>0</th>\n",
|
| 420 |
+
" <td>0</td>\n",
|
| 421 |
+
" <td>0</td>\n",
|
| 422 |
+
" <td>0</td>\n",
|
| 423 |
+
" </tr>\n",
|
| 424 |
+
" <tr>\n",
|
| 425 |
+
" <th>1</th>\n",
|
| 426 |
+
" <td>0</td>\n",
|
| 427 |
+
" <td>1</td>\n",
|
| 428 |
+
" <td>1</td>\n",
|
| 429 |
+
" </tr>\n",
|
| 430 |
+
" <tr>\n",
|
| 431 |
+
" <th>2</th>\n",
|
| 432 |
+
" <td>0</td>\n",
|
| 433 |
+
" <td>2</td>\n",
|
| 434 |
+
" <td>2</td>\n",
|
| 435 |
+
" </tr>\n",
|
| 436 |
+
" <tr>\n",
|
| 437 |
+
" <th>3</th>\n",
|
| 438 |
+
" <td>0</td>\n",
|
| 439 |
+
" <td>3</td>\n",
|
| 440 |
+
" <td>3</td>\n",
|
| 441 |
+
" </tr>\n",
|
| 442 |
+
" <tr>\n",
|
| 443 |
+
" <th>4</th>\n",
|
| 444 |
+
" <td>0</td>\n",
|
| 445 |
+
" <td>4</td>\n",
|
| 446 |
+
" <td>4</td>\n",
|
| 447 |
+
" </tr>\n",
|
| 448 |
+
" </tbody>\n",
|
| 449 |
+
"</table>\n",
|
| 450 |
+
"</div>\n",
|
| 451 |
+
" <div class=\"colab-df-buttons\">\n",
|
| 452 |
+
"\n",
|
| 453 |
+
" <div class=\"colab-df-container\">\n",
|
| 454 |
+
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-cedfd0c3-1f93-4a45-b95c-5d58bbf23f45')\"\n",
|
| 455 |
+
" title=\"Convert this dataframe to an interactive table.\"\n",
|
| 456 |
+
" style=\"display:none;\">\n",
|
| 457 |
+
"\n",
|
| 458 |
+
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
|
| 459 |
+
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
|
| 460 |
+
" </svg>\n",
|
| 461 |
+
" </button>\n",
|
| 462 |
+
"\n",
|
| 463 |
+
" <style>\n",
|
| 464 |
+
" .colab-df-container {\n",
|
| 465 |
+
" display:flex;\n",
|
| 466 |
+
" gap: 12px;\n",
|
| 467 |
+
" }\n",
|
| 468 |
+
"\n",
|
| 469 |
+
" .colab-df-convert {\n",
|
| 470 |
+
" background-color: #E8F0FE;\n",
|
| 471 |
+
" border: none;\n",
|
| 472 |
+
" border-radius: 50%;\n",
|
| 473 |
+
" cursor: pointer;\n",
|
| 474 |
+
" display: none;\n",
|
| 475 |
+
" fill: #1967D2;\n",
|
| 476 |
+
" height: 32px;\n",
|
| 477 |
+
" padding: 0 0 0 0;\n",
|
| 478 |
+
" width: 32px;\n",
|
| 479 |
+
" }\n",
|
| 480 |
+
"\n",
|
| 481 |
+
" .colab-df-convert:hover {\n",
|
| 482 |
+
" background-color: #E2EBFA;\n",
|
| 483 |
+
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
| 484 |
+
" fill: #174EA6;\n",
|
| 485 |
+
" }\n",
|
| 486 |
+
"\n",
|
| 487 |
+
" .colab-df-buttons div {\n",
|
| 488 |
+
" margin-bottom: 4px;\n",
|
| 489 |
+
" }\n",
|
| 490 |
+
"\n",
|
| 491 |
+
" [theme=dark] .colab-df-convert {\n",
|
| 492 |
+
" background-color: #3B4455;\n",
|
| 493 |
+
" fill: #D2E3FC;\n",
|
| 494 |
+
" }\n",
|
| 495 |
+
"\n",
|
| 496 |
+
" [theme=dark] .colab-df-convert:hover {\n",
|
| 497 |
+
" background-color: #434B5C;\n",
|
| 498 |
+
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
| 499 |
+
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
| 500 |
+
" fill: #FFFFFF;\n",
|
| 501 |
+
" }\n",
|
| 502 |
+
" </style>\n",
|
| 503 |
+
"\n",
|
| 504 |
+
" <script>\n",
|
| 505 |
+
" const buttonEl =\n",
|
| 506 |
+
" document.querySelector('#df-cedfd0c3-1f93-4a45-b95c-5d58bbf23f45 button.colab-df-convert');\n",
|
| 507 |
+
" buttonEl.style.display =\n",
|
| 508 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
| 509 |
+
"\n",
|
| 510 |
+
" async function convertToInteractive(key) {\n",
|
| 511 |
+
" const element = document.querySelector('#df-cedfd0c3-1f93-4a45-b95c-5d58bbf23f45');\n",
|
| 512 |
+
" const dataTable =\n",
|
| 513 |
+
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
| 514 |
+
" [key], {});\n",
|
| 515 |
+
" if (!dataTable) return;\n",
|
| 516 |
+
"\n",
|
| 517 |
+
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
| 518 |
+
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
| 519 |
+
" + ' to learn more about interactive tables.';\n",
|
| 520 |
+
" element.innerHTML = '';\n",
|
| 521 |
+
" dataTable['output_type'] = 'display_data';\n",
|
| 522 |
+
" await google.colab.output.renderOutput(dataTable, element);\n",
|
| 523 |
+
" const docLink = document.createElement('div');\n",
|
| 524 |
+
" docLink.innerHTML = docLinkHtml;\n",
|
| 525 |
+
" element.appendChild(docLink);\n",
|
| 526 |
+
" }\n",
|
| 527 |
+
" </script>\n",
|
| 528 |
+
" </div>\n",
|
| 529 |
+
"\n",
|
| 530 |
+
"\n",
|
| 531 |
+
"<div id=\"df-066c4d9a-38ab-411d-b575-92d90726ec60\">\n",
|
| 532 |
+
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-066c4d9a-38ab-411d-b575-92d90726ec60')\"\n",
|
| 533 |
+
" title=\"Suggest charts\"\n",
|
| 534 |
+
" style=\"display:none;\">\n",
|
| 535 |
+
"\n",
|
| 536 |
+
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
| 537 |
+
" width=\"24px\">\n",
|
| 538 |
+
" <g>\n",
|
| 539 |
+
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
|
| 540 |
+
" </g>\n",
|
| 541 |
+
"</svg>\n",
|
| 542 |
+
" </button>\n",
|
| 543 |
+
"\n",
|
| 544 |
+
"<style>\n",
|
| 545 |
+
" .colab-df-quickchart {\n",
|
| 546 |
+
" --bg-color: #E8F0FE;\n",
|
| 547 |
+
" --fill-color: #1967D2;\n",
|
| 548 |
+
" --hover-bg-color: #E2EBFA;\n",
|
| 549 |
+
" --hover-fill-color: #174EA6;\n",
|
| 550 |
+
" --disabled-fill-color: #AAA;\n",
|
| 551 |
+
" --disabled-bg-color: #DDD;\n",
|
| 552 |
+
" }\n",
|
| 553 |
+
"\n",
|
| 554 |
+
" [theme=dark] .colab-df-quickchart {\n",
|
| 555 |
+
" --bg-color: #3B4455;\n",
|
| 556 |
+
" --fill-color: #D2E3FC;\n",
|
| 557 |
+
" --hover-bg-color: #434B5C;\n",
|
| 558 |
+
" --hover-fill-color: #FFFFFF;\n",
|
| 559 |
+
" --disabled-bg-color: #3B4455;\n",
|
| 560 |
+
" --disabled-fill-color: #666;\n",
|
| 561 |
+
" }\n",
|
| 562 |
+
"\n",
|
| 563 |
+
" .colab-df-quickchart {\n",
|
| 564 |
+
" background-color: var(--bg-color);\n",
|
| 565 |
+
" border: none;\n",
|
| 566 |
+
" border-radius: 50%;\n",
|
| 567 |
+
" cursor: pointer;\n",
|
| 568 |
+
" display: none;\n",
|
| 569 |
+
" fill: var(--fill-color);\n",
|
| 570 |
+
" height: 32px;\n",
|
| 571 |
+
" padding: 0;\n",
|
| 572 |
+
" width: 32px;\n",
|
| 573 |
+
" }\n",
|
| 574 |
+
"\n",
|
| 575 |
+
" .colab-df-quickchart:hover {\n",
|
| 576 |
+
" background-color: var(--hover-bg-color);\n",
|
| 577 |
+
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
| 578 |
+
" fill: var(--button-hover-fill-color);\n",
|
| 579 |
+
" }\n",
|
| 580 |
+
"\n",
|
| 581 |
+
" .colab-df-quickchart-complete:disabled,\n",
|
| 582 |
+
" .colab-df-quickchart-complete:disabled:hover {\n",
|
| 583 |
+
" background-color: var(--disabled-bg-color);\n",
|
| 584 |
+
" fill: var(--disabled-fill-color);\n",
|
| 585 |
+
" box-shadow: none;\n",
|
| 586 |
+
" }\n",
|
| 587 |
+
"\n",
|
| 588 |
+
" .colab-df-spinner {\n",
|
| 589 |
+
" border: 2px solid var(--fill-color);\n",
|
| 590 |
+
" border-color: transparent;\n",
|
| 591 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 592 |
+
" animation:\n",
|
| 593 |
+
" spin 1s steps(1) infinite;\n",
|
| 594 |
+
" }\n",
|
| 595 |
+
"\n",
|
| 596 |
+
" @keyframes spin {\n",
|
| 597 |
+
" 0% {\n",
|
| 598 |
+
" border-color: transparent;\n",
|
| 599 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 600 |
+
" border-left-color: var(--fill-color);\n",
|
| 601 |
+
" }\n",
|
| 602 |
+
" 20% {\n",
|
| 603 |
+
" border-color: transparent;\n",
|
| 604 |
+
" border-left-color: var(--fill-color);\n",
|
| 605 |
+
" border-top-color: var(--fill-color);\n",
|
| 606 |
+
" }\n",
|
| 607 |
+
" 30% {\n",
|
| 608 |
+
" border-color: transparent;\n",
|
| 609 |
+
" border-left-color: var(--fill-color);\n",
|
| 610 |
+
" border-top-color: var(--fill-color);\n",
|
| 611 |
+
" border-right-color: var(--fill-color);\n",
|
| 612 |
+
" }\n",
|
| 613 |
+
" 40% {\n",
|
| 614 |
+
" border-color: transparent;\n",
|
| 615 |
+
" border-right-color: var(--fill-color);\n",
|
| 616 |
+
" border-top-color: var(--fill-color);\n",
|
| 617 |
+
" }\n",
|
| 618 |
+
" 60% {\n",
|
| 619 |
+
" border-color: transparent;\n",
|
| 620 |
+
" border-right-color: var(--fill-color);\n",
|
| 621 |
+
" }\n",
|
| 622 |
+
" 80% {\n",
|
| 623 |
+
" border-color: transparent;\n",
|
| 624 |
+
" border-right-color: var(--fill-color);\n",
|
| 625 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 626 |
+
" }\n",
|
| 627 |
+
" 90% {\n",
|
| 628 |
+
" border-color: transparent;\n",
|
| 629 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 630 |
+
" }\n",
|
| 631 |
+
" }\n",
|
| 632 |
+
"</style>\n",
|
| 633 |
+
"\n",
|
| 634 |
+
" <script>\n",
|
| 635 |
+
" async function quickchart(key) {\n",
|
| 636 |
+
" const quickchartButtonEl =\n",
|
| 637 |
+
" document.querySelector('#' + key + ' button');\n",
|
| 638 |
+
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
|
| 639 |
+
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
|
| 640 |
+
" try {\n",
|
| 641 |
+
" const charts = await google.colab.kernel.invokeFunction(\n",
|
| 642 |
+
" 'suggestCharts', [key], {});\n",
|
| 643 |
+
" } catch (error) {\n",
|
| 644 |
+
" console.error('Error during call to suggestCharts:', error);\n",
|
| 645 |
+
" }\n",
|
| 646 |
+
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
|
| 647 |
+
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
|
| 648 |
+
" }\n",
|
| 649 |
+
" (() => {\n",
|
| 650 |
+
" let quickchartButtonEl =\n",
|
| 651 |
+
" document.querySelector('#df-066c4d9a-38ab-411d-b575-92d90726ec60 button');\n",
|
| 652 |
+
" quickchartButtonEl.style.display =\n",
|
| 653 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
| 654 |
+
" })();\n",
|
| 655 |
+
" </script>\n",
|
| 656 |
+
"</div>\n",
|
| 657 |
+
"\n",
|
| 658 |
+
" </div>\n",
|
| 659 |
+
" </div>\n"
|
| 660 |
+
],
|
| 661 |
+
"application/vnd.google.colaboratory.intrinsic+json": {
|
| 662 |
+
"type": "dataframe",
|
| 663 |
+
"variable_name": "artists"
|
| 664 |
+
}
|
| 665 |
+
},
|
| 666 |
+
"metadata": {},
|
| 667 |
+
"execution_count": 18
|
| 668 |
+
}
|
| 669 |
+
]
|
| 670 |
+
},
|
| 671 |
+
{
|
| 672 |
+
"cell_type": "code",
|
| 673 |
+
"source": [
|
| 674 |
# Features are the artist and album ids; the target is the playlist id.
X = artists[['artist_id', 'album_id']]
y = artists['playlist_id']

# Hold out 20% of the rows for validation, with a fixed seed so the split repeats.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=0
)
|
| 679 |
+
],
|
| 680 |
+
"metadata": {
|
| 681 |
+
"id": "5HLSc9z36Izn"
|
| 682 |
+
},
|
| 683 |
+
"execution_count": null,
|
| 684 |
+
"outputs": []
|
| 685 |
+
},
|
| 686 |
+
{
|
| 687 |
+
"cell_type": "code",
|
| 688 |
+
"execution_count": 21,
|
| 689 |
+
"metadata": {
|
| 690 |
+
"id": "k47MaxR65Nq4"
|
| 691 |
+
},
|
| 692 |
+
"outputs": [],
|
| 693 |
+
"source": [
|
| 694 |
from sklearn.cluster import DBSCAN
# Density-based clustering over the id columns held in X.
# NOTE(review): assuming the id columns are integers, eps=0.2 is smaller than
# the distance between any two distinct rows, so only exact duplicates can be
# neighbours -- confirm this is intended.
db_model = DBSCAN(eps=0.2,min_samples=5)
# One cluster label per row of X; the next cell treats -1 as noise.
labels_db = db_model.fit_predict(X)
|
| 697 |
+
]
|
| 698 |
+
},
|
| 699 |
+
{
|
| 700 |
+
"cell_type": "code",
|
| 701 |
+
"source": [
|
| 702 |
from sklearn.metrics import precision_score, recall_score

# Score only the rows DBSCAN assigned to a cluster (label -1 is dropped as noise).
clustered = labels_db != -1
y_no_noise = y[clustered]
labels_db_no_noise = labels_db[clustered]

# NOTE(review): cluster labels are arbitrary integers, so comparing them to
# playlist ids as if they were class predictions is only meaningful if the
# two label spaces are aligned -- confirm, or switch to a clustering metric.
# zero_division=0 keeps the same numbers as before (undefined per-label scores
# count as 0) without emitting UndefinedMetricWarning.
precision = precision_score(y_no_noise, labels_db_no_noise, average='weighted', zero_division=0)
recall = recall_score(y_no_noise, labels_db_no_noise, average='weighted', zero_division=0)

print(f'Precision: {precision}')
print(f'Recall: {recall}')
|
| 711 |
+
],
|
| 712 |
+
"metadata": {
|
| 713 |
+
"colab": {
|
| 714 |
+
"base_uri": "https://localhost:8080/"
|
| 715 |
+
},
|
| 716 |
+
"id": "Osq-NpGu9V2k",
|
| 717 |
+
"outputId": "cb9f28e0-1a44-4208-f520-e09ff274d48b"
|
| 718 |
+
},
|
| 719 |
+
"execution_count": 27,
|
| 720 |
+
"outputs": [
|
| 721 |
+
{
|
| 722 |
+
"output_type": "stream",
|
| 723 |
+
"name": "stderr",
|
| 724 |
+
"text": [
|
| 725 |
+
"/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
| 726 |
+
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
| 727 |
+
]
|
| 728 |
+
},
|
| 729 |
+
{
|
| 730 |
+
"output_type": "stream",
|
| 731 |
+
"name": "stdout",
|
| 732 |
+
"text": [
|
| 733 |
+
"Precision: 1.589262536579764e-05\n",
|
| 734 |
+
"Recall: 9.606273770069471e-06\n"
|
| 735 |
+
]
|
| 736 |
+
},
|
| 737 |
+
{
|
| 738 |
+
"output_type": "stream",
|
| 739 |
+
"name": "stderr",
|
| 740 |
+
"text": [
|
| 741 |
+
"/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
|
| 742 |
+
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
| 743 |
+
]
|
| 744 |
+
}
|
| 745 |
+
]
|
| 746 |
+
}
|
| 747 |
+
]
|
| 748 |
+
}
|
notebooks/nn_collab_filter.ipynb
ADDED
|
@@ -0,0 +1,748 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 28,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import os\n",
|
| 10 |
+
"import urllib.request\n",
|
| 11 |
+
"import zipfile\n",
|
| 12 |
+
"import json\n",
|
| 13 |
+
"import pandas as pd\n",
|
| 14 |
+
"import time\n",
|
| 15 |
+
"import torch\n",
|
| 16 |
+
"import numpy as np\n",
|
| 17 |
+
"import pandas as pd\n",
|
| 18 |
+
"import torch.nn as nn\n",
|
| 19 |
+
"import torch.nn.functional as F\n",
|
| 20 |
+
"import torch.optim as optim\n",
|
| 21 |
+
"from torch.utils.data import DataLoader, TensorDataset\n",
|
| 22 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 23 |
+
"import matplotlib.pyplot as plt\n",
|
| 24 |
+
"from sklearn.preprocessing import LabelEncoder"
|
| 25 |
+
]
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"cell_type": "code",
|
| 29 |
+
"execution_count": 29,
|
| 30 |
+
"metadata": {
|
| 31 |
+
"colab": {
|
| 32 |
+
"base_uri": "https://localhost:8080/"
|
| 33 |
+
},
|
| 34 |
+
"id": "y1pGv3um_VAV",
|
| 35 |
+
"outputId": "64ee7998-f542-4477-a6c3-7444a04a42c8"
|
| 36 |
+
},
|
| 37 |
+
"outputs": [],
|
| 38 |
+
"source": [
|
| 39 |
+
"# from google.colab import drive\n",
|
| 40 |
+
"# drive.mount('/content/drive')"
|
| 41 |
+
]
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"cell_type": "code",
|
| 45 |
+
"execution_count": 30,
|
| 46 |
+
"metadata": {
|
| 47 |
+
"id": "MrmOyqSn_Y7C"
|
| 48 |
+
},
|
| 49 |
+
"outputs": [],
|
| 50 |
+
"source": [
|
| 51 |
+
"# prompt: copy a file from another directory to current directory in python code and create folders if needed\n",
|
| 52 |
+
"\n",
|
| 53 |
+
"import shutil\n",
|
| 54 |
+
"import os\n",
|
| 55 |
+
"\n",
|
| 56 |
def copy_file(src, dst):
    """
    Copies a file from src to dst, creating any necessary directories.

    Args:
        src: The path to the source file.
        dst: The path to the destination file.
    """
    dst_dir = os.path.dirname(dst)
    # A bare filename has an empty dirname and os.makedirs('') raises, so only
    # create a directory when there is one. exist_ok avoids the race between
    # checking for the directory and creating it.
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)

    # Copy the file (copy2 also tries to preserve file metadata).
    shutil.copy2(src, dst)
|
| 71 |
+
"\n",
|
| 72 |
+
"# copy_file('/content/drive/MyDrive/rec_data/spotify_million_playlist_dataset.zip', os.getcwd() + '/data/raw/spotify_million_playlist_dataset.zip')"
|
| 73 |
+
]
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"cell_type": "code",
|
| 77 |
+
"execution_count": 32,
|
| 78 |
+
"metadata": {
|
| 79 |
+
"id": "L5h3Tsa0LIoo"
|
| 80 |
+
},
|
| 81 |
+
"outputs": [],
|
| 82 |
+
"source": [
|
| 83 |
def unzip_archive(filepath, dir_path):
    """Extract every member of the zip archive at filepath into dir_path."""
    archive = zipfile.ZipFile(str(filepath), mode='r')
    with archive:
        archive.extractall(path=dir_path)
|
| 86 |
+
"\n",
|
| 87 |
# Unpack the dataset archive from data/raw into data/raw/playlists, both
# resolved against the current working directory.
unzip_archive(os.getcwd() + '/data/raw/spotify_million_playlist_dataset.zip', os.getcwd() + '/data/raw/playlists')
|
| 88 |
+
]
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"cell_type": "code",
|
| 92 |
+
"execution_count": 33,
|
| 93 |
+
"metadata": {
|
| 94 |
+
"id": "JcLT9U2Q_LJw"
|
| 95 |
+
},
|
| 96 |
+
"outputs": [],
|
| 97 |
+
"source": [
|
| 98 |
+
"import shutil\n",
|
| 99 |
+
"\n",
|
| 100 |
def make_dir(directory):
    """Create directory as an empty folder, removing any existing tree at that path first."""
    already_there = os.path.exists(directory)
    if already_there:
        shutil.rmtree(directory)
    os.makedirs(directory)
|
| 106 |
+
"\n",
|
| 107 |
# Create both output folders under the current working directory.
# The first holds the parquet chunks, the second the processed CSVs.
directory = os.getcwd() + '/data/raw/data'
make_dir(directory)
directory = os.getcwd() + '/data/processed'
make_dir(directory)
|
| 111 |
+
]
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"cell_type": "code",
|
| 115 |
+
"execution_count": 34,
|
| 116 |
+
"metadata": {
|
| 117 |
+
"id": "fC-0iP1L_LJx"
|
| 118 |
+
},
|
| 119 |
+
"outputs": [],
|
| 120 |
+
"source": [
|
| 121 |
# Columns kept from each flattened playlist/track record.
cols = [
    'name', 'pid', 'num_followers', 'pos',
    'artist_name', 'track_name', 'album_name',
]
|
| 130 |
+
]
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"cell_type": "code",
|
| 134 |
+
"execution_count": 35,
|
| 135 |
+
"metadata": {
|
| 136 |
+
"colab": {
|
| 137 |
+
"base_uri": "https://localhost:8080/"
|
| 138 |
+
},
|
| 139 |
+
"id": "qyCujIu8cDGg",
|
| 140 |
+
"outputId": "f3b21394-acbc-40ab-d70a-b666acd985e7"
|
| 141 |
+
},
|
| 142 |
+
"outputs": [
|
| 143 |
+
{
|
| 144 |
+
"name": "stdout",
|
| 145 |
+
"output_type": "stream",
|
| 146 |
+
"text": [
|
| 147 |
+
"mpd.slice.278000-278999.json\t200/1000\t20.0%"
|
| 148 |
+
]
|
| 149 |
+
}
|
| 150 |
+
],
|
| 151 |
+
"source": [
|
| 152 |
# Flatten the raw playlist JSON slices into parquet chunks.
directory = os.getcwd() + '/data/raw/playlists/data'
output_dir = os.getcwd() + '/data/raw/data'
SLICES_PER_CHUNK = 50   # slices written per parquet file
MAX_SLICES = 200        # stop after this many slices
frames = []             # flattened slices waiting to be written
index = 0               # number of JSON slices processed so far

# sorted() makes the choice of slices reproducible; os.listdir order is arbitrary.
for filename in sorted(os.listdir(directory)):
    full_path = os.path.join(directory, filename)
    # Only regular files whose name ends in .json are dataset slices.
    if not (os.path.isfile(full_path) and filename.endswith('.json')):
        continue
    index += 1

    print(f'\r{filename}\t{index}/1000\t{((index/1000)*100):.1f}%', end='')

    with open(full_path, 'r') as file:
        json_data = json.load(file)

    # One row per (playlist, track): explode the track list, then expand each
    # track dict into its own columns next to the playlist columns.
    temp = pd.DataFrame(json_data['playlists'])
    expanded_df = temp.explode('tracks').reset_index(drop=True)
    json_normalized = pd.json_normalize(expanded_df['tracks'])
    result = pd.concat([expanded_df.drop(columns=['tracks']), json_normalized], axis=1)

    # Collect frames and concatenate once per chunk rather than growing a
    # DataFrame with a fresh copy on every slice.
    frames.append(result[cols])

    if index % SLICES_PER_CHUNK == 0:
        pd.concat(frames, axis=0, ignore_index=True).to_parquet(f'{output_dir}/playlists_{index}.parquet')
        frames = []
    if index % MAX_SLICES == 0:
        break

# Write the remainder when the slice count is not a multiple of the chunk size.
if frames:
    pd.concat(frames, axis=0, ignore_index=True).to_parquet(f'{output_dir}/playlists_{index}.parquet')
    frames = []
|
| 190 |
+
]
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"cell_type": "code",
|
| 194 |
+
"execution_count": 36,
|
| 195 |
+
"metadata": {
|
| 196 |
+
"id": "unZ418pc_LJy"
|
| 197 |
+
},
|
| 198 |
+
"outputs": [],
|
| 199 |
+
"source": [
|
| 200 |
+
"import pyarrow.parquet as pq\n",
|
| 201 |
+
"\n",
|
| 202 |
def read_parquet_folder(folder_path):
    """Load every ``.parquet`` file in folder_path into a single DataFrame.

    Files are read in sorted name order so the resulting row order is
    reproducible; os.listdir on its own returns entries in arbitrary order.

    Args:
        folder_path: Directory containing the parquet files.

    Returns:
        The concatenation of all files, with a fresh 0..n-1 index.

    Raises:
        ValueError: If the folder contains no ``.parquet`` files.
    """
    parquet_files = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.parquet')
    )
    if not parquet_files:
        # Same exception type pd.concat raises for an empty list, but naming the folder.
        raise ValueError(f'No .parquet files found in {folder_path!r}')

    dataframes = [
        pd.read_parquet(os.path.join(folder_path, name)) for name in parquet_files
    ]
    return pd.concat(dataframes, ignore_index=True)
|
| 211 |
+
"\n",
|
| 212 |
# Load all parquet chunks from data/raw/data (under the working directory) into df.
folder_path = os.getcwd() + '/data/raw/data'
df = read_parquet_folder(folder_path)
|
| 214 |
+
]
|
| 215 |
+
},
|
| 216 |
+
{
|
| 217 |
+
"cell_type": "code",
|
| 218 |
+
"execution_count": 37,
|
| 219 |
+
"metadata": {
|
| 220 |
+
"id": "es6n8S3a_LJz"
|
| 221 |
+
},
|
| 222 |
+
"outputs": [],
|
| 223 |
+
"source": [
|
| 224 |
# Prepare data/processed under the working directory before the CSVs are written.
directory = os.getcwd() + '/data/processed'
make_dir(directory)
|
| 226 |
+
]
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"cell_type": "code",
|
| 230 |
+
"execution_count": 38,
|
| 231 |
+
"metadata": {
|
| 232 |
+
"id": "Rc2JtdBR_LJz"
|
| 233 |
+
},
|
| 234 |
+
"outputs": [],
|
| 235 |
+
"source": [
|
| 236 |
def create_ids(df, col, name):
    """Add a ``<name>_id`` column numbering the distinct values of ``col``.

    Ids follow the order in which values first appear in ``df[col]``. The
    id-to-value table is also written to data/processed/<name>.csv under the
    current working directory. ``df`` is modified in place and returned.
    """
    id_col = f'{name}_id'

    # Distinct values in order of first appearance, numbered from 0.
    distinct = df[col].unique()
    lookup = dict(zip(distinct, range(len(distinct))))

    df[id_col] = df[col].map(lookup)

    mapping = df[[id_col, col]].drop_duplicates()
    mapping.to_csv(os.getcwd() + f'/data/processed/{name}.csv')

    return df
|
| 245 |
+
]
|
| 246 |
+
},
|
| 247 |
+
{
|
| 248 |
+
"cell_type": "code",
|
| 249 |
+
"execution_count": 39,
|
| 250 |
+
"metadata": {
|
| 251 |
+
"id": "O6aZ566R_LJ0"
|
| 252 |
+
},
|
| 253 |
+
"outputs": [],
|
| 254 |
+
"source": [
|
| 255 |
# Only the playlist ('pid') id table is built here; the other calls are disabled.
# df = create_ids(df, 'artist_name', 'artist')
df = create_ids(df, 'pid', 'playlist')
# df = create_ids(df, 'track_name', 'track')
# df = create_ids(df, 'album_name', 'album')
|
| 259 |
+
]
|
| 260 |
+
},
|
| 261 |
+
{
|
| 262 |
+
"cell_type": "code",
|
| 263 |
+
"execution_count": 40,
|
| 264 |
+
"metadata": {
|
| 265 |
+
"id": "pWWICQvh03KH"
|
| 266 |
+
},
|
| 267 |
+
"outputs": [],
|
| 268 |
+
"source": [
|
| 269 |
# Number of distinct track names per (playlist, artist, album), repeated on
# every row of that group.
df['song_count'] = (
    df.groupby(['pid', 'artist_name', 'album_name'])['track_name']
      .transform('nunique')
)

# Playlist length taken as the largest track position plus one
# (presumably 'pos' is zero-based -- TODO confirm).
df['playlist_songs'] = df.groupby(['pid'])['pos'].transform('max') + 1
|
| 273 |
+
]
|
| 274 |
+
},
|
| 275 |
+
{
|
| 276 |
+
"cell_type": "code",
|
| 277 |
+
"execution_count": 41,
|
| 278 |
+
"metadata": {
|
| 279 |
+
"id": "F-S7j-gI4I6W"
|
| 280 |
+
},
|
| 281 |
+
"outputs": [],
|
| 282 |
+
"source": [
|
| 283 |
# Step 1: Build the combined "artist::album" key. Vectorised string
# concatenation replaces the row-by-row agg('::'.join, axis=1), which calls
# Python once per row. A missing artist or album name now yields a missing
# key instead of raising.
df['artist_album'] = df['artist_name'] + '::' + df['album_name']

# Step 2: Create a dictionary mapping unique combined values to IDs
# (numbered in order of first appearance).
value_to_id = {val: i for i, val in enumerate(df['artist_album'].unique())}

# Step 3: Map these IDs back to the DataFrame
df['artist_album_id'] = df['artist_album'].map(value_to_id)

# Lookup table: one row per distinct (artist/album, track) combination.
df[['artist_album_id', 'artist_album', 'artist_name', 'album_name', 'track_name']].drop_duplicates().to_csv(os.getcwd() + '/data/processed/artist_album.csv')
# Full playlist listing, one row per playlist track.
df[['name', 'playlist_id','artist_album_id', 'artist_album', 'artist_name', 'album_name', 'track_name']].to_csv(os.getcwd() + '/data/processed/playlists.csv')
|
| 293 |
+
]
|
| 294 |
+
},
|
| 295 |
+
{
|
| 296 |
+
"cell_type": "code",
|
| 297 |
+
"execution_count": null,
|
| 298 |
+
"metadata": {
|
| 299 |
+
"id": "q6KHerHG6xZF"
|
| 300 |
+
},
|
| 301 |
+
"outputs": [],
|
| 302 |
+
"source": [
|
| 303 |
# df is still one row per track, and every row of a (playlist, artist/album)
# group already carries that group's distinct-track count from the earlier
# transform('nunique'). Summing it over the group's rows would multiply the
# count by the number of rows and could push song_percent above 1. 'max'
# keeps the per-group count itself.
df['song_count'] = df.groupby(['playlist_id','artist_album_id'])['song_count'].transform('max')

# Encode the track names as integer ids
encoder = LabelEncoder()
encoder.fit(df['track_name'])
df['track_id'] = encoder.transform(df['track_name'])
|
| 314 |
+
]
|
| 315 |
+
},
|
| 316 |
+
{
|
| 317 |
+
"cell_type": "code",
|
| 318 |
+
"execution_count": null,
|
| 319 |
+
"metadata": {
|
| 320 |
+
"id": "r0YprWVe_LJ0"
|
| 321 |
+
},
|
| 322 |
+
"outputs": [],
|
| 323 |
+
"source": [
|
| 324 |
+
"# df['artist_count'] = df.groupby(['playlist_id','artist_id'])['song_id'].transform('nunique')\n",
|
| 325 |
+
"# df['album_count'] = df.groupby(['playlist_id','artist_id','album_id'])['song_id'].transform('nunique')\n",
|
| 326 |
+
"# df['song_count'] = df.groupby(['artist_id'])['song_id'].transform('count')"
|
| 327 |
+
]
|
| 328 |
+
},
|
| 329 |
+
{
|
| 330 |
+
"cell_type": "code",
|
| 331 |
+
"execution_count": null,
|
| 332 |
+
"metadata": {
|
| 333 |
+
"id": "D0IkRvv6_LJ1"
|
| 334 |
+
},
|
| 335 |
+
"outputs": [],
|
| 336 |
+
"source": [
|
| 337 |
# df['artist_percent'] = df['artist_count'] / df['playlist_songs']
# Ratio of the group's song_count to the playlist's track count.
df['song_percent'] = df['song_count'] / df['playlist_songs']
# df['album_percent'] = df['album_count'] / df['playlist_songs']
|
| 340 |
+
]
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"cell_type": "code",
|
| 344 |
+
"execution_count": null,
|
| 345 |
+
"metadata": {
|
| 346 |
+
"id": "TnFfvqoSxtW3"
|
| 347 |
+
},
|
| 348 |
+
"outputs": [],
|
| 349 |
+
"source": [
|
| 350 |
import numpy as np

# Pass song_percent through the logistic function 1 / (1 + e^-x).
# NOTE(review): for non-negative inputs the result is always >= 0.5, so the
# training target never reaches the lower half of a [0, 1] range -- confirm
# this rescaling is intended.
df['song_percent'] = 1 / (1 + np.exp(-df['song_percent']))
|
| 354 |
+
]
|
| 355 |
+
},
|
| 356 |
+
{
|
| 357 |
+
"cell_type": "code",
|
| 358 |
+
"execution_count": null,
|
| 359 |
+
"metadata": {
|
| 360 |
+
"colab": {
|
| 361 |
+
"base_uri": "https://localhost:8080/",
|
| 362 |
+
"height": 206
|
| 363 |
+
},
|
| 364 |
+
"id": "XyURi3ZQ_LJ1",
|
| 365 |
+
"outputId": "70e3d126-ab5c-490d-a92e-030f32348969"
|
| 366 |
+
},
|
| 367 |
+
"outputs": [],
|
| 368 |
+
"source": [
|
| 369 |
# Training table: distinct (playlist_id, artist_album_id, song_percent) rows.
artists = df.loc[:,['playlist_id','artist_album_id','song_percent']].drop_duplicates()
# Preview the first rows.
artists.head()
|
| 371 |
+
]
|
| 372 |
+
},
|
| 373 |
+
{
|
| 374 |
+
"cell_type": "code",
|
| 375 |
+
"execution_count": null,
|
| 376 |
+
"metadata": {},
|
| 377 |
+
"outputs": [],
|
| 378 |
+
"source": [
|
| 379 |
# Persist the (playlist_id, artist_album_id) pairs.
# NOTE(review): an earlier cell writes a wider table to this same
# data/processed/playlists.csv path, so that file is replaced here -- confirm
# which schema downstream readers expect.
artists.loc[:,['playlist_id','artist_album_id',]].to_csv(os.getcwd() + '/data/processed/playlists.csv')
|
| 380 |
+
]
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"cell_type": "code",
|
| 384 |
+
"execution_count": null,
|
| 385 |
+
"metadata": {
|
| 386 |
+
"id": "qFqdH4JH_LJ2"
|
| 387 |
+
},
|
| 388 |
+
"outputs": [],
|
| 389 |
+
"source": [
|
| 390 |
# Inputs are the (playlist, artist/album) id pairs; the target is song_percent.
X = artists[['playlist_id', 'artist_album_id']]
y = artists['song_percent']

# Hold out 20% of the rows for validation, with a fixed seed so the split repeats.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=0
)
|
| 395 |
+
]
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"cell_type": "code",
|
| 399 |
+
"execution_count": null,
|
| 400 |
+
"metadata": {
|
| 401 |
+
"id": "uEYzNHNb_LJ2"
|
| 402 |
+
},
|
| 403 |
+
"outputs": [],
|
| 404 |
+
"source": [
|
| 405 |
def prep_dataloaders(X_train,y_train,X_val,y_val,batch_size):
    """Wrap the training and validation splits in PyTorch DataLoaders.

    Features are converted to int64 tensors and targets to float32 tensors.
    Only the training loader shuffles its batches.

    Returns:
        The pair (trainloader, valloader).
    """
    def as_dataset(features, targets):
        # array -> tensor, cast to the dtypes described in the docstring
        feature_tensor = torch.from_numpy(np.array(features)).long()
        target_tensor = torch.from_numpy(np.array(targets)).float()
        return TensorDataset(feature_tensor, target_tensor)

    trainloader = torch.utils.data.DataLoader(
        as_dataset(X_train, y_train), batch_size=batch_size, shuffle=True
    )
    valloader = torch.utils.data.DataLoader(
        as_dataset(X_val, y_val), batch_size=batch_size, shuffle=False
    )
    return trainloader, valloader
|
| 417 |
+
"\n",
|
| 418 |
# Build the loaders with a minibatch size of 64.
batchsize = 64
trainloader,valloader = prep_dataloaders(X_train,y_train,X_val,y_val,batchsize)
|
| 420 |
+
]
|
| 421 |
+
},
|
| 422 |
+
{
|
| 423 |
+
"cell_type": "code",
|
| 424 |
+
"execution_count": 3,
|
| 425 |
+
"metadata": {
|
| 426 |
+
"id": "TBpWfyOc_LJ2"
|
| 427 |
+
},
|
| 428 |
+
"outputs": [],
|
| 429 |
+
"source": [
|
| 430 |
class NNColabFiltering(nn.Module):
    """Embedding-based collaborative-filtering network.

    Each input row is a (user index, item index) pair. Both indices are
    embedded, the embeddings are concatenated and passed through a single
    hidden layer with ReLU, and the one-unit output is squashed by a sigmoid
    and rescaled into ``rating_range`` (a ``[low, high]`` pair).
    """

    def __init__(self, n_playlists, n_artists, embedding_dim_users, embedding_dim_items, n_activations, rating_range):
        super().__init__()
        # Lookup tables for column 0 (users/playlists) and column 1 (items).
        self.user_embeddings = nn.Embedding(n_playlists, embedding_dim_users)
        self.item_embeddings = nn.Embedding(n_artists, embedding_dim_items)
        # Hidden layer takes the concatenated embeddings; output is one score.
        concat_dim = embedding_dim_users + embedding_dim_items
        self.fc1 = nn.Linear(concat_dim, n_activations)
        self.fc2 = nn.Linear(n_activations, 1)
        self.rating_range = rating_range

    def forward(self, X):
        """Return one prediction per row of X, scaled into rating_range."""
        user_ids, item_ids = X[:, 0], X[:, 1]
        joined = torch.cat(
            (self.user_embeddings(user_ids), self.item_embeddings(item_ids)),
            dim=1,
        )
        hidden = F.relu(self.fc1(joined))
        low, high = self.rating_range[0], self.rating_range[1]
        # sigmoid gives (0, 1); stretch and shift it onto [low, high]
        return torch.sigmoid(self.fc2(hidden)) * (high - low) + low
|
| 453 |
+
]
|
| 454 |
+
},
|
| 455 |
+
{
|
| 456 |
+
"cell_type": "code",
|
| 457 |
+
"execution_count": null,
|
| 458 |
+
"metadata": {
|
| 459 |
+
"id": "xEa69rXx_LJ3"
|
| 460 |
+
},
|
| 461 |
+
"outputs": [],
|
| 462 |
+
"source": [
|
| 463 |
def train_model(model, criterion, optimizer, dataloaders, device, num_epochs=5, scheduler=None, relevance_threshold=0.5):
    """Train ``model`` and report loss curves plus validation precision/recall.

    The model predicts a continuous score, so precision and recall are
    computed by binarising both predictions and targets: a value counts as
    "relevant" when it is >= ``relevance_threshold``. The previous
    multiclass metrics could not be constructed without ``num_classes`` and
    applied ``argmax(dim=1)`` to a 1-D output, which raises.

    Args:
        model: Module whose forward pass takes a batch of inputs.
        criterion: Loss taking (outputs, labels). The reported cost is the
            square root of the batch loss, i.e. RMSE when this is an MSE loss.
        optimizer: Optimizer over the model's parameters.
        dataloaders: Dict with 'train' and 'val' DataLoaders.
        device: Device the model and batches are moved to.
        num_epochs: Number of passes over both loaders.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
        relevance_threshold: Cut-off used to binarise scores for
            precision/recall; choose it to suit the scale of the targets.

    Returns:
        (costpaths, precision, recall), where costpaths maps 'train'/'val'
        to a list of per-epoch costs and precision/recall are floats for the
        final epoch's validation pass (0.0 when undefined).
    """
    model = model.to(device)  # Send model to GPU if available
    since = time.time()

    costpaths = {'train': [], 'val': []}
    precision, recall = 0.0, 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            # Confusion counts restart every epoch so the returned metrics
            # describe the final model, not a mix of all epochs.
            true_pos = false_pos = false_neg = 0
            n_batches = len(dataloaders[phase])

            for index, (inputs, labels) in enumerate(dataloaders[phase]):
                # Send the batch to GPU if available
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the weight gradients
                optimizer.zero_grad()

                # Track gradients only for training data
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs).view(-1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        # Backpropagate and update the weights
                        loss.backward()
                        optimizer.step()
                    else:
                        predicted_pos = outputs >= relevance_threshold
                        actual_pos = labels >= relevance_threshold
                        true_pos += int((predicted_pos & actual_pos).sum())
                        false_pos += int((predicted_pos & ~actual_pos).sum())
                        false_neg += int((~predicted_pos & actual_pos).sum())

                # Weight each batch's root loss by its size for the epoch average
                running_loss += np.sqrt(loss.item()) * labels.size(0)
                print(f'\r{running_loss} {index} {(index / n_batches)*100:.2f}%', end='')

            # Step along learning rate scheduler when in train
            if (phase == 'train') and (scheduler is not None):
                scheduler.step()

            # Calculate and display the average cost for the epoch
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            costpaths[phase].append(epoch_loss)
            print('\n{} loss: {:.4f}'.format(phase, epoch_loss))

            if phase == 'val':
                predicted_total = true_pos + false_pos
                actual_total = true_pos + false_neg
                precision = true_pos / predicted_total if predicted_total else 0.0
                recall = true_pos / actual_total if actual_total else 0.0

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return costpaths, precision, recall
|
| 534 |
+
]
|
| 535 |
+
},
|
| 536 |
+
{
|
| 537 |
+
"cell_type": "code",
|
| 538 |
+
"execution_count": null,
|
| 539 |
+
"metadata": {
|
| 540 |
+
"colab": {
|
| 541 |
+
"base_uri": "https://localhost:8080/"
|
| 542 |
+
},
|
| 543 |
+
"id": "Qp7Rymw0gGk0",
|
| 544 |
+
"outputId": "03707b9d-a3ad-4f66-a2a3-76b5ab536479"
|
| 545 |
+
},
|
| 546 |
+
"outputs": [],
|
| 547 |
+
"source": [
|
| 548 |
# Train the model
dataloaders = {'train':trainloader, 'val':valloader}
# Each embedding table needs one row per id, so its size is the largest id + 1.
n_users = int(X.loc[:,'playlist_id'].max()) + 1
n_items = int(X.loc[:,'artist_album_id'].max()) + 1
model = NNColabFiltering(n_users,n_items,embedding_dim_users=50, embedding_dim_items=50, n_activations = 100,rating_range=[0.,1.])
criterion = nn.MSELoss()
lr=0.001
n_epochs=10
wd=1e-3
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# train_model returns (loss curves, precision, recall). Unpack all three so
# cost_paths is the dict the plotting cell iterates over and the metric
# names used by the reporting cell exist.
cost_paths, final_precision, final_recall = train_model(model,criterion,optimizer,dataloaders, device,n_epochs, scheduler=None)
|
| 561 |
+
]
|
| 562 |
+
},
|
| 563 |
+
{
|
| 564 |
+
"cell_type": "code",
|
| 565 |
+
"execution_count": null,
|
| 566 |
+
"metadata": {},
|
| 567 |
+
"outputs": [],
|
| 568 |
+
"source": [
|
| 569 |
+
"\n",
|
| 570 |
+
"\n",
|
| 571 |
# Report the validation metrics to four decimal places.
# NOTE(review): final_precision and final_recall must already be bound when
# this cell runs, e.g. from the precision and recall that train_model returns.
print(f"Precision: {final_precision:.4f}")
print(f"Recall: {final_recall:.4f}")
|
| 573 |
+
]
|
| 574 |
+
},
|
| 575 |
+
{
|
| 576 |
+
"cell_type": "code",
|
| 577 |
+
"execution_count": null,
|
| 578 |
+
"metadata": {},
|
| 579 |
+
"outputs": [],
|
| 580 |
+
"source": [
|
| 581 |
+
"def calculate_ndcg(true_relevance, predicted_relevance, k=None):\n",
|
| 582 |
+
" if k is None:\n",
|
| 583 |
+
" k = len(true_relevance)\n",
|
| 584 |
+
" \n",
|
| 585 |
+
" dcg = np.sum(predicted_relevance[:k] / np.log2(np.arange(2, k + 2)))\n",
|
| 586 |
+
" idcg = np.sum(np.sort(true_relevance)[::-1][:k] / np.log2(np.arange(2, k + 2)))\n",
|
| 587 |
+
" \n",
|
| 588 |
+
" return dcg / idcg if idcg > 0 else 0"
|
| 589 |
+
]
|
| 590 |
+
},
|
| 591 |
+
{
|
| 592 |
+
"cell_type": "code",
|
| 593 |
+
"execution_count": null,
|
| 594 |
+
"metadata": {
|
| 595 |
+
"colab": {
|
| 596 |
+
"base_uri": "https://localhost:8080/",
|
| 597 |
+
"height": 343
|
| 598 |
+
},
|
| 599 |
+
"id": "MiDPM6Zu_LJ4",
|
| 600 |
+
"outputId": "06c421bd-e716-47e2-96a2-70b971875638"
|
| 601 |
+
},
|
| 602 |
+
"outputs": [],
|
| 603 |
+
"source": [
|
| 604 |
+
"# Plot the cost over training and validation sets\n",
|
| 605 |
+
"fig,ax = plt.subplots(1,2,figsize=(15,5))\n",
|
| 606 |
+
"for i,key in enumerate(cost_paths.keys()):\n",
|
| 607 |
+
" ax_sub=ax[i%3]\n",
|
| 608 |
+
" ax_sub.plot(cost_paths[key])\n",
|
| 609 |
+
" ax_sub.set_title(key)\n",
|
| 610 |
+
" ax_sub.set_xlabel('Epoch')\n",
|
| 611 |
+
" ax_sub.set_ylabel('Loss')\n",
|
| 612 |
+
"plt.show()"
|
| 613 |
+
]
|
| 614 |
+
},
|
| 615 |
+
{
|
| 616 |
+
"cell_type": "code",
|
| 617 |
+
"execution_count": null,
|
| 618 |
+
"metadata": {
|
| 619 |
+
"id": "NC2SMmwfUepL"
|
| 620 |
+
},
|
| 621 |
+
"outputs": [],
|
| 622 |
+
"source": [
|
| 623 |
+
"# Save the entire model\n",
|
| 624 |
+
"torch.save(model, os.getcwd() + '/recommender.pt')"
|
| 625 |
+
]
|
| 626 |
+
},
|
| 627 |
+
{
|
| 628 |
+
"cell_type": "code",
|
| 629 |
+
"execution_count": 4,
|
| 630 |
+
"metadata": {},
|
| 631 |
+
"outputs": [],
|
| 632 |
+
"source": [
|
| 633 |
+
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
| 634 |
+
"model = torch.load('models/recommender.pt', map_location=device)"
|
| 635 |
+
]
|
| 636 |
+
},
|
| 637 |
+
{
|
| 638 |
+
"cell_type": "code",
|
| 639 |
+
"execution_count": 5,
|
| 640 |
+
"metadata": {},
|
| 641 |
+
"outputs": [],
|
| 642 |
+
"source": [
|
| 643 |
+
"artist_album = pd.read_csv(os.path.join(os.getcwd() + '/data/processed','artist_album.csv'))\n",
|
| 644 |
+
"artist_album = artist_album[['artist_album_id','artist_album','artist_name','album_name']].drop_duplicates()\n",
|
| 645 |
+
"playlists = pd.read_csv(os.path.join(os.getcwd() + '/data/processed','playlists.csv'))"
|
| 646 |
+
]
|
| 647 |
+
},
|
| 648 |
+
{
|
| 649 |
+
"cell_type": "code",
|
| 650 |
+
"execution_count": 6,
|
| 651 |
+
"metadata": {
|
| 652 |
+
"colab": {
|
| 653 |
+
"base_uri": "https://localhost:8080/"
|
| 654 |
+
},
|
| 655 |
+
"id": "YhpNb8tV8-WC",
|
| 656 |
+
"outputId": "4077277b-895e-47bb-f487-26838e0b1266"
|
| 657 |
+
},
|
| 658 |
+
"outputs": [
|
| 659 |
+
{
|
| 660 |
+
"name": "stdout",
|
| 661 |
+
"output_type": "stream",
|
| 662 |
+
"text": [
|
| 663 |
+
"Recommendations for playlist 5\n",
|
| 664 |
+
"The Fall \t At Dawn We Rage\n",
|
| 665 |
+
"Jasmin EP \t Cedaa\n",
|
| 666 |
+
"Geordie Racer \t Caspa\n",
|
| 667 |
+
"Matt Black \t Swifta Beater\n",
|
| 668 |
+
"Throw Away \t Sibot\n",
|
| 669 |
+
"Bounce 4 Life \t Monolithium\n",
|
| 670 |
+
"Time in Between (Soulection Compilation Vol.2) \t Bosstone\n",
|
| 671 |
+
"Split EP \t Arnold\n",
|
| 672 |
+
"The Covers, Vol. 7 \t Alexi Blue\n",
|
| 673 |
+
"Chelsea Hotel - This Is the Rhythm of the Night \t Literary Artists\n"
|
| 674 |
+
]
|
| 675 |
+
}
|
| 676 |
+
],
|
| 677 |
+
"source": [
|
| 678 |
+
"def generate_recommendations(artist_album, playlists, model, playlist_id, device, top_n=10, batch_size=1024):\n",
|
| 679 |
+
" model.eval()\n",
|
| 680 |
+
"\n",
|
| 681 |
+
"\n",
|
| 682 |
+
" all_movie_ids = torch.tensor(artist_album['artist_album_id'].values, dtype=torch.long, device=device)\n",
|
| 683 |
+
" user_ids = torch.full((len(all_movie_ids),), playlist_id, dtype=torch.long, device=device)\n",
|
| 684 |
+
"\n",
|
| 685 |
+
" # Initialize tensor to store all predictions\n",
|
| 686 |
+
" all_predictions = torch.zeros(len(all_movie_ids), device=device)\n",
|
| 687 |
+
"\n",
|
| 688 |
+
" # Generate predictions in batches\n",
|
| 689 |
+
" with torch.no_grad():\n",
|
| 690 |
+
" for i in range(0, len(all_movie_ids), batch_size):\n",
|
| 691 |
+
" batch_user_ids = user_ids[i:i+batch_size]\n",
|
| 692 |
+
" batch_movie_ids = all_movie_ids[i:i+batch_size]\n",
|
| 693 |
+
"\n",
|
| 694 |
+
" input_tensor = torch.stack([batch_user_ids, batch_movie_ids], dim=1)\n",
|
| 695 |
+
" batch_predictions = model(input_tensor).squeeze()\n",
|
| 696 |
+
" all_predictions[i:i+batch_size] = batch_predictions\n",
|
| 697 |
+
"\n",
|
| 698 |
+
" # Convert to numpy for easier handling\n",
|
| 699 |
+
" predictions = all_predictions.cpu().numpy()\n",
|
| 700 |
+
"\n",
|
| 701 |
+
" albums_listened = set(playlists.loc[playlists['playlist_id'] == playlist_id, 'artist_album_id'].tolist())\n",
|
| 702 |
+
"\n",
|
| 703 |
+
" unlistened_mask = np.isin(artist_album['artist_album_id'].values, list(albums_listened), invert=True)\n",
|
| 704 |
+
"\n",
|
| 705 |
+
" # Get top N recommendations\n",
|
| 706 |
+
" top_indices = np.argsort(predictions[unlistened_mask])[-top_n:][::-1]\n",
|
| 707 |
+
" recs = artist_album['artist_album_id'].values[unlistened_mask][top_indices]\n",
|
| 708 |
+
"\n",
|
| 709 |
+
" recs_names = artist_album.loc[artist_album['artist_album_id'].isin(recs)]\n",
|
| 710 |
+
" album, artist = recs_names['album_name'].values, recs_names['artist_name'].values\n",
|
| 711 |
+
"\n",
|
| 712 |
+
" return album.tolist(), artist.tolist()\n",
|
| 713 |
+
"\n",
|
| 714 |
+
"playlist_id = 5 \n",
|
| 715 |
+
"albums, artists = generate_recommendations(artist_album, playlists, model, playlist_id, device)\n",
|
| 716 |
+
"\n",
|
| 717 |
+
"print(\"Recommendations for playlist\", playlist_id)\n",
|
| 718 |
+
"for album, artist in zip(albums, artists):\n",
|
| 719 |
+
" print(album, '\\t', artist)"
|
| 720 |
+
]
|
| 721 |
+
}
|
| 722 |
+
],
|
| 723 |
+
"metadata": {
|
| 724 |
+
"accelerator": "GPU",
|
| 725 |
+
"colab": {
|
| 726 |
+
"gpuType": "T4",
|
| 727 |
+
"provenance": []
|
| 728 |
+
},
|
| 729 |
+
"kernelspec": {
|
| 730 |
+
"display_name": "Python 3",
|
| 731 |
+
"name": "python3"
|
| 732 |
+
},
|
| 733 |
+
"language_info": {
|
| 734 |
+
"codemirror_mode": {
|
| 735 |
+
"name": "ipython",
|
| 736 |
+
"version": 3
|
| 737 |
+
},
|
| 738 |
+
"file_extension": ".py",
|
| 739 |
+
"mimetype": "text/x-python",
|
| 740 |
+
"name": "python",
|
| 741 |
+
"nbconvert_exporter": "python",
|
| 742 |
+
"pygments_lexer": "ipython3",
|
| 743 |
+
"version": "3.6.15"
|
| 744 |
+
}
|
| 745 |
+
},
|
| 746 |
+
"nbformat": 4,
|
| 747 |
+
"nbformat_minor": 0
|
| 748 |
+
}
|
requirements.txt
ADDED
|
Binary file (14.9 kB). View file
|
|
|
scripts/build_features.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import urllib.request
|
| 3 |
+
import zipfile
|
| 4 |
+
import json
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import time
|
| 7 |
+
import torch
|
| 8 |
+
import numpy as np
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import torch.nn as nn
|
| 11 |
+
import torch.nn.functional as F
|
| 12 |
+
import torch.optim as optim
|
| 13 |
+
from torch.utils.data import DataLoader, TensorDataset
|
| 14 |
+
from sklearn.model_selection import train_test_split
|
| 15 |
+
import matplotlib.pyplot as plt
|
| 16 |
+
from sklearn.preprocessing import LabelEncoder
|
| 17 |
+
import shutil
|
| 18 |
+
import os
|
| 19 |
+
import pyarrow.parquet as pq
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# Columns kept from each flattened playlist/track record.
cols = [
    'name', 'pid', 'num_followers', 'pos',
    'artist_name', 'track_name', 'album_name',
]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def copy_file(src, dst):
    """Copy ``src`` to ``dst``, creating the destination directory if missing.

    Args:
        src: Path of the file to copy.
        dst: Destination file path. May be a bare filename, in which case the
            file is copied into the current working directory.

    The copy is done with ``shutil.copy2``.
    """
    dst_dir = os.path.dirname(dst)
    # A bare filename has an empty dirname, and os.makedirs('') raises, so
    # only create the directory when there is one to create.
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)

    shutil.copy2(src, dst)
|
| 40 |
+
|
| 41 |
+
def unzip_archive(filepath, dir_path):
    """Extract every member of the zip archive at ``filepath`` into ``dir_path``."""
    with zipfile.ZipFile(f"{filepath}", mode='r') as archive:
        archive.extractall(path=dir_path)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def make_dir(directory):
    """Create ``directory`` as an empty directory.

    Anything already present at that path is removed first, so the caller
    always starts from a clean, empty directory.
    """
    if os.path.exists(directory):
        shutil.rmtree(directory)
    os.makedirs(directory)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def make_dataset():
    """Flatten raw playlist JSON files into chunked parquet files.

    Reads the ``.json`` files found in ``<cwd>/data/raw/playlists/data``,
    expands each playlist into one row per track, keeps the columns listed in
    the module-level ``cols`` and, after every 50 JSON files, writes the rows
    gathered since the previous write to
    ``<cwd>/data/raw/data/playlists_<n>.parquet``. Processing stops once 200
    JSON files have been handled.
    """
    directory = os.getcwd() + '/data/raw/playlists/data'
    # Per-file frames waiting to be written. Collecting them in a list and
    # concatenating once per chunk avoids re-copying a growing DataFrame on
    # every file.
    pending = []
    index = 0
    for filename in os.listdir(directory):
        full_path = os.path.join(directory, filename)

        # Skip sub-directories and anything that is not a JSON file.
        if not os.path.isfile(full_path) or not filename.endswith('.json'):
            continue

        index += 1
        print(f'\r{filename}\t{index}/1000\t{((index/1000)*100):.1f}%', end='')

        with open(full_path, 'r', encoding='utf-8') as file:
            json_data = json.load(file)

        # One row per (playlist, track): explode the track list, then turn
        # each track dict into columns.
        temp = pd.DataFrame(json_data['playlists'])
        expanded_df = temp.explode('tracks').reset_index(drop=True)
        json_normalized = pd.json_normalize(expanded_df['tracks'])

        result = pd.concat([expanded_df.drop(columns=['tracks']), json_normalized], axis=1)
        pending.append(result[cols])

        if index % 50 == 0:
            chunk = pd.concat(pending, axis=0, ignore_index=True)
            chunk.to_parquet(f'{os.getcwd()}/data/raw/data/playlists_{index % 1000}.parquet')
            # Release the written rows before reading the next chunk.
            del chunk
            pending = []

        if index % 200 == 0:
            break
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
if __name__ == '__main__':
    # Unpack the downloaded archive, reset both output directories, then
    # convert the raw JSON files to parquet.
    cwd = os.getcwd()
    unzip_archive(cwd + '/data/raw/spotify_million_playlist_dataset.zip', cwd + '/data/raw/playlists')
    for directory in (cwd + '/data/raw/data', cwd + '/data/processed'):
        make_dir(directory)
    make_dataset()
|
| 102 |
+
|
scripts/make_dataset.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import os
|
| 3 |
+
import urllib.request
|
| 4 |
+
import zipfile
|
| 5 |
+
import json
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import time
|
| 8 |
+
import torch
|
| 9 |
+
import numpy as np
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import torch.nn as nn
|
| 12 |
+
import torch.nn.functional as F
|
| 13 |
+
import torch.optim as optim
|
| 14 |
+
from torch.utils.data import DataLoader, TensorDataset
|
| 15 |
+
from sklearn.model_selection import train_test_split
|
| 16 |
+
import matplotlib.pyplot as plt
|
| 17 |
+
from sklearn.preprocessing import LabelEncoder
|
| 18 |
+
import shutil
|
| 19 |
+
import os
|
| 20 |
+
import pyarrow.parquet as pq
|
| 21 |
+
|
| 22 |
+
def make_dir(directory):
    """Create ``directory`` as an empty directory.

    An existing path is deleted recursively before the directory is created
    again, so previous contents never survive the call.
    """
    already_there = os.path.exists(directory)
    if already_there:
        shutil.rmtree(directory)
    os.makedirs(directory)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def read_parquet_folder(folder_path):
    """Load every ``.parquet`` file in ``folder_path`` into one DataFrame.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        The row-wise concatenation of all parquet files, with a fresh
        0..n-1 index.

    Raises:
        ValueError: If the folder contains no ``.parquet`` files.
    """
    # os.listdir returns entries in arbitrary order. Sorting keeps the row
    # order stable between runs and machines, so anything derived from
    # first-appearance order is reproducible.
    parquet_files = sorted(
        file for file in os.listdir(folder_path) if file.endswith('.parquet')
    )
    if not parquet_files:
        raise ValueError(f'No .parquet files found in {folder_path!r}')

    dataframes = [
        pd.read_parquet(os.path.join(folder_path, file)) for file in parquet_files
    ]
    return pd.concat(dataframes, ignore_index=True)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def create_ids(df, col, name):
    """Add an integer ``<name>_id`` column for ``df[col]`` and save the lookup.

    Ids are assigned in order of first appearance of each distinct value in
    ``df[col]``, starting at 0. The distinct (id, value) pairs are written to
    ``<cwd>/data/processed/<name>.csv``.

    Args:
        df: DataFrame to extend; it is modified in place.
        col: Name of the column whose values get ids.
        name: Prefix of the new id column and stem of the CSV filename.

    Returns:
        The same DataFrame, now containing the ``<name>_id`` column.
    """
    id_column = f'{name}_id'

    lookup = {}
    for position, value in enumerate(df[col].unique()):
        lookup[value] = position

    df[id_column] = df[col].map(lookup)

    mapping_table = df[[id_column, col]].drop_duplicates()
    mapping_table.to_csv(os.getcwd() + f'/data/processed/{name}.csv')

    return df
|
| 50 |
+
|
| 51 |
+
if __name__ == '__main__':
    # Build the processed CSVs (id lookups, album catalogue, training table)
    # from the parquet chunks in data/raw/data.
    folder_path = os.getcwd() + '/data/raw/data'
    df = read_parquet_folder(folder_path)

    directory = os.getcwd() + '/data/processed'
    make_dir(directory)

    df = create_ids(df, 'artist_name', 'artist')
    df = create_ids(df, 'pid', 'playlist')
    df = create_ids(df, 'album_name', 'album')

    # Distinct tracks per (playlist, artist, album), and playlist length
    # taken as the highest track position + 1.
    df['song_count'] = df.groupby(['pid','artist_name','album_name'])['track_name'].transform('nunique')
    df['playlist_songs'] = df.groupby(['pid'])['pos'].transform('max')
    df['playlist_songs'] += 1

    # An "item" is an artist/album pair; ids follow first appearance.
    df['artist_album'] = df[['artist_name', 'album_name']].agg('::'.join, axis=1)
    value_to_id = {val: i for i, val in enumerate(df['artist_album'].unique())}
    df['artist_album_id'] = df['artist_album'].map(value_to_id)

    df[['artist_album_id', 'artist_album', 'artist_name', 'album_name', 'track_name']].drop_duplicates().to_csv(os.getcwd() + '/data/processed/artist_album.csv')

    # NOTE(review): song_count already holds the per-group track count on
    # every row of the group, so summing it over what looks like the same
    # grouping multiplies the count by the number of rows. Confirm this is
    # the intended weighting before changing it.
    df['song_count'] = df.groupby(['playlist_id','artist_album_id'])['song_count'].transform('sum')

    encoder = LabelEncoder()
    encoder.fit(df['track_name'])

    df['track_id'] = encoder.transform(df['track_name'])
    df['song_percent'] = df['song_count'] / df['playlist_songs']
    # Squash the ratio with a logistic function.
    df['song_percent'] = 1 / (1 + np.exp(-df['song_percent']))

    artists = df.loc[:,['playlist_id','artist_album_id','song_percent']].drop_duplicates()
    # Keep 'song_percent' in the export: scripts/model.py reads it from
    # playlists.csv as the training target and fails without the column.
    artists.loc[:,['playlist_id','artist_album_id','song_percent']].to_csv(os.getcwd() + '/data/processed/playlists.csv')
|
scripts/model.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Attribution: https://github.com/AIPI540/AIPI540-Deep-Learning-Applications/
|
| 3 |
+
|
| 4 |
+
Jon Reifschneider
|
| 5 |
+
Brinnae Bent
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import urllib.request
|
| 11 |
+
import zipfile
|
| 12 |
+
import json
|
| 13 |
+
import pandas as pd
|
| 14 |
+
import time
|
| 15 |
+
import torch
|
| 16 |
+
import numpy as np
|
| 17 |
+
import pandas as pd
|
| 18 |
+
import torch.nn as nn
|
| 19 |
+
import torch.nn.functional as F
|
| 20 |
+
import torch.optim as optim
|
| 21 |
+
from torch.utils.data import DataLoader, TensorDataset
|
| 22 |
+
from sklearn.model_selection import train_test_split
|
| 23 |
+
import matplotlib.pyplot as plt
|
| 24 |
+
from sklearn.preprocessing import LabelEncoder
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def prep_dataloaders(X_train,y_train,X_val,y_val,batch_size):
    """Wrap the training and validation splits in PyTorch DataLoaders.

    Features are converted to int64 tensors and targets to float32 tensors.

    Args:
        X_train, y_train: Training features and targets (array-likes).
        X_val, y_val: Validation features and targets (array-likes).
        batch_size: Minibatch size used by both loaders.

    Returns:
        ``(trainloader, valloader)``; only the training loader shuffles.
    """
    def _as_dataset(features, targets):
        feature_tensor = torch.from_numpy(np.array(features)).long()
        target_tensor = torch.from_numpy(np.array(targets)).float()
        return TensorDataset(feature_tensor, target_tensor)

    trainloader = DataLoader(_as_dataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    valloader = DataLoader(_as_dataset(X_val, y_val), batch_size=batch_size, shuffle=False)

    return trainloader, valloader
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class NNColabFiltering(nn.Module):
    """Embedding-based collaborative filtering network.

    Each user (playlist) id and item id is looked up in its own embedding
    table; the two vectors are concatenated and passed through one hidden
    ReLU layer to a single output, which is squashed into ``rating_range``.
    """

    def __init__(self, n_playlists, n_artists, embedding_dim_users, embedding_dim_items, n_activations, rating_range):
        """Build the embedding tables and the two linear layers.

        Args:
            n_playlists: Number of rows in the user embedding table.
            n_artists: Number of rows in the item embedding table.
            embedding_dim_users: Width of each user embedding.
            embedding_dim_items: Width of each item embedding.
            n_activations: Number of hidden units.
            rating_range: ``[low, high]`` bounds of the predictions.
        """
        super().__init__()
        self.user_embeddings = nn.Embedding(n_playlists, embedding_dim_users)
        self.item_embeddings = nn.Embedding(n_artists, embedding_dim_items)
        self.fc1 = nn.Linear(embedding_dim_users + embedding_dim_items, n_activations)
        self.fc2 = nn.Linear(n_activations, 1)
        self.rating_range = rating_range

    def forward(self, X):
        """Predict a rating for each row of ``X``.

        Column 0 of ``X`` is used as the user index and column 1 as the item
        index. Returns one prediction per row, shaped ``(rows, 1)``.
        """
        user_vecs = self.user_embeddings(X[:, 0])
        item_vecs = self.item_embeddings(X[:, 1])
        hidden = F.relu(self.fc1(torch.cat([user_vecs, item_vecs], dim=1)))
        raw_score = self.fc2(hidden)
        # Sigmoid maps to (0, 1); rescale to the configured [low, high].
        low, high = self.rating_range[0], self.rating_range[1]
        return torch.sigmoid(raw_score) * (high - low) + low
|
| 66 |
+
|
| 67 |
+
def train_model(model, criterion, optimizer, dataloaders, device, num_epochs=5, scheduler=None):
    """Train ``model`` and record the per-epoch loss of both phases.

    Every epoch runs a 'train' pass (gradients on, weights updated) followed
    by a 'val' pass (gradients off). The figure reported per phase is the
    sample-weighted mean of the square root of each batch's loss.

    Args:
        model: Network to train; it is moved to ``device``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped after every training batch.
        dataloaders: Dict with 'train' and 'val' DataLoaders.
        device: Device the model and each batch are moved to.
        num_epochs: Number of epochs to run.
        scheduler: Optional LR scheduler, stepped once per training epoch.

    Returns:
        Dict ``{'train': [...], 'val': [...]}`` with one value per epoch.
    """
    model = model.to(device)
    since = time.time()

    costpaths = {'train': [], 'val': []}

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            running_loss = 0.0

            for index, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(is_training):
                    outputs = model.forward(inputs).view(-1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Weight each batch by its size so a smaller final batch
                # does not skew the epoch average.
                running_loss += np.sqrt(loss.item()) * labels.size(0)
                print(f'\r{running_loss} {index} {(index / len(dataloaders[phase]))*100:.2f}%', end='')

            if is_training and scheduler is not None:
                scheduler.step()

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            costpaths[phase].append(epoch_loss)
            print('\n{} loss: {:.4f}'.format(phase, epoch_loss))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return costpaths
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
if __name__ == '__main__':
    # Load the (playlist, artist/album, target) table produced upstream.
    artists = pd.read_csv(os.getcwd() + '/data/processed/playlists.csv')
    X = artists[['playlist_id', 'artist_album_id']]
    y = artists['song_percent']

    # 80/20 train/validation split with a fixed seed.
    X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0, test_size=0.2)
    batchsize = 64
    trainloader, valloader = prep_dataloaders(X_train, y_train, X_val, y_val, batchsize)
    dataloaders = {'train': trainloader, 'val': valloader}

    # Embedding tables need one row per id, and ids start at 0.
    n_users = X['playlist_id'].max() + 1
    n_items = X['artist_album_id'].max() + 1
    model = NNColabFiltering(
        n_users,
        n_items,
        embedding_dim_users=50,
        embedding_dim_items=50,
        n_activations=100,
        rating_range=[0., 1.],
    )

    # Training configuration.
    criterion = nn.MSELoss()
    lr = 0.001
    n_epochs = 10
    wd = 1e-3
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    cost_paths = train_model(model, criterion, optimizer, dataloaders, device, n_epochs, scheduler=None)

    # Save the entire model object, not only its state dict.
    torch.save(model, os.getcwd() + '/models/recommender.pt')
|
| 156 |
+
|
setup.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Run the project's pipeline scripts one after another."""
import subprocess
import sys

# Stages in execution order; each lives in the scripts/ directory.
# NOTE(review): scripts/build_features.py is the stage that unpacks the raw
# archive and recreates data/processed, while scripts/make_dataset.py reads
# data/raw/data and writes data/processed. Confirm this order is intended.
PIPELINE_SCRIPTS = ('make_dataset.py', 'build_features.py', 'model.py')


def script_command(script):
    """Return the argv list that runs ``scripts/<script>``.

    The interpreter is ``sys.executable`` so every stage uses the same
    environment as this file. Returning a list (rather than a shell string)
    keeps interpreter paths containing spaces intact.
    """
    return [sys.executable, f'scripts/{script}']


def main():
    """Run every pipeline stage in order, continuing even if one fails."""
    for script in PIPELINE_SCRIPTS:
        subprocess.run(script_command(script))


if __name__ == '__main__':
    main()
|