-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgetTrainingData.py
More file actions
88 lines (82 loc) · 3.03 KB
/
getTrainingData.py
File metadata and controls
88 lines (82 loc) · 3.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
'''
This file uses the downloaded data called tagMoods.csv from Last.fm to create a training
set for Spotify moods. It searches Spotify for the track in the dataset
and if it is found, will record the danceability, energy, and valence of the track
along with the mood corresponding to the tags users marked on Last.fm. It will then
write the id of the track, the features, and the mood into a CSV file titled songMoods.csv.
This data will then be used to train the machine learning model.
'''
#Load libraries
import pandas
import spotipy
import time
import csv
def searchForTracks(sp):
# Load dataset from Last.fm
dataset = pandas.read_csv("tagMoods.csv")
#search spotify for the track, if found record uri and mood
tracks = []
tracksForData = []
for i in range(len(dataset['title'])):
title = dataset['title'][i]
#searches for track from dataset in spotify to see if we can get features
results = sp.search(q='track:' + title, type='track')
#need to delay calls to API
time.sleep(1)
items = results['tracks']['items']
#search found a track
if (len(items) > 0):
#checks if track has same artist as in dataset
if (items[0]['artists'][0]['name'] == dataset['artist'][i]):
tracks.append(items[0]['id'])
tracksForData.append([items[0]['id'], dataset['mood'][i]])
with open('tracksInSpotify.csv', mode='w') as file:
writer = csv.writer(file, delimiter=',',
quotechar='"',
quoting=csv.QUOTE_MINIMAL)
#combine features and data into one line
for i in range(len(tracksForData)):
line = [tracksForData[i]]
writer.writerow(line)
#returns 2D list of form [danceability, energy, valence] for each song
def getAudioFeatures(sp):
#reads tracks found by spotify and reformats into 2D list
dataset = pandas.read_csv("tracksInSpotify.csv")
dataset = pandas.Series.tolist(dataset)
tracks = []
data = []
for elem in dataset:
res = elem[0].strip('][').replace("'", "").split(', ')
data.append(res)
tracks.append(res[0])
features = []
featuresTotal = []
#max number of API calls
max = 100
for i in range(0, len(tracks), max):
#get audio features of max tracks at once
audioFeatures = sp.audio_features(tracks[i:i+max])
#space time between API calls
time.sleep(1)
for j in range(len(audioFeatures)):
if (audioFeatures != None):
features.append(audioFeatures[j]['danceability'])
features.append(audioFeatures[j]['energy'])
features.append(audioFeatures[j]['valence'])
featuresTotal.append(features)
features = []
return data, featuresTotal
def writeToCSV(data, features):
#write data to Excel file
with open('songMoods.csv', mode='w') as file:
writer = csv.writer(file, delimiter=',',
quotechar='"',
quoting=csv.QUOTE_MINIMAL)
#combine features and data into one line
for i in range(len(features)):
line = [data[i][0]] + features[i] + [data[i][1]]
writer.writerow(line)
def main(sp):
searchForTracks(sp)
data, features = getAudioFeatures(sp)
writeToCSV(data, features)