Commit
·
2d39e80
1
Parent(s):
5178166
Create vectorial_representation.py
Browse files- vectorial_representation.py +19 -0
vectorial_representation.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from vectorization import model, spotify_data
|
| 3 |
+
|
| 4 |
+
# Función para convertir una canción en un vector promedio de sus palabras
|
| 5 |
+
def song_vector(tokens, model):
    """Return the average embedding of the in-vocabulary words of a song.

    Args:
        tokens: The words of one song. Normally an iterable of strings; a
            plain string is split on whitespace so that it is averaged word
            by word rather than character by character, and a missing value
            (``None`` or a float such as NaN) is treated as an empty song.
        model: Embedding model exposing ``wv.key_to_index`` (vocabulary
            membership), ``wv[word]`` (vector lookup) and ``vector_size``.

    Returns:
        A 1-D array: the element-wise mean of the vectors of every token
        found in the model vocabulary, or a zero vector of length
        ``model.vector_size`` when no token is known.
    """
    if isinstance(tokens, str):
        # Iterating a str yields characters, not words.
        tokens = tokens.split()
    elif tokens is None or isinstance(tokens, float):
        # Missing dataframe cells arrive as None/NaN and are not iterable.
        tokens = []

    # Keep only the words the model has a vector for.
    vocabulary = model.wv.key_to_index
    known_words = [word for word in tokens if word in vocabulary]

    if not known_words:
        return np.zeros(model.vector_size)

    # Average the word vectors component-wise.
    return np.mean([model.wv[word] for word in known_words], axis=0)
|
| 15 |
+
|
| 16 |
+
# Aplicar esta función a cada canción en tu dataset
|
| 17 |
+
# Compute one averaged embedding per entry of the 'cleaned_text' column and
# store it in a new 'song_vector' column.
spotify_data['song_vector'] = spotify_data['cleaned_text'].apply(song_vector, args=(model,))

# Write the augmented table to disk without the index column.
# NOTE(review): each vector is presumably written as its text representation
# in the CSV cell; confirm that downstream readers can parse it back.
spotify_data.to_csv('dataset_modificado.csv', index=False)
|
| 19 |
+
|