-
Notifications
You must be signed in to change notification settings - Fork 0
/
tag_recommend.py
139 lines (106 loc) · 4.93 KB
/
tag_recommend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine
import matplotlib.image as mpimg
import io
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from pyspark.sql import SparkSession
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS, ALSModel
from sklearn.model_selection import train_test_split
from functions import prepare_image, extract_features
import os
np.random.seed(0)
from flask import Flask, jsonify, render_template, request
app = Flask(__name__)
file_name = "./all_hashtags.pkl"
pics = pd.read_pickle("./tag_pics.pkl")
all_hashtags = pd.read_pickle(file_name)
hashtag_metadata = pd.read_pickle("./hashtag_metadata.pkl")
hashtag_lookup = pd.read_pickle("./hashtag_lookup.pkl")
### create our neural network ###
img_shape = (160, 160, 3)
# Create the base model from the pre-trained model MobileNet V2
base_model = MobileNetV2(input_shape=img_shape, include_top=False, weights='imagenet')
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
neural_network = tf.keras.Sequential([
base_model,
global_average_layer,
])
### ALS MODEL #####
spark = SparkSession.builder.master('local').getOrCreate()
als_model = ALSModel.load("./ALS_MODEL/als/")
recs = als_model.recommendForAllUsers(numItems=10).toPandas()
img_features = als_model.userFactors.toPandas()
hashtag_features = als_model.itemFactors.toPandas()
hashtag_index = list(all_hashtags)
def lookup_hashtag(hashtag_id):
return hashtag_index[hashtag_id]
def lookup_hashtag_recs(rec_scores):
return [lookup_hashtag(rec) for (rec, score) in rec_scores]
def recommender_dataframe(recs, hashtag_lookup, als_model):
recs['recommended_hashtags'] = recs['recommendations'].apply(lookup_hashtag_recs)
recs.index = recs['image_id']
recs = recs.join(hashtag_metadata, how='left')[['recommendations',
'recommended_hashtags',
'hashtags',
'image_local_name',
'search_hashtag']]
recs.drop('recommendations', axis=1, inplace=True)
image_factors = als_model.userFactors.toPandas()
image_factors.index = image_factors['id']
recs.join(image_factors);
# Add deep features information to recs dataframe
recs_deep = recs.join(pics, on='image_local_name', how='inner')
hashtags_df = pd.DataFrame.from_dict(hashtag_lookup, orient='index')
hashtags_df.head()
hashtags_df = hashtags_df.reset_index()
hashtags_df.columns = ['hashtag', 'id']
hashtags_df.index = hashtags_df['id']
hashtags_df.drop('id', axis=1, inplace=True)
# Only use certain columns
recs_deep_clean = recs_deep[['image_local_name', 'hashtags', 'deep_features']]
img_features.index = img_features['id']
img_features.drop(['id'], axis=1)
# Add image feature into dataframe
recommender_df = recs_deep_clean.join(img_features, how='inner')
return recommender_df, hashtags_df
recommender_df, hashtags_df = recommender_dataframe(recs,hashtag_lookup, als_model)
# Function that finds k nearest neighbors by cosine similarity
def find_neighbor_vectors(image_path, k=5, recommender_df=recommender_df):
"""Find image features (user vectors) for similar images."""
prep_image = prepare_image(image_path, where='local')
pics = extract_features(prep_image, neural_network)
rdf = recommender_df.copy()
rdf['dist'] = rdf['deep_features'].apply(lambda x: cosine(x, pics))
rdf = rdf.sort_values(by='dist')
return rdf.head(k)
def generate_hashtags(image_path):
fnv = find_neighbor_vectors(image_path, k=5, recommender_df=recommender_df)
# Find the average of the 5 user features found based on cosine similarity.
features = []
for item in fnv.features.values:
features.append(item)
avg_features = np.mean(np.asarray(features), axis=0)
# Add new column to the hashtag features which will be the dot product with the average image(user) features
hashtag_features['dot_product'] = hashtag_features['features'].apply(lambda x: np.asarray(x).dot(avg_features))
# Find the 10 hashtags with the highest feature dot products
final_recs = hashtag_features.sort_values(by='dot_product', ascending=False).head(10)
# Look up hashtags by their numeric IDs
output = []
for hashtag_id in final_recs.id.values:
output.append(hashtags_df.iloc[hashtag_id]['hashtag'])
return output
def show_results(test_image):
img = mpimg.imread(f'{test_image}.jpg')
'''
plt.figure(figsize=(9, 9))
plt.title(f'Original Hashtag: {test_image.upper()}', fontsize=32)
plt.imshow(img)
'''
recommended_hashtags = generate_hashtags(f'{test_image}.jpg')
print(', '.join(recommended_hashtags))
show_results('animals')
#app.run(host="0.0.0.0", port=5000,use_reloader=True, debug=True)