Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updated the models #8

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
86 changes: 86 additions & 0 deletions backend/api/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import time

import numpy as np
from flask import Flask, request, jsonify

from backend.model.deep_learning import QuestionRecommendationModel
from backend.models.collaborative_filtering import load_data as load_cf_data, surprise_data_prepare, training_of_svd_model, evaluate_model, getting_top_n_recommendations
from backend.models.content_based_filtering import data_load as load_cb_data, get_item_description, create_tfidf_matrix, similar_item_getting
from backend.utils import preprocess_data

# Flask application object; the route defined further down is registered on it.
app = Flask(__name__)

# CSV inputs for the collaborative-filtering (cf) and content-based (cb) models.
# Both paths are relative, so they resolve against the current working directory.
cf_data_path = 'data/cf_data.csv'
cb_data_path = 'data/cb_data.csv'

# NOTE: everything below is module-level code, so it runs once when this module
# is imported: both CSVs are loaded and every model is built/trained before the
# server can answer its first request.
# NOTE(review): the imports at the top of this file mix `backend.model.*` and
# `backend.models.*`, and import `training_of_svd_model` -- confirm the package
# path and that the target module really exports that name.

# Collaborative filtering: interaction table -> surprise dataset.
cf_df = load_cf_data(cf_data_path)
cf_surprise_data = surprise_data_prepare(cf_df)

# Content-based filtering: item descriptions -> TF-IDF matrix.
cb_df = load_cb_data(cb_data_path)
cb_descriptions = get_item_description(cb_df)
cb_tfidf_matrix = create_tfidf_matrix(cb_descriptions)

# Train the SVD model on the collaborative-filtering data.
cf_model = training_of_svd_model(cf_surprise_data)

# Deep-learning model: reload the interaction CSV and preprocess it.
dl_df = load_cf_data(cf_data_path)
# presumably user_ids / item_ids are pandas Series (``.unique()`` is called on
# them below); their encoding is decided by preprocess_data -- TODO confirm the
# ids are valid embedding indices in [0, num_users) / [0, num_items).
user_ids, item_ids, ratings = preprocess_data(dl_df)
# Number of distinct users / items; used as the model's embedding table sizes.
num_users = len(user_ids.unique())
num_items = len(item_ids.unique())
dl_model = QuestionRecommendationModel(num_users, num_items)
# NOTE(review): binary cross-entropy expects targets in [0, 1]; verify that
# preprocess_data produces such ratings.
dl_model.compile(optimizer='adam', loss='binary_crossentropy')
dl_model.fit([user_ids, item_ids], ratings, epochs=10)

def compare_model_performance():
    """Benchmark every recommender through the ``/recommend`` endpoint.

    For up to the first 100 distinct users in ``cf_df`` each model is queried
    via Flask's test client (so the view runs inside a real request context
    and receives the model name as the ``model`` query parameter, exactly as
    an HTTP caller would supply it).

    Returns:
        dict: ``{model_name: {'accuracy': float, 'response_time': float}}``
        where ``accuracy`` is the fraction of recommended items that the user
        has an interaction with in ``cf_df`` and ``response_time`` is the mean
        wall-clock seconds per request. Both stay ``0`` when nothing could be
        measured.
    """
    metrics = {
        'collaborative': {'accuracy': 0, 'response_time': 0},
        'content_based': {'accuracy': 0, 'response_time': 0},
        'deep_learning': {'accuracy': 0, 'response_time': 0},
    }

    test_users = cf_df['user_id'].unique()[:100]

    # The ground truth does not depend on the model, so compute it once
    # instead of re-filtering the frame for every (model, user) pair.
    actual_by_user = {
        user: set(cf_df.loc[cf_df['user_id'] == user, 'item_id'].tolist())
        for user in test_users
    }

    with app.test_client() as client:
        for model, result in metrics.items():
            correct_predictions = 0
            total_predictions = 0
            start_time = time.perf_counter()

            for user in test_users:
                # Best-effort evaluation: one failing user must not abort the run.
                try:
                    response = client.get(
                        f'/recommend/{user}', query_string={'model': model}
                    )
                    payload = response.get_json(silent=True) or {}
                    if response.status_code != 200:
                        raise RuntimeError(
                            payload.get('error', f'HTTP {response.status_code}')
                        )
                    recommended_items = set(payload['recommendations'])

                    actual_items = actual_by_user[user]
                    if actual_items:
                        correct_predictions += len(actual_items & recommended_items)
                        total_predictions += len(recommended_items)

                except Exception as e:
                    print(f"Error evaluating {model} for user {user}: {str(e)}")
                    continue

            elapsed = time.perf_counter() - start_time

            if total_predictions:
                result['accuracy'] = correct_predictions / total_predictions
            if len(test_users):
                result['response_time'] = elapsed / len(test_users)

    return metrics

@app.route('/recommend/<user_id>', methods=['GET'])
def recommend(user_id):
    """Return recommendations for ``user_id`` as JSON.

    Query parameters:
        model: ``collaborative`` (default), ``content_based`` or
            ``deep_learning``.
        n: maximum number of recommendations (default 10).

    Responses:
        200 ``{'recommendations': [...]}`` on success.
        400 ``{'error': ...}`` for an unknown model or an invalid ``n``.
        500 ``{'error': ...}`` for any failure inside a model.
    """
    try:
        model_name = request.args.get('model', 'collaborative')
        try:
            n = int(request.args.get('n', 10))
        except ValueError:
            return jsonify({'error': "query parameter 'n' must be an integer"}), 400
        if n < 0:
            return jsonify({'error': "query parameter 'n' must not be negative"}), 400

        if model_name == 'collaborative':
            # Signature is (df, user_id, model, n).
            # NOTE(review): user_id arrives from the URL as a str; if the
            # 'user_id' column of cf_df is numeric the known-item filter will
            # never match -- confirm the id type and convert here if needed.
            top_items = getting_top_n_recommendations(cf_df, user_id, cf_model, n)
            # NumPy scalars are not JSON-serialisable; unwrap them.
            recommendations = [
                item.item() if isinstance(item, np.generic) else item
                for item in top_items
            ]
        elif model_name == 'content_based':
            # TODO: placeholder -- the row indices of the items this user
            # interacted with are not looked up yet, so this branch currently
            # fails and is reported as a 500.
            user_items = ...
            recommendations = [
                np.asarray(similar_item_getting(cb_tfidf_matrix, item_index, n)).tolist()
                for item_index in user_items
            ]
        elif model_name == 'deep_learning':
            # TODO: placeholder -- the encoded user index (one entry per
            # candidate item) is not computed yet, so this branch currently
            # fails and is reported as a 500.
            user_id_encoded = ...
            predictions = dl_model.predict([user_id_encoded, np.arange(num_items)])
            # Flatten first: argsort on an (N, 1) array sorts along the
            # length-1 axis and would return all zeros.
            top_indices = np.argsort(np.ravel(predictions))[::-1][:n]
            recommendations = top_indices.tolist()
        else:
            return jsonify({'error': f"unknown model '{model_name}'"}), 400

        return jsonify({'recommendations': recommendations})
    except Exception as e:
        # Top-level boundary of the view: report the failure to the client.
        return jsonify({'error': str(e)}), 500

# Start Flask's built-in server when this file is executed as a script.
# NOTE(review): debug=True enables Flask's debug mode (auto-reload and the
# interactive debugger) -- confirm this entry point is only used for local
# development, never for a deployed service.
if __name__ == '__main__':
    app.run(debug=True)


78 changes: 78 additions & 0 deletions backend/model/collaborative_filtering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import re
from surprise import Reader,Dataset,SVD,accuracy
from surprise.model_selection import GridSearchCV
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score


import pandas as pd

def load_data(path_data):
    """Load the user-item interaction table from a CSV file.

    Args:
        path_data: path of the CSV file to read.

    Returns:
        pandas.DataFrame with the file's contents.

    Raises:
        ValueError: if no file exists at ``path_data`` (the original
            ``FileNotFoundError`` is attached as ``__cause__``).
    """
    try:
        return pd.read_csv(path_data)
    except FileNotFoundError as err:
        # Build ONE message string; passing the path as a second positional
        # argument would turn the exception text into a tuple repr.
        raise ValueError(f"Data file not found at: {path_data}") from err

def surprise_data_prepare(df, rating_scale=(0, 1)):
    """Wrap an interaction DataFrame in a ``surprise`` dataset.

    Args:
        df: pandas DataFrame with ``user_id``, ``item_id`` and ``rating``
            columns (read in that order).
        rating_scale: ``(min, max)`` of the ratings in ``df``. Defaults to
            ``(0, 1)`` (0 = no interaction, 1 = interaction); pass e.g.
            ``(1, 5)`` for star-style ratings.

    Returns:
        The object produced by ``Dataset.load_from_df``.
    """
    reader = Reader(rating_scale=rating_scale)
    interactions = df[["user_id", "item_id", "rating"]]
    return Dataset.load_from_df(interactions, reader)

def training_of_the_svd_model(data, parameter_grid=None):
    """Train an SVD recommender, optionally tuning it with a grid search.

    Args:
        data: surprise dataset (as returned by ``surprise_data_prepare``).
        parameter_grid: optional dict of SVD hyper-parameter lists. When
            given, a 5-fold ``GridSearchCV`` picks the combination with the
            best RMSE.

    Returns:
        An ``SVD`` instance fitted on the full training set built from
        ``data``.
    """
    if parameter_grid:
        # GridSearchCV wants the algorithm CLASS (it instantiates it once per
        # parameter combination) and reports progress via ``joblib_verbose``.
        search = GridSearchCV(
            SVD,
            parameter_grid,
            measures=['rmse', 'mae'],
            cv=5,
            joblib_verbose=1,
        )
        search.fit(data)
        # ``best_estimator`` is a dict keyed by measure; its entries are not
        # fitted yet.
        algo = search.best_estimator['rmse']
    else:
        algo = SVD()

    # ``fit`` expects a Trainset, not the raw Dataset.
    algo.fit(data.build_full_trainset())
    return algo


# Alias for callers that import the shorter spelling of the function name.
training_of_svd_model = training_of_the_svd_model

def evaluate_model(model, data):
    """Score a fitted surprise model on the ratings it was built from.

    Args:
        model: fitted surprise algorithm exposing ``test``.
        data: surprise ``Dataset`` (or an already-built ``Trainset``).

    Returns:
        dict with ``rmse``, ``mae`` (rating-prediction errors) and
        ``precision``, ``recall``, ``f1`` (after binarising ratings).
    """
    # ``build_testset`` lives on Trainset; a Dataset must be converted first.
    if hasattr(data, "build_full_trainset"):
        trainset = data.build_full_trainset()
    else:
        trainset = data
    predictions = model.test(trainset.build_testset())

    rmse = accuracy.rmse(predictions)
    mae = accuracy.mae(predictions)

    # Binarise: any positive true rating counts as an interaction, and an
    # estimate of at least 0.5 counts as a predicted interaction.
    true_labels = [int(pred.r_ui > 0) for pred in predictions]
    predicted_labels = [int(pred.est >= 0.5) for pred in predictions]

    precision = precision_score(true_labels, predicted_labels)
    recall = recall_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)

    return {'rmse': rmse, 'mae': mae, 'precision': precision, 'recall': recall, 'f1': f1}

def getting_top_n_recommendations(df, user_id, model, n=10):
    """Recommend the ``n`` unseen items with the highest predicted rating.

    Args:
        df: interaction DataFrame with ``user_id`` and ``item_id`` columns.
        user_id: user to recommend for; compared with ``==`` against
            ``df["user_id"]``, so it must have the same type as that column.
        model: object whose ``predict(user_id, item_id)`` returns a
            prediction exposing ``est`` and ``iid``.
        n: maximum number of items to return; ``n <= 0`` yields ``[]``.

    Returns:
        list of item ids, best estimate first.
    """
    import heapq

    known_items = set(df.loc[df["user_id"] == user_id, "item_id"])
    candidates = set(df["item_id"]) - known_items

    # Lazily score each candidate; nlargest keeps only the best ``n`` instead
    # of sorting every prediction.
    scored = (model.predict(user_id, iid) for iid in candidates)
    best = heapq.nlargest(n, scored, key=lambda prediction: prediction.est)

    return [prediction.iid for prediction in best]


import os

# Sample user-item interactions, written to disk every time this module is
# imported so that 'data/cf_data.csv' exists for consumers of that path.
# NOTE(review): these ratings span 1-5 while surprise_data_prepare defaults to
# a (0, 1) rating scale -- confirm which scale is intended.
cf_data = {
    # Sample ids
    'user_id': [1, 2, 3, 4, 1, 2, 3],
    'item_id': [101, 102, 103, 104, 105, 106, 107],
    'rating': [5, 4, 3, 5, 2, 1, 3],
}

cf_df = pd.DataFrame(cf_data)
# to_csv does not create missing directories; make sure 'data/' exists first.
os.makedirs('data', exist_ok=True)
cf_df.to_csv('data/cf_data.csv', index=False)

# Manual smoke test: tune an SVD model and print recommendations for one user.
if __name__ == "__main__":
    # NOTE(review): load_data parses this file with pandas.read_csv, so the
    # '.json' file must actually contain CSV text -- confirm the intended path.
    path_data = 'data.json'
    df = load_data(path_data)
    surprise_data = surprise_data_prepare(df)

    param_grid = {"n_factors": [10, 20, 30], "lr_all": [0.005, 0.01, 0.05]}
    model = training_of_the_svd_model(surprise_data, param_grid)

    user_id = 999
    # Argument order is (df, user_id, model, n).
    recommendations = getting_top_n_recommendations(df, user_id, model)

    print("Top", len(recommendations), "recommendations for user", user_id, ":", recommendations)
64 changes: 64 additions & 0 deletions backend/model/content_based_filtering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import re
from numpy import vectorize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd


def data_load(path_data):
    """Read the CSV file at ``path_data`` and return it as a DataFrame."""
    frame = pd.read_csv(path_data)
    return frame

def get_item_description(df):
    """Return the question descriptions in ``df`` as a list.

    The ``item-description`` column is preferred; ``description`` (the column
    name used by the sample CSV this module writes) is accepted as a fallback.

    Args:
        df: pandas DataFrame holding one row per question.

    Returns:
        list with one description per row, in row order.

    Raises:
        KeyError: if neither column is present.
    """
    for column in ("item-description", "description"):
        if column in df.columns:
            return df[column].tolist()
    raise KeyError("item-description")

def create_tfidf_matrix(descriptions):
    """Vectorise the question descriptions with TF-IDF.

    English stop words are dropped; the result of ``fit_transform`` is
    returned with one row per entry of ``descriptions``.
    """
    return TfidfVectorizer(stop_words="english").fit_transform(descriptions)

def similar_item_getting(tfidf_matrix, item_index, neighbors=10):
    """Return the row indexes of the items most similar to ``item_index``.

    Args:
        tfidf_matrix: TF-IDF matrix with one row per item.
        item_index: row of the query item.
        neighbors: maximum number of similar items to return.

    Returns:
        Array of row indexes ordered by decreasing cosine similarity; the
        query item itself is never included.
    """
    # cosine_similarity returns a 1 x n_items matrix; flatten it to a vector.
    similarities = cosine_similarity(tfidf_matrix[item_index], tfidf_matrix).flatten()
    ranked = similarities.argsort()[::-1]
    # Drop the query item by index rather than assuming it sorts first: an
    # item with an identical description ties at the top score and could
    # otherwise push the query item into the results.
    ranked = ranked[ranked != item_index]
    return ranked[:neighbors]


import os

# Sample question catalogue, written to disk every time this module is
# imported so that 'data/cb_data.csv' exists for consumers of that path.
cb_data = {
    'problem_id': [1, 2, 3, 4, 5, 6, 7],
    'title': [
        "Two Sum",
        "Add Two Numbers",
        "Longest Substring Without Repeats",
        "Median of Two Sorted Arrays",
        "Reverse a Linked List",
        "Binary Tree Inorder Traversal",
        "LRU Cache",
    ],
    'description': [
        "Find two numbers that add up to a target.",
        "Add two numbers represented as linked lists.",
        "Find the length of the longest substring without repeats.",
        "Find the median of two sorted arrays.",
        "Reverse a linked list iteratively and recursively.",
        "Perform inorder traversal of a binary tree.",
        "Implement an LRU Cache using a data structure.",
    ],
    'tags': [
        "arrays, hash-table",
        "linked-list, math",
        "string, sliding-window",
        "array, divide-and-conquer",
        "linked-list",
        "tree, recursion, stack",
        "design, hash-table, linked",
    ],
    'difficulty': ["easy", "medium", "medium", "hard", "easy", "easy", "hard"],
    'popularity': ["high", "high", "high", "medium", "high", "medium", "high"],
}

cb_df = pd.DataFrame(cb_data)
# to_csv does not create missing directories; make sure 'data/' exists first.
os.makedirs('data', exist_ok=True)
cb_df.to_csv('data/cb_data.csv', index=False)




35 changes: 35 additions & 0 deletions backend/model/deep_learning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from sklearn.metrics import f1_score, precision_score, recall_score
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Input, Concatenate, Dense
from tensorflow.keras.models import Model

from backend.model.collaborative_filtering import evaluate_model

class QuestionRecommendationModel(tf.keras.Model):
    """Embedding-based scorer for (user, question) pairs.

    A user embedding and a question embedding are concatenated and passed
    through one ReLU hidden layer and a single sigmoid output unit.

    Args:
        num_users: size of the user embedding table; Keras ``Embedding``
            requires user ids to be integers in ``[0, num_users)``.
        num_questions: size of the question embedding table; question ids
            must be integers in ``[0, num_questions)``.
        embedding_dim: width of each embedding vector.
        hidden_units: width of the hidden dense layer.
    """

    def __init__(self, num_users, num_questions, embedding_dim=64, hidden_units=64):
        super().__init__()
        self.user_embeddings = Embedding(num_users, embedding_dim)
        self.question_embeddings = Embedding(num_questions, embedding_dim)
        # Created once here instead of on every forward pass in ``call``.
        self.concat = Concatenate()
        self.dense1 = Dense(hidden_units, activation='relu')
        self.dense2 = Dense(1, activation='sigmoid')

    def call(self, inputs):
        """Score a batch; ``inputs`` is a ``(user_ids, question_ids)`` pair."""
        user_id, question_id = inputs
        user_embedding = self.user_embeddings(user_id)
        question_embedding = self.question_embeddings(question_id)
        concatenated = self.concat([user_embedding, question_embedding])
        hidden = self.dense1(concatenated)
        return self.dense2(hidden)

def evaluate_dl_model(model, user_ids_test, item_ids_test, ratings_test):
    """Compute precision, recall and F1 of ``model`` on held-out interactions.

    The model's predictions for the given (user, item) pairs are turned into
    0/1 labels with a 0.5 threshold and compared against ``ratings_test``.

    Returns:
        dict with the keys ``precision``, ``recall`` and ``f1``.
    """
    scores = model.predict([user_ids_test, item_ids_test])
    # Strictly greater than 0.5 counts as a positive prediction.
    predicted_labels = (scores > 0.5).astype(int)

    metric_functions = {
        'precision': precision_score,
        'recall': recall_score,
        'f1': f1_score,
    }
    return {
        metric: score_fn(ratings_test, predicted_labels)
        for metric, score_fn in metric_functions.items()
    }

# Example
# dl_results = evaluate_dl_model(dl_model, dl_user_ids_val, dl_item_ids_val, dl_ratings_val)
# print(f"Deep Learning Model Precision: {dl_results['precision']}, Recall: {dl_results['recall']}, F1: {dl_results['f1']}")
6 changes: 6 additions & 0 deletions backend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading