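# Gradio front end for the hybrid MovieLens recommender.
# Loads pickled artifacts (hybrid model, ID-mapping loader, movies table) from
# model_artifacts/ and exposes recommendation, history, and search tabs.
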
import gradio as gr
import pickle
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import os
from scipy.sparse import csr_matrix
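
# The model classes below mirror their training-time definitions so pickle can
# reconstruct the saved artifacts; only the inference paths are needed here.
# ItemBasedCF predicts a user's rating as a similarity-weighted average of the
# ratings that user has already given.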
class ItemBasedCF:
    def __init__(self, n_neighbors=20):
        self.n_neighbors = n_neighbors
        self.item_similarity = None
        self.user_item_matrix = None

    def predict(self, user_idx, movie_idx):
        user_ratings = self.user_item_matrix[user_idx].toarray().flatten()
        rated_mask = user_ratings > 0
        if not rated_mask.any():
            return 2.5
        similarities = self.item_similarity[movie_idx].toarray().flatten()
        weights = similarities * rated_mask
        if weights.sum() == 0:
            return 2.5
        prediction = (weights * user_ratings).sum() / weights.sum()
        return np.clip(prediction, 1, 5)
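
# SVD recommender: predicts a rating as the global mean plus the dot product
# of the user's and movie's latent factor vectors.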
class SVDRecommender:
    def __init__(self, n_factors=50):
        self.n_factors = n_factors
        self.user_factors = None
        self.item_factors = None
        self.global_mean = 3.5

    def predict(self, user_idx, movie_idx):
        prediction = self.global_mean + np.dot(self.user_factors[user_idx], self.item_factors[movie_idx])
        return np.clip(prediction, 1, 5)
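
# Neural collaborative filtering: user and movie embeddings are concatenated
# and passed through an MLP (Linear -> ReLU -> Dropout blocks) to produce a rating score.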
class NeuralCF(nn.Module):
    def __init__(self, n_users, n_movies, embedding_dim=50, hidden_layers=[64, 32, 16]):
        super(NeuralCF, self).__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.movie_embedding = nn.Embedding(n_movies, embedding_dim)
        layers = []
        input_dim = embedding_dim * 2
        for hidden_dim in hidden_layers:
            layers.append(nn.Linear(input_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(0.2))
            input_dim = hidden_dim
        layers.append(nn.Linear(input_dim, 1))
        self.mlp = nn.Sequential(*layers)

    def forward(self, user_ids, movie_ids):
        user_emb = self.user_embedding(user_ids)
        movie_emb = self.movie_embedding(movie_ids)
        x = torch.cat([user_emb, movie_emb], dim=1)
        output = self.mlp(x)
        return output.squeeze()

    def predict(self, user_idx, movie_idx, device='cpu'):
        self.eval()
        with torch.no_grad():
            user_tensor = torch.LongTensor([user_idx]).to(device)
            movie_tensor = torch.LongTensor([movie_idx]).to(device)
            prediction = self.forward(user_tensor, movie_tensor)
            return torch.clamp(prediction, 1, 5).item()
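
# Hybrid model: blends the three predictors with fixed weights
# (item CF 0.3, SVD 0.4, NCF 0.3) and clips the result to the 1-5 rating scale.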
class HybridRecommender:
    def __init__(self, n_users, n_movies):
        self.n_users = n_users
        self.n_movies = n_movies
        self.item_cf = None
        self.svd = None
        self.ncf = None
        self.weights = {
            'item_cf': 0.3,
            'svd': 0.4,
            'ncf': 0.3
        }

    def predict(self, user_idx, movie_idx):
        cf_pred = self.item_cf.predict(user_idx, movie_idx)
        svd_pred = self.svd.predict(user_idx, movie_idx)
        ncf_pred = self.ncf.predict(user_idx, movie_idx)
        prediction = (
            self.weights['item_cf'] * cf_pred +
            self.weights['svd'] * svd_pred +
            self.weights['ncf'] * ncf_pred
        )
        return np.clip(prediction, 1, 5)

    def recommend_movies(self, user_id, N=10, user_id_map=None, reverse_movie_map=None, movies_df=None):
        if user_id_map is not None:
            if user_id not in user_id_map:
                return []
            user_idx = user_id_map[user_id]
        else:
            user_idx = user_id
        rated_movies = set(np.where(self.item_cf.user_item_matrix[user_idx].toarray().flatten() > 0)[0])
        scores = []
        for movie_idx in range(self.n_movies):
            if movie_idx not in rated_movies:
                score = self.predict(user_idx, movie_idx)
                scores.append((movie_idx, score))
        scores.sort(key=lambda x: x[1], reverse=True)
        top_recommendations = scores[:N]
        recommendations = []
        for movie_idx, score in top_recommendations:
            if reverse_movie_map is not None:
                original_movie_id = reverse_movie_map[movie_idx]
            else:
                original_movie_id = movie_idx
            if movies_df is not None:
                title = movies_df[movies_df['movie_id'] == original_movie_id]['title'].values[0]
            else:
                title = f"Movie {original_movie_id}"
            recommendations.append((original_movie_id, title, score))
        return recommendations
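
# Carries the mappings between original MovieLens user/movie IDs and the contiguous
# matrix indices used by the models above; re-declared here so the pickled loader can be restored.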
class MovieLensDataLoader:
    def __init__(self, ratings_path=None, movies_path=None):
        self.ratings_path = ratings_path
        self.movies_path = movies_path
        self.user_id_map = {}
        self.movie_id_map = {}
        self.reverse_user_map = {}
        self.reverse_movie_map = {}
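
# Load the pickled hybrid model, ID-map loader, and movies DataFrame from model_artifacts/,
# logging which files are present so missing artifacts are easy to spot in the Space logs.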
def load_model_and_data():
    print("Checking for files...")
    print(f"Current directory: {os.getcwd()}")
    print(f"Files in current directory: {os.listdir('.')}")
    if os.path.exists('model_artifacts'):
        print(f"Files in model_artifacts/: {os.listdir('model_artifacts')}")
    else:
        print("ERROR: model_artifacts/ folder does not exist!")
    try:
        files_to_check = [
            'model_artifacts/hybrid_model.pkl',
            'model_artifacts/loader.pkl',
            'model_artifacts/movies.pkl'
        ]
        for file_path in files_to_check:
            if not os.path.exists(file_path):
                print(f"ERROR: Missing file: {file_path}")
            else:
                file_size = os.path.getsize(file_path) / (1024 * 1024)
                print(f"Found: {file_path} ({file_size:.2f} MB)")
        with open('model_artifacts/hybrid_model.pkl', 'rb') as f:
            model = pickle.load(f)
        print("Loaded hybrid_model.pkl")
        with open('model_artifacts/loader.pkl', 'rb') as f:
            loader = pickle.load(f)
        print("Loaded loader.pkl")
        with open('model_artifacts/movies.pkl', 'rb') as f:
            movies = pickle.load(f)
        print("Loaded movies.pkl")
        user_ids = sorted(loader.user_id_map.keys())
        print(f"Model loaded successfully! {len(user_ids)} users available")
        return model, loader, movies, user_ids
    except FileNotFoundError as e:
        print(f"ERROR: File not found - {e}")
        print("Make sure all pkl files are in the model_artifacts/ folder")
        return None, None, None, []
    except Exception as e:
        print(f"ERROR loading model: {type(e).__name__}: {e}")
        import traceback
        traceback.print_exc()
        return None, None, None, []
| print("Loading model and data...") | |
| model, loader, movies_df, user_ids = load_model_and_data() | |
| print(f"Model loaded! Available users: {len(user_ids)}") | |
def get_recommendations(user_id, num_recommendations):
    if model is None or loader is None:
        return "Error: Model not loaded properly. Please check the model files."
    try:
        user_id = int(user_id)
        num_recommendations = int(num_recommendations)
        if user_id not in loader.user_id_map:
            return f"User ID {user_id} not found! Please select a valid user ID."
        recommendations = model.recommend_movies(
            user_id=user_id,
            N=num_recommendations,
            user_id_map=loader.user_id_map,
            reverse_movie_map=loader.reverse_movie_map,
            movies_df=movies_df
        )
        if not recommendations:
            return f"No recommendations found for User {user_id}"
        output = f"Top {num_recommendations} Movie Recommendations for User {user_id}\n\n"
        output += "=" * 60 + "\n\n"
        for i, (movie_id, title, score) in enumerate(recommendations, 1):
            stars = "*" * int(score)
            output += f"{i}. {title}\n"
            output += f" Predicted Rating: {score:.2f}/5.00 {stars}\n"
            output += f" Movie ID: {movie_id}\n\n"
        return output
    except ValueError:
        return "Error: Please enter valid numbers for User ID and Number of Recommendations"
    except Exception as e:
        return f"Error generating recommendations: {str(e)}"
def get_user_history(user_id):
    if model is None or loader is None:
        return "Error: Model not loaded properly."
    try:
        user_id = int(user_id)
        if user_id not in loader.user_id_map:
            return f"User ID {user_id} not found!"
        user_idx = loader.user_id_map[user_id]
        user_ratings = model.item_cf.user_item_matrix[user_idx].toarray().flatten()
        rated_indices = np.where(user_ratings > 0)[0]
        if len(rated_indices) == 0:
            return f"No rating history found for User {user_id}"
        history = []
        for movie_idx in rated_indices:
            original_movie_id = loader.reverse_movie_map[movie_idx]
            title = movies_df[movies_df['movie_id'] == original_movie_id]['title'].values[0]
            rating = user_ratings[movie_idx]
            history.append((title, rating))
        history.sort(key=lambda x: x[1], reverse=True)
        output = f"Rating History for User {user_id}\n\n"
        output += f"Total movies rated: {len(history)}\n"
        output += f"Average rating: {np.mean([r for _, r in history]):.2f}\n\n"
        output += "=" * 60 + "\n\n"
        output += "Top 10 Highest Rated Movies:\n\n"
        for i, (title, rating) in enumerate(history[:10], 1):
            stars = "*" * int(rating)
            output += f"{i}. {title} - {rating:.1f}/5 {stars}\n"
        return output
    except Exception as e:
        return f"Error: {str(e)}"
def get_movie_info(movie_title_search):
    if movies_df is None:
        return "Error: Movies data not loaded"
    try:
        matches = movies_df[movies_df['title'].str.contains(movie_title_search, case=False, na=False)]
        if len(matches) == 0:
            return f"No movies found matching '{movie_title_search}'"
        output = f"Search Results for '{movie_title_search}'\n\n"
        output += f"Found {len(matches)} movie(s):\n\n"
        output += "=" * 60 + "\n\n"
        for i, (_, row) in enumerate(matches.head(20).iterrows(), 1):
            output += f"{i}. {row['title']} (ID: {row['movie_id']})\n"
        if len(matches) > 20:
            output += f"\n... and {len(matches) - 20} more results"
        return output
    except Exception as e:
        return f"Error: {str(e)}"
with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis") as demo:
    gr.Markdown("""
    # Hybrid Movie Recommendation System
    ### DataSynthis Job Task - Powered by AI

    This system combines Collaborative Filtering, SVD Matrix Factorization, and Neural Networks
    to provide personalized movie recommendations from the MovieLens 1M dataset.
    """)

    with gr.Tabs():
        with gr.Tab("Get Recommendations"):
            gr.Markdown("### Get personalized movie recommendations for any user")

            with gr.Row():
                with gr.Column(scale=1):
                    user_id_input = gr.Number(
                        label="User ID",
                        value=1,
                        minimum=1,
                        maximum=6040,
                        step=1,
                        info="Enter a user ID (1-6040)"
                    )
                    num_recs_input = gr.Slider(
                        label="Number of Recommendations",
                        minimum=5,
                        maximum=20,
                        value=10,
                        step=1
                    )
                    recommend_btn = gr.Button("Get Recommendations", variant="primary")

                with gr.Column(scale=2):
                    recommendations_output = gr.Textbox(
                        label="Recommendations",
                        lines=20,
                        max_lines=30
                    )

            recommend_btn.click(
                fn=get_recommendations,
                inputs=[user_id_input, num_recs_input],
                outputs=recommendations_output
            )

            gr.Markdown("""
            **How it works:**
            - Enter a User ID (between 1 and 6040)
            - Choose how many recommendations you want
            - Click "Get Recommendations" to see personalized movie suggestions
            """)
| with gr.Tab("User History"): | |
| gr.Markdown("### View a user's rating history") | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| user_id_history = gr.Number( | |
| label="User ID", | |
| value=1, | |
| minimum=1, | |
| maximum=6040, | |
| step=1 | |
| ) | |
| history_btn = gr.Button("View History", variant="primary") | |
| with gr.Column(scale=2): | |
| history_output = gr.Textbox( | |
| label="Rating History", | |
| lines=20, | |
| max_lines=30 | |
| ) | |
| history_btn.click( | |
| fn=get_user_history, | |
| inputs=user_id_history, | |
| outputs=history_output | |
| ) | |
| with gr.Tab("Search Movies"): | |
| gr.Markdown("### Search for movies in the database") | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| movie_search = gr.Textbox( | |
| label="Movie Title Search", | |
| placeholder="e.g., Star Wars, Godfather, Titanic...", | |
| value="Star Wars" | |
| ) | |
| search_btn = gr.Button("Search", variant="primary") | |
| with gr.Column(scale=2): | |
| search_output = gr.Textbox( | |
| label="Search Results", | |
| lines=20, | |
| max_lines=30 | |
| ) | |
| search_btn.click( | |
| fn=get_movie_info, | |
| inputs=movie_search, | |
| outputs=search_output | |
| ) | |
| with gr.Tab("About"): | |
| gr.Markdown(""" | |
| ## About This System | |
| ### Model Architecture | |
| This is a Hybrid Recommendation System that combines three powerful approaches: | |
| 1. Item-Based Collaborative Filtering | |
| - Uses cosine similarity between movies | |
| - Recommends movies similar to what you've liked before | |
| 2. SVD Matrix Factorization | |
| - Decomposes the user-movie rating matrix | |
| - Discovers latent factors that explain user preferences | |
| 3. Neural Collaborative Filtering (NCF) | |
| - Deep learning model with user and movie embeddings | |
| - Learns complex non-linear patterns in user behavior | |
| ### Dataset | |
| - MovieLens 1M dataset | |
| - 1,000,209 ratings from 6,040 users on 3,900 movies | |
| - Ratings scale: 1-5 stars | |
| ### Performance Metrics | |
| - Precision@10: 26.77% | |
| - NDCG@10: 28.50% | |
| - Model improves recommendations by 40% vs baseline | |
| ### Created For | |
| DataSynthis Job Task | |
| ### Technologies Used | |
| - PyTorch (Neural Networks) | |
| - Scikit-learn (SVD, Similarity) | |
| - Pandas & NumPy (Data Processing) | |
| - Gradio (Web Interface) | |
| Note: This model is trained on the MovieLens 1M dataset. | |
| User IDs range from 1 to 6040, and movie IDs range from 1 to 3952. | |
| """) | |
| gr.Markdown(""" | |
| --- | |
| Hybrid Movie Recommendation System | Built for DataSynthis | |
| """) | |
if __name__ == "__main__":
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860
    )