# Current Path : /proc/thread-self/root/home/deltalab/PMS/recommendations/user_profiling/
# Current File : /proc/thread-self/root/home/deltalab/PMS/recommendations/user_profiling/app_test_PySpark.py
from components.ProfilingUsers import ProfilingUsers
from components.ProfileBased_RS import ProfileBased_RS
from _library.visual_utils import visualize_user_profile,visualize_spark_settings
from _library.data_loader import load_indacoProducts
from _library.mongodb_utils import simplified_SKUs
#from _library.io_toolkit import write_profiles, saveComputationalTimes
#import multiprocessing as mp
#import os
from pyspark import SparkContext,SparkConf
# Module-level switch: the recommendation stage is still a stub (see TODO below),
# so it is disabled by default and only the user-profiling pipeline runs.
GENERATE_RECOMMENDATIONS = False

if __name__ == '__main__':
    # ------------------------------- SPARK CONTEXT ---------------------------
    # Local single-node master; executor memory kept small for dev machines.
    conf = (SparkConf().setMaster("local")
                       .setAppName("Multiprocess_Recommender_System")
                       .set("spark.executor.memory", "472m"))
    sc = SparkContext(conf=conf)
    visualize_spark_settings(sc)

    # --------------------------------- READ DATA -----------------------------
    indacoProducts_df, indacoOrders_df, indacoCategories = load_indacoProducts(
        visualize_aggregated_territories=True)

    # ------------------------------ PRE-PROCESSING ---------------------------
    # [PRE-PROCESSING A] Simplify SKUs.
    # sku_mapping maps simplified SKU -> original Indaco SKU — TODO confirm
    # against simplified_SKUs; the inversion below assumes that orientation.
    indacoProducts_df, product_names, sku_mapping = simplified_SKUs(
        indacoProducts_df, product_identifier='SKU')

    # [PRE-PROCESSING B] Add the simplified SKUs to the orders.
    # Invert the mapping so we can translate Indaco SKU -> simplified SKU.
    reversed_skuMapping = {indaco_sku: simplified_sku
                           for simplified_sku, indaco_sku in sku_mapping.items()}
    # Series.map leaves NaN for SKUs missing from the mapping (instead of the
    # KeyError a direct dict lookup would raise); those rows are removed by the
    # dropna(subset=['SKU']) below, matching the intended "drop unknowns" flow.
    indacoOrders_df.insert(loc=4, column='SKU',
                           value=indacoOrders_df['sku'].map(reversed_skuMapping))
    indacoOrders_df.rename(columns={'sku': 'indaco_sku'}, inplace=True)

    # Enrich each order with the product's attributes; drop the product columns
    # that already exist on the orders to avoid duplicated columns in the merge.
    duplicate_columns = ['Product Type', 'indaco_sku', 'Title']
    enhancedOrders = indacoOrders_df.merge(
        indacoProducts_df.drop(columns=duplicate_columns), how='left', on='SKU')
    enhancedOrders = enhancedOrders.dropna(subset=['SKU'])

    # Generate a profile for each user from their (enriched) order history.
    profiler = ProfilingUsers(orders=enhancedOrders)
    userProfiles = profiler.mine_orders()

    # ----------------------------- RECOMMENDER SYSTEM ------------------------
    if GENERATE_RECOMMENDATIONS:
        verbose = False
        # TODO: initialise the profile-based recommender (ProfileBased_RS),
        # fan the users out over worker processes, collect each worker's
        # (user_recommendations, user_computationalTime) pair into
        # userRecommendations_byUsers / computationalTime_byUser, and persist
        # the timings via saveComputationalTimes (_library.io_toolkit).

    # -------------------------------- VISUALISATION --------------------------
    print("\nUSER PROFILES:")
    for user_id, user_profile in userProfiles.items():
        # Merge the user profile with the recommended products (stub until the
        # recommender stage above is implemented).
        if GENERATE_RECOMMENDATIONS:
            print("Generating recommendations [PySpark]")
            #user_profile.update(userRecommendations_byUsers[user_id])
        # Visualize the profile.
        visualize_user_profile(user_id, user_profile)
    # Write the user profiles to disk (disabled for this test driver).
    #write_profiles(userProfiles, sku_mapping)