# Your IP : 216.73.216.52
#
#
# Current Path : /proc/thread-self/root/home/deltalab/PMS/recommendations/user_profiling/
# Upload File :
# Current File : //proc/thread-self/root/home/deltalab/PMS/recommendations/user_profiling/app_test_PySpark.py

from components.ProfilingUsers import ProfilingUsers
from components.ProfileBased_RS import ProfileBased_RS

from _library.visual_utils import visualize_user_profile,visualize_spark_settings
from _library.data_loader import load_indacoProducts
from _library.mongodb_utils import simplified_SKUs
#from _library.io_toolkit import write_profiles, saveComputationalTimes

#import multiprocessing as mp

#import os

from pyspark import SparkContext,SparkConf

# Feature flag: when True, the (not yet implemented) recommender stage is
# entered and a notice is printed for every user while the profiles are shown.
GENERATE_RECOMMENDATIONS = False


def main():
    """Build the user profiles from the Indaco orders and print them.

    Steps performed, in order:
      1. create a local Spark context and display its settings;
      2. load the products, orders and categories;
      3. attach the simplified SKU to every order;
      4. enrich the orders with the product columns (left merge on 'SKU');
      5. mine the user profiles and visualize each of them.

    The Spark context is always stopped before returning, including when one
    of the steps raises.

    Raises:
        KeyError: if an order references a 'sku' that is not present among
            the values of the SKU mapping returned by ``simplified_SKUs``.
    """
    # Inspect Context
    conf = (SparkConf().setMaster("local")
            .setAppName("Multiprocess_Recommender_System")
            .set("spark.executor.memory", "472m"))
    sc = SparkContext(conf=conf)

    try:
        visualize_spark_settings(sc)

        # Read data
        indacoProducts_df, indacoOrders_df, indacoCategories = load_indacoProducts(
            visualize_aggregated_territories=True)

        # --------------------------- PRE-PROCESSING ---------------------------

        # [PRE-PROCESSING A] Simplify SKUs
        indacoProducts_df, product_names, sku_mapping = simplified_SKUs(
            indacoProducts_df, product_identifier='SKU')

        # [PRE-PROCESSING B] Add the simplified SKUs to the orders.
        # The mapping is inverted so it can be looked up by the order's 'sku'
        # (presumably sku_mapping goes simplified SKU -> Indaco SKU; verify
        # against simplified_SKUs).
        reversed_skuMapping = {indaco_sku: simplified_sku
                               for simplified_sku, indaco_sku in sku_mapping.items()}
        # Plain dict indexing: an unknown 'sku' raises KeyError here.
        simplified_column = indacoOrders_df['sku'].apply(
            lambda indaco_sku: reversed_skuMapping[indaco_sku])
        indacoOrders_df.insert(loc=4, column='SKU', value=simplified_column)
        indacoOrders_df.rename(columns={'sku': 'indaco_sku'}, inplace=True)

        # Enhance the orders with the information of the products.
        # These columns exist on both frames; they are dropped from the
        # products side so the merge does not produce suffixed duplicates.
        duplicate_columns = ['Product Type', 'indaco_sku', 'Title']
        enhancedOrders = indacoOrders_df.merge(
            indacoProducts_df.drop(columns=duplicate_columns), how='left', on='SKU')
        # Discard orders left without a SKU.
        enhancedOrders = enhancedOrders.dropna(subset=['SKU'])

        # Generate the profiles of the users
        profiler = ProfilingUsers(orders=enhancedOrders)
        userProfiles = profiler.mine_orders()

        # ------------------------- RECOMMENDER SYSTEM -------------------------
        if GENERATE_RECOMMENDATIONS:
            # TODO: initialise and run the recommender system, then collect
            # the results, e.g.:
            #for p in sys_processes:
            #    user_recommendations, user_computationalTime = queue.get()
            #
            #    userRecommendations_byUsers.update(user_recommendations)
            #    computationalTime_byUser.update(user_computationalTime)

            # Wait all processes

            # Save computational times
            #saveComputationalTimes(computationalTime_byUser)
            pass
        # ----------------------------------------------------------------------

        # Visualize the user profiles
        print("\nUSER PROFILES:")
        for user_id, user_profile in userProfiles.items():

            # Merge the user profile with the recommended products
            if GENERATE_RECOMMENDATIONS:
                print("Generating recommendations [PySpark]")
                #user_recommendations = userRecommendations_byUsers[user_id]
                #user_profile.update(user_recommendations)

            # Visualize profile
            visualize_user_profile(user_id, user_profile)

        # Write the user profiles
        #write_profiles(userProfiles, sku_mapping)
    finally:
        # Release the Spark context (and its JVM resources) on every exit path.
        sc.stop()


if __name__ == '__main__':
    main()