# DbService.py — database access layer for the recommendations/user_profiling
# component. (A non-Python web-shell dump header was removed from the top of
# this file; it made the module unparseable.)

from os import path
from _library.io_toolkit import load_collectionTypes, read_settings, read_indaco_credentials

from components.mongoDbService import MongoDB
from pandas import DataFrame
from sqlalchemy import create_engine,text
import pandas as pd
from cryptography.fernet import Fernet
import numpy as np

class DbService:

    ENCRYPTING_KEY = None

    def __init__(self, db_type):
        """Connect to the configured backend.

        db_type selects either "mongodb" or an MSSQL instance; credentials
        are resolved through read_indaco_credentials(). If db_type is not
        listed in the application settings, a message is printed and the
        process exits with status 1.
        """
        settings = read_settings()
        self.db_type = db_type

        if db_type not in settings['databaseName']:
            print(f"Database not available! ?{settings['databaseName']}?")
            exit(1)

        hostname, port, username, password, db_name = read_indaco_credentials(db_type)

        if db_type == "mongodb":
            self.db_service = MongoDB(hostname, port, username, password, db_name)
            return

        # SQL Server through pymssql; credentials are embedded in the URL.
        connection_url = f"mssql+pymssql://{username}:{password}@{hostname}:{port}/{db_name}"
        self.db_service = create_engine(connection_url)
        
    # --------------------------- GET DATA --------------------------------------------------
    def get_productTypes(self) -> DataFrame:
        """Return every product category joined with its parent category."""
        if self.db_type == "mongodb":
            return self.db_service.getProductTypes()

        query = '''

            SELECT cat1._id,
                cat1.label AS 'categoryName',
                cat1.googleId,
                cat2.label AS 'parent',
                cat1.isRoot,
                cat1.isLeaf,
                cat1.createdAt,
                cat1.updatedAt
            FROM dbo.pms_categories cat1
            INNER JOIN dbo.pms_categories cat2 ON (cat1.parentId = cat2._id)

            '''
        return self.execute_query(query)
    
    def get_products(self, consider_delatedProducts = False, consider_unavailableProducts = False) -> DataFrame:
        """Return all non-deleted product offers with their food attributes.

        On MongoDB the two filtering flags are forwarded to the driver; on the
        SQL path they are currently not applied and the query always excludes
        deleted offers.  Production-area codes are translated into readable
        names using _library/INDACO_collectionCodes.json.
        """
        if self.db_type == "mongodb":
            return self.db_service.getProducts(consider_delatedProducts, consider_unavailableProducts)

        query = '''

             SELECT DISTINCT pms_products.title, 
                pms_products_offers.sku, 
                pms_products.brand, 
                pms_products.refrigerated, 
                pms_products.weight AS 'weight [grams]', 
                pms_products_offers.ID AS '_id', 
                pms_products_offers.channelId AS 'channel',
                pms_products.categoryId, 
                pms_products.partnerId, 
                warehouseId, 
                ISNULL(
                    indaco_general_productionarea,-1
                ) AS 'production_areas', 
                ISNULL(indaco_generalfood_biologic, 0) AS 'biologic', 
                ISNULL(indaco_generalfood_vegan, 0) AS 'vegan', 
                ISNULL(
                    indaco_generalfood_biodynamic, 0
                ) AS 'biodinamic', 
                ISNULL(
                    indaco_generalfood_glutenfree, 0
                ) AS 'gluten_free', 
                ISNULL(indaco_general_isforadult, 0) AS 'isforadult' 
                FROM 
                pms_products 
                INNER JOIN pms_products_offers ON (
                    pms_products._id = pms_products_offers.parent_id
                ) 
                INNER JOIN pms_channels ON (pms_products_offers.channelId = pms_channels._id)

                INNER JOIN pms_products_inventoryLevels_object ON(
                    pms_products._id = pms_products_inventoryLevels_object.parent_ID
                ) 
                FULL OUTER JOIN (
                    SELECT 
                    parent_id, 
                    indaco_general_productionarea, 
                    indaco_generalfood_biologic, 
                    indaco_generalfood_vegan, 
                    indaco_generalfood_biodynamic, 
                    indaco_generalfood_glutenfree, 
                    indaco_general_isforadult 
                    FROM 
                    (
                        SELECT 
                        parent_id, 
                        NAME, 
                        value 
                        FROM 
                        pms_products_offers_attributes
                    ) s PIVOT (
                        Max(value) FOR NAME IN (
                        indaco_general_productionarea, indaco_generalfood_biologic, 
                        indaco_generalfood_vegan, indaco_generalfood_biodynamic, 
                        indaco_generalfood_glutenfree, 
                        indaco_general_isforadult
                        )
                    ) AS p
                ) AS p ON (pms_products._id = p.parent_id) 
                WHERE 
                pms_products_offers.deleted = 0
            
            '''

        products = self.execute_query(query)

        # Assign back on the frame instead of fillna(inplace=True) on a column
        # selection: the chained-assignment form no longer updates the parent
        # DataFrame under pandas copy-on-write.
        products['production_areas'] = products['production_areas'].fillna(-1)

        # Convert the production areas codes into names
        file_path = path.join('_library', 'INDACO_collectionCodes.json')
        _, collectionTypes = load_collectionTypes(file_path, verbose = False)
        productionArea_codes = collectionTypes['production_areas']

        def _area_label(area_code):
            # Known code -> capitalized name; -1 means "not set" -> empty
            # string; anything else is flagged so unexpected codes surface.
            key = str(area_code)
            if key in productionArea_codes:
                return productionArea_codes[key].capitalize()
            if int(area_code) == -1:
                return ""
            return f"Unknown (code:{area_code})"

        products['production_areas'] = products['production_areas'].apply(_area_label)
        return products
    
    def get_dBproduct(self, item_sku, as_dict, channel=None, product=False):
        """Fetch a single product (product=True) or offer record by SKU.

        item_sku: SKU to look up.
        as_dict:  forwarded to the MongoDB driver only.
        channel:  channel id, required when looking up an offer (product=False).
        Returns a single-record dict on the SQL path; raises IndexError if no
        row matches (unchanged behavior).
        """
        if self.db_type == "mongodb":
            return self.db_service.getProduct(item_sku, as_dict)

        # Bound parameters instead of f-string interpolation: the previous
        # version was vulnerable to SQL injection through item_sku/channel.
        if not product:
            stmt = text('''

                    SELECT ID AS '_id',
                            sku
                        FROM pms_products_offers
                        WHERE sku = :sku
                        AND channelId = :channel
                ''')
            params = {"sku": item_sku, "channel": channel}
        else:
            stmt = text('''

                    SELECT _id,
                            sku
                        FROM pms_products
                        WHERE sku = :sku
                ''')
            params = {"sku": item_sku}

        with self.db_service.begin() as conn:
            result = pd.read_sql_query(stmt, conn, params=params)
        return result.to_dict("records")[0]
    
    def get_sellers(self, sellerId = None):
        """Return sellers joined with their warehouse id.

        sellerId: optional partner _id restricting the result to one seller.
        Previously this filter was silently ignored on the SQL path (it was
        only honored by the MongoDB driver); it is now applied with a bound
        parameter.  Default behavior (sellerId=None) is unchanged.
        """
        if self.db_type == "mongodb":
            return self.db_service.getSellers(sellerId)

        query = '''

             SELECT 
                pms_partners._id, 
                companyName, 
                email, 
                address, 
                pms_partners.phone, 
                pms_warehouses._id AS "warehouse_id", 
                vatNumber, 
                active, 
                pms_partners.createdAt, 
                pms_partners.updatedAt 
                FROM 
                pms_partners 
                INNER JOIN pms_warehouses ON (
                    pms_partners._id = pms_warehouses.partnerId
                )

            '''

        if sellerId is None:
            return self.execute_query(query)

        # Filter with a bound parameter -- never interpolate ids into SQL.
        stmt = text(query + ' WHERE pms_partners._id = :sellerId')
        with self.db_service.begin() as conn:
            return pd.read_sql_query(stmt, conn, params = {"sellerId": sellerId})
    
    def get_warehouses(self) -> DataFrame:
        """Return every warehouse with its basic attributes."""
        if self.db_type == "mongodb":
            return self.db_service.getWarehouses()

        query = '''

             SELECT _id,
                    name,
                    phone,
                    type,
                    size,
                    isIndaco,
                    isShared,
                    partnerId,
                    refrigerated,
                    createdAt,
                    updatedAt
                FROM pms_warehouses
            
            '''
        return self.execute_query(query)
    
    def get_orders(self) -> DataFrame:
        """Return consented Magento orders with anonymized customer ids.

        On the SQL path, customer e-mails are encrypted and each unique
        e-mail is mapped to a small integer inserted as 'customer_id'.
        """
        if self.db_type == "mongodb":
            return self.db_service.getOrders()

        query = '''
              SELECT DISTINCT dbo.magento_sales_order.entity_id AS 'Transaction id',
                dbo.magento_sales_order.created_at AS 'timestamp',
                dbo.magento_customer_entity.email AS 'customer',
                name AS 'product_name',
                sku,
                qty_ordered AS 'quantity',
                consent4 as 'consenso_dati_particolari',
                adult as 'consenso_adult'
            FROM dbo.magento_sales_order
                        
                        
            INNER JOIN (SELECT email,consent2,consent4,adult, 
            MAX([timestamp]) AS most_recent_signin
            FROM dbo.iubenda_consent
            WHERE consent2 IS NOT NULL AND email IS NOT NULL
            GROUP BY email,consent2,consent4,adult) AS t_consensi
                        ON (dbo.magento_sales_order.customer_email = t_consensi.email)
                        INNER JOIN dbo.magento_customer_entity
                        ON (customer_id = dbo.magento_customer_entity.entity_id)
                        INNER JOIN dbo.magento_sales_order_item
                        ON (dbo.magento_sales_order.entity_id = dbo.magento_sales_order_item.order_id)
                        WHERE t_consensi.consent2 = 1

            '''

        orders = self.execute_query(query)

        # Encrypt each distinct customer e-mail, then resolve every row to
        # the integer id assigned to its ciphertext.
        self.__encryptCustomers(orders['customer'].unique())
        anonymous_ids = orders['customer'].map(
            lambda email: DbService.anonymized_customers[self.__encrypted_customers[email]])
        orders.insert(2, column = 'customer_id', value = anonymous_ids)

        return orders
    
    def get_productTypeInfo(self, category_name) -> dict:
        """Return the (_id, label) record for the category with this label.

        Raises IndexError when no category matches (unchanged behavior).
        """
        if self.db_type == "mongodb":
            return self.db_service.getCategoryInfo(category_name)

        # Bound parameter instead of f-string interpolation: the previous
        # version was vulnerable to SQL injection through category_name.
        stmt = text('''

                SELECT _id,
                    label
                FROM pms_categories
                WHERE label = :label
            ''')
        with self.db_service.begin() as conn:
            categoryInfo = pd.read_sql_query(stmt, conn, params={"label": category_name})
        return categoryInfo.to_dict("records")[0]
    
    def get_decryptedUsers(self, userIds) -> dict:
        """Map anonymous user ids back to their decrypted identities."""
        if self.db_type == "mongodb":
            return self.db_service.decryptCustomers(userIds)
        return self.decryptCustomers(userIds)
    
    # def get_customerProfiles(self) -> dict:
    #     userProfiles = self.db_service.getCustomerProfiles()
    #     return userProfiles
    

    # -------------------------- SQLALCHEMY ----------------------------
    def execute_query(self, query_str, params=None):
        """Run a raw SQL query and return the result set as a DataFrame.

        query_str: SQL text; may contain ':name' placeholders.
        params:    optional dict of bound parameters for those placeholders —
                   prefer these over string interpolation to avoid SQL
                   injection.  Defaults to None (previous behavior).
        """
        stmt = text(query_str)
        with self.db_service.begin() as conn:
            return pd.read_sql_query(stmt, conn, params=params)
    # -------------------------- WRITE DATA ----------------------------
    def write_newDbItem(self, tableName, object):
        """Insert a new document into the given collection.

        Always writes through MongoDB, regardless of the db_type this
        service was constructed with.
        """
        credentials = read_indaco_credentials("mongodb")
        mongo = MongoDB(*credentials)
        mongo.writeNewDbItem(tableName, object)
    
    def drop_existingTable(self, tableName):
        """Drop the given collection.

        Always operates on MongoDB, regardless of the db_type this service
        was constructed with.
        """
        credentials = read_indaco_credentials("mongodb")
        mongo = MongoDB(*credentials)
        mongo.dropCollection(tableName)

    def update_attributeDbItem(self, object, attribute_name, attribute_value):
        """Set attribute_name = attribute_value on the given document.

        Always operates on MongoDB, regardless of the db_type this service
        was constructed with.
        """
        credentials = read_indaco_credentials("mongodb")
        mongo = MongoDB(*credentials)
        mongo.setNewProductAttribute(object, attribute_name, attribute_value)

    # ENCRYPTION
    def decryptCustomers(self, customer_ids):
        """Decrypt the customers behind the given anonymous integer ids.

        Ids with no known mapping are silently skipped. Requires that
        __encryptCustomers() ran earlier in this process (it populates
        ENCRYPTING_KEY and anonymized_customers).
        """
        fernet = Fernet(DbService.ENCRYPTING_KEY)

        # anonymized_customers maps ciphertext -> id; invert it for lookup.
        id_to_token = {anon_id: token
                       for token, anon_id in DbService.anonymized_customers.items()}

        return {
            customer_id: fernet.decrypt(id_to_token[customer_id]).decode()
            for customer_id in customer_ids
            if customer_id in id_to_token
        }

    def __encryptCustomers(self, unique_customers):
        """Encrypt customer identifiers and build the anonymous id mapping."""
        # Lazily create the process-wide symmetric key on first use.
        if not DbService.ENCRYPTING_KEY:
            DbService.ENCRYPTING_KEY = Fernet.generate_key()
        fernet = Fernet(DbService.ENCRYPTING_KEY)

        # customer -> ciphertext
        self.__encrypted_customers = {
            customer: fernet.encrypt(customer.encode())
            for customer in unique_customers
        }
        # ciphertext -> small 1-based integer used as the public id
        DbService.anonymized_customers = {
            token: position
            for position, token in enumerate(self.__encrypted_customers.values(), start = 1)
        }

    def enhanced_products_df(self,products, categories, sellers, warehouses, orders):
        products = products.merge(sellers, left_on = "partnerId", right_on = "_id")
        enhanced_df = products.merge(categories, how = 'left', left_on = "categoryId", right_on = "_id")

        # Retrieve the warehouse names
        warehouse_names = {item['_id']: item['name'] for item in warehouses.to_dict(orient = 'records')}
        enhanced_df['warehouses'] = enhanced_df['warehouse_id'].apply(
            lambda warehouses: warehouse_names[warehouses])
        # Fill the categories 
        # 1) Manual mapping
        manual_catMapping = {
            'affogato di sabbionara': 'Formaggio',
            'casolét val di sole': 'Formaggio',
            'cuor di fassa': 'Formaggio',
            'fontal di cavalese': 'Formaggio',
            'mezzano trentino di alta montagna': 'Formaggio',
            'primiero fresco': 'Formaggio',
            'puzzone di moena dop': 'Formaggio',
            'trentingrana 1kg': 'Formaggio',
            'trentingrana 250gr': 'Formaggio'
        }
        nanCond = enhanced_df['categoryName'].isnull()
        enhanced_df.loc[nanCond, 'categoryName'] = enhanced_df.loc[nanCond, 'title'].str.lower().apply(
            lambda product_name: manual_catMapping[product_name] if product_name in manual_catMapping.keys() else np.nan)
        
        # 2) Fill with the default name
        emptyCategory_value = ''
        enhanced_df['categoryName'] = enhanced_df['categoryName'].fillna(value = emptyCategory_value)

        # 3) Fill the google id
        enhanced_df['googleId'] = enhanced_df['googleId'].fillna(value = -1)
        
        # Select only useful columns
        enhanced_df.drop(columns = ['_id_y', 'isLeaf'], inplace = True)
        enhanced_df.rename(columns = {'_id_x': '_id'}, inplace = True)
        
        # Visualize potential missing products
        if len(enhanced_df) != len(products): 
            discarted_products = np.setdiff1d(products["title"].to_numpy(),
                                            enhanced_df["title"].to_numpy())
            
            print(f'{len(discarted_products)} products have been discarted due to an issue with the "categoryId"')
            print("-" * 75)
            print("-->", "\n--> ".join(sorted(discarted_products)))
            
        item_w_unknownCategory = sorted(enhanced_df.loc[enhanced_df['categoryName'] == emptyCategory_value, 'title'].tolist())
        if len(item_w_unknownCategory) > 0:
            print("-" * 70, "\n" + "-" * 70)
            print(f'[WARNING] The attribute "categoryId" has not been set to {len(item_w_unknownCategory)} products. \n'\
                f'It has been filled with a category equal to "{emptyCategory_value}"')
            print("-" * 70, "\n" + "-" * 70)
            print("-->", "\n--> ".join(['(' + str(idk + 1) + ') '+ item 
                                            for idk, item in enumerate(item_w_unknownCategory)]))
        
        # Mapping the column names
        enhanced_df.rename(
            columns = {
                'title': 'Title',
                'sku': 'indaco_sku',
                'brand' : 'Vendor',
                'categoryName': 'Product Type',
                'googleId': 'Type id',
                'companyName': 'Seller',
                '_id': 'productId'}, 
            inplace = True)
        # [NEW ATTRIBUTE] inTrentino flag
        enhanced_df['inTrentino_source'] = enhanced_df['Seller'].apply(lambda name: True if name  == "Vendi24" else False)
        
        # [NEW ATTRIBUTE] Item frequency within transactions/orders
        num_transactions = len(orders['Transaction id'].unique())
        normalizedFreq_func =  lambda sku: len(orders.loc[orders['sku'] == sku, 'Transaction id'].unique()) / num_transactions
        enhanced_df['Frequency'] = enhanced_df['indaco_sku'].apply(lambda sku: np.round(normalizedFreq_func(sku), 4))
    
        # Minor normalization
        #enhanced_df['indaco_sku'] = enhanced_df['indaco_sku'].str.capitalize()
        enhanced_df['productId'] = enhanced_df['productId'].astype(str) 
        enhanced_df['Vendor'] = enhanced_df['Vendor'].fillna(value = "")
        
        # Reorder columns 
        relevant_columns = ['Vendor', 'Title', 'Product Type', 'Type id', 'refrigerated', 'weight [grams]', 'warehouses', 
                            'Seller', 'production_areas', 'inTrentino_source', 'Frequency', 'indaco_sku','biologic',
                            'isforadult','vegan','biodinamic','gluten_free','channel'] #,'productId'
        enhanced_df = enhanced_df[relevant_columns]
        enhanced_df = enhanced_df.astype({'isforadult': 'int32',
                            'biologic':'int32',
                            'vegan':'int32',
                            'biodinamic':'int32',
                            'gluten_free':'int32',
                            'channel':'str'})
        return enhanced_df