# Current Path : /proc/thread-self/root/home/deltalab/PMS/recommendations/user_profiling/components/
# Current File : //proc/thread-self/root/home/deltalab/PMS/recommendations/user_profiling/components/DbService.py
from os import path
from _library.io_toolkit import load_collectionTypes, read_settings, read_indaco_credentials
from components.mongoDbService import MongoDB
from pandas import DataFrame
from sqlalchemy import create_engine,text
import pandas as pd
from cryptography.fernet import Fernet
import numpy as np
class DbService:
    """Facade over the PMS data stores.

    Depending on the ``db_type`` passed to the constructor, the instance talks
    either to MongoDB (through the project's ``MongoDB`` wrapper) or to an
    MSSQL database reached via a SQLAlchemy ``pymssql`` engine. Read methods
    mirror each other across the two back ends; write methods always target
    MongoDB.
    """
    # Fernet key shared by ALL instances; set lazily the first time customer
    # e-mails are encrypted (see __encryptCustomers). None until then.
    ENCRYPTING_KEY = None
def __init__(self,db_type):
appSettings = read_settings()
self.db_type = db_type
if(db_type in appSettings['databaseName']):
hostname, port, username, password, db_name = read_indaco_credentials(db_type)
else:
print(f"Database not available! ?{appSettings['databaseName']}?")
exit(1)
if db_type == "mongodb":
self.db_service = MongoDB(hostname, port, username, password, db_name)
else:
#url_engine = f"mssql+pymssql://{username}:{password}@{hostname}:{port}/{db_name}?&Trusted_Connection=yes&TrustedServerCertificate=yes"
url_engine = f"mssql+pymssql://{username}:{password}@{hostname}:{port}/{db_name}"
self.db_service = create_engine(url_engine)
# --------------------------- GET DATA --------------------------------------------------
def get_productTypes(self) -> DataFrame:
if(self.db_type == "mongodb"):
productTypes = self.db_service.getProductTypes()
else:
query = '''
SELECT cat1._id,
cat1.label AS 'categoryName',
cat1.googleId,
cat2.label AS 'parent',
cat1.isRoot,
cat1.isLeaf,
cat1.createdAt,
cat1.updatedAt
FROM dbo.pms_categories cat1
INNER JOIN dbo.pms_categories cat2 ON (cat1.parentId = cat2._id)
'''
productTypes = self.execute_query(query)
return productTypes
def get_products(self, consider_delatedProducts = False, consider_unavailableProducts = False) -> DataFrame:
if(self.db_type == "mongodb"):
products = self.db_service.getProducts(consider_delatedProducts, consider_unavailableProducts)
else:
query = '''
SELECT DISTINCT pms_products.title,
pms_products_offers.sku,
pms_products.brand,
pms_products.refrigerated,
pms_products.weight AS 'weight [grams]',
pms_products_offers.ID AS '_id',
pms_products_offers.channelId AS 'channel',
pms_products.categoryId,
pms_products.partnerId,
warehouseId,
ISNULL(
indaco_general_productionarea,-1
) AS 'production_areas',
ISNULL(indaco_generalfood_biologic, 0) AS 'biologic',
ISNULL(indaco_generalfood_vegan, 0) AS 'vegan',
ISNULL(
indaco_generalfood_biodynamic, 0
) AS 'biodinamic',
ISNULL(
indaco_generalfood_glutenfree, 0
) AS 'gluten_free',
ISNULL(indaco_general_isforadult, 0) AS 'isforadult'
FROM
pms_products
INNER JOIN pms_products_offers ON (
pms_products._id = pms_products_offers.parent_id
)
INNER JOIN pms_channels ON (pms_products_offers.channelId = pms_channels._id)
INNER JOIN pms_products_inventoryLevels_object ON(
pms_products._id = pms_products_inventoryLevels_object.parent_ID
)
FULL OUTER JOIN (
SELECT
parent_id,
indaco_general_productionarea,
indaco_generalfood_biologic,
indaco_generalfood_vegan,
indaco_generalfood_biodynamic,
indaco_generalfood_glutenfree,
indaco_general_isforadult
FROM
(
SELECT
parent_id,
NAME,
value
FROM
pms_products_offers_attributes
) s PIVOT (
Max(value) FOR NAME IN (
indaco_general_productionarea, indaco_generalfood_biologic,
indaco_generalfood_vegan, indaco_generalfood_biodynamic,
indaco_generalfood_glutenfree,
indaco_general_isforadult
)
) AS p
) AS p ON (pms_products._id = p.parent_id)
WHERE
pms_products_offers.deleted = 0
'''
products = self.execute_query(query)
products['production_areas'].fillna(-1,inplace=True)
# Convert the production areas codes into names
file_path = path.join('_library', 'INDACO_collectionCodes.json')
_, collectionTypes = load_collectionTypes(file_path, verbose = False)
productionArea_codes = collectionTypes['production_areas']
products['production_areas'] = products['production_areas'].apply(
lambda area_code:
productionArea_codes[str(area_code)].capitalize()
if str(area_code) in productionArea_codes.keys() else f"Unknown (code:{area_code})"
if int(area_code) != -1 else ""
)
return products
def get_dBproduct(self, item_sku, as_dict,channel=None,product=False):
if(self.db_type == "mongodb"):
product = self.db_service.getProduct(item_sku, as_dict)
else:
if not product:
query = f'''
SELECT ID AS '_id',
sku
FROM pms_products_offers
WHERE sku = '{item_sku}'
AND channelId = '{channel}'
'''
else:
query = f'''
SELECT _id,
sku
FROM pms_products
WHERE sku = '{item_sku}'
'''
product = self.execute_query(query)
product = product.to_dict("records")[0]
return product
def get_sellers(self, sellerId = None):
if(self.db_type == "mongodb"):
sellers = self.db_service.getSellers(sellerId)
else:
query = '''
SELECT
pms_partners._id,
companyName,
email,
address,
pms_partners.phone,
pms_warehouses._id AS "warehouse_id",
vatNumber,
active,
pms_partners.createdAt,
pms_partners.updatedAt
FROM
pms_partners
INNER JOIN pms_warehouses ON (
pms_partners._id = pms_warehouses.partnerId
)
'''
sellers = self.execute_query(query)
return sellers
def get_warehouses(self) -> DataFrame:
if(self.db_type == "mongodb"):
warehouses = self.db_service.getWarehouses()
else:
query = '''
SELECT _id,
name,
phone,
type,
size,
isIndaco,
isShared,
partnerId,
refrigerated,
createdAt,
updatedAt
FROM pms_warehouses
'''
warehouses = self.execute_query(query)
return warehouses
    def get_orders(self) -> DataFrame:
        """Fetch order lines for customers whose latest consent allows it.

        SQL branch: joins Magento orders/items with the most recent iubenda
        consent record per e-mail and keeps only rows with ``consent2 = 1``;
        the customer e-mails are then encrypted and an anonymous integer
        ``customer_id`` column is inserted as the third column.

        Returns (SQL branch): DataFrame with columns 'Transaction id',
        'timestamp', 'customer', 'customer_id', 'product_name', 'sku',
        'quantity', 'consenso_dati_particolari', 'consenso_adult'.

        NOTE(review): the anonymization step is placed in the SQL branch only,
        on the assumption that the MongoDB wrapper handles it itself (it
        exposes its own decryptCustomers) — confirm against mongoDbService.
        """
        if(self.db_type == "mongodb"):
            orders = self.db_service.getOrders()
        else:
            query = '''
            SELECT DISTINCT dbo.magento_sales_order.entity_id AS 'Transaction id',
                   dbo.magento_sales_order.created_at AS 'timestamp',
                   dbo.magento_customer_entity.email AS 'customer',
                   name AS 'product_name',
                   sku,
                   qty_ordered AS 'quantity',
                   consent4 as 'consenso_dati_particolari',
                   adult as 'consenso_adult'
            FROM dbo.magento_sales_order
            INNER JOIN (SELECT email,consent2,consent4,adult,
                        MAX([timestamp]) AS most_recent_signin
                        FROM dbo.iubenda_consent
                        WHERE consent2 IS NOT NULL AND email IS NOT NULL
                        GROUP BY email,consent2,consent4,adult) AS t_consensi
            ON (dbo.magento_sales_order.customer_email = t_consensi.email)
            INNER JOIN dbo.magento_customer_entity
            ON (customer_id = dbo.magento_customer_entity.entity_id)
            INNER JOIN dbo.magento_sales_order_item
            ON (dbo.magento_sales_order.entity_id = dbo.magento_sales_order_item.order_id)
            WHERE t_consensi.consent2 = 1
            '''
            orders = self.execute_query(query)
            # Encrypt the e-mail addresses (side effects: fills
            # self.__encrypted_customers and DbService.anonymized_customers)
            self.__encryptCustomers(orders['customer'].unique())
            # Map each e-mail to its anonymous integer alias and store it in a
            # dedicated 'customer_id' column
            customer_ids = orders['customer'].apply(
                lambda customer: DbService.anonymized_customers[self.__encrypted_customers[customer]])
            orders.insert(2, column = 'customer_id', value = customer_ids)
        return orders
def get_productTypeInfo(self, category_name) -> dict:
if(self.db_type == "mongodb"):
categoryInfo = self.db_service.getCategoryInfo(category_name)
else:
query = f'''
SELECT _id,
label
FROM pms_categories
WHERE label = '{category_name}'
'''
categoryInfo = self.execute_query(query)
categoryInfo = categoryInfo.to_dict("records")[0]
return categoryInfo
def get_decryptedUsers(self, userIds) -> dict:
if(self.db_type == "mongodb"):
decryptedUsers = self.db_service.decryptCustomers(userIds)
else:
decryptedUsers = self.decryptCustomers(userIds)
return decryptedUsers
# def get_customerProfiles(self) -> dict:
# userProfiles = self.db_service.getCustomerProfiles()
# return userProfiles
# -------------------------- SQLALCHEMY ----------------------------
def execute_query(self,query_str):
query = text(query_str)
with self.db_service.begin() as conn:
toRtn = pd.read_sql_query(query, conn)
return toRtn
# -------------------------- WRITE DATA ----------------------------
def write_newDbItem(self, tableName, object):
hostname, port, username, password, db_name = read_indaco_credentials("mongodb")
db_service = MongoDB(hostname, port, username, password, db_name)
db_service.writeNewDbItem(tableName, object)
def drop_existingTable(self, tableName):
hostname, port, username, password, db_name = read_indaco_credentials("mongodb")
db_service = MongoDB(hostname, port, username, password, db_name)
db_service.dropCollection(tableName)
def update_attributeDbItem(self, object, attribute_name, attribute_value):
hostname, port, username, password, db_name = read_indaco_credentials("mongodb")
db_service = MongoDB(hostname, port, username, password, db_name)
db_service.setNewProductAttribute(object, attribute_name, attribute_value)
# ENCRYPTION
def decryptCustomers(self, customer_ids):
# Get the encrypting key
fernet = Fernet(DbService.ENCRYPTING_KEY)
# Reverse the mapping functin
anonymity_reverseMapping = dict(map(reversed, DbService.anonymized_customers.items()))
# Map the encryped user names
decryptedUsers = dict()
for customer_id in customer_ids:
if customer_id in anonymity_reverseMapping.keys():
decryptedUsers[customer_id] = fernet.decrypt(anonymity_reverseMapping[customer_id]).decode()
return decryptedUsers
def __encryptCustomers(self, unique_customers):
# Initialize and generate the encrypting key
if not DbService.ENCRYPTING_KEY:
DbService.ENCRYPTING_KEY = Fernet.generate_key()
fernet = Fernet(DbService.ENCRYPTING_KEY)
# Encrypt the custumers
encryptedCustomers = [fernet.encrypt(customer.encode()) for customer in unique_customers]
self.__encrypted_customers = dict(zip(unique_customers, encryptedCustomers))
# Mapping the encryptions into a simple integer value
DbService.anonymized_customers = {encrypted_customer: (idk + 1)
for idk, encrypted_customer in enumerate(self.__encrypted_customers.values())}
def enhanced_products_df(self,products, categories, sellers, warehouses, orders):
products = products.merge(sellers, left_on = "partnerId", right_on = "_id")
enhanced_df = products.merge(categories, how = 'left', left_on = "categoryId", right_on = "_id")
# Retrieve the warehouse names
warehouse_names = {item['_id']: item['name'] for item in warehouses.to_dict(orient = 'records')}
enhanced_df['warehouses'] = enhanced_df['warehouse_id'].apply(
lambda warehouses: warehouse_names[warehouses])
# Fill the categories
# 1) Manual mapping
manual_catMapping = {
'affogato di sabbionara': 'Formaggio',
'casolét val di sole': 'Formaggio',
'cuor di fassa': 'Formaggio',
'fontal di cavalese': 'Formaggio',
'mezzano trentino di alta montagna': 'Formaggio',
'primiero fresco': 'Formaggio',
'puzzone di moena dop': 'Formaggio',
'trentingrana 1kg': 'Formaggio',
'trentingrana 250gr': 'Formaggio'
}
nanCond = enhanced_df['categoryName'].isnull()
enhanced_df.loc[nanCond, 'categoryName'] = enhanced_df.loc[nanCond, 'title'].str.lower().apply(
lambda product_name: manual_catMapping[product_name] if product_name in manual_catMapping.keys() else np.nan)
# 2) Fill with the default name
emptyCategory_value = ''
enhanced_df['categoryName'] = enhanced_df['categoryName'].fillna(value = emptyCategory_value)
# 3) Fill the google id
enhanced_df['googleId'] = enhanced_df['googleId'].fillna(value = -1)
# Select only useful columns
enhanced_df.drop(columns = ['_id_y', 'isLeaf'], inplace = True)
enhanced_df.rename(columns = {'_id_x': '_id'}, inplace = True)
# Visualize potential missing products
if len(enhanced_df) != len(products):
discarted_products = np.setdiff1d(products["title"].to_numpy(),
enhanced_df["title"].to_numpy())
print(f'{len(discarted_products)} products have been discarted due to an issue with the "categoryId"')
print("-" * 75)
print("-->", "\n--> ".join(sorted(discarted_products)))
item_w_unknownCategory = sorted(enhanced_df.loc[enhanced_df['categoryName'] == emptyCategory_value, 'title'].tolist())
if len(item_w_unknownCategory) > 0:
print("-" * 70, "\n" + "-" * 70)
print(f'[WARNING] The attribute "categoryId" has not been set to {len(item_w_unknownCategory)} products. \n'\
f'It has been filled with a category equal to "{emptyCategory_value}"')
print("-" * 70, "\n" + "-" * 70)
print("-->", "\n--> ".join(['(' + str(idk + 1) + ') '+ item
for idk, item in enumerate(item_w_unknownCategory)]))
# Mapping the column names
enhanced_df.rename(
columns = {
'title': 'Title',
'sku': 'indaco_sku',
'brand' : 'Vendor',
'categoryName': 'Product Type',
'googleId': 'Type id',
'companyName': 'Seller',
'_id': 'productId'},
inplace = True)
# [NEW ATTRIBUTE] inTrentino flag
enhanced_df['inTrentino_source'] = enhanced_df['Seller'].apply(lambda name: True if name == "Vendi24" else False)
# [NEW ATTRIBUTE] Item frequency within transactions/orders
num_transactions = len(orders['Transaction id'].unique())
normalizedFreq_func = lambda sku: len(orders.loc[orders['sku'] == sku, 'Transaction id'].unique()) / num_transactions
enhanced_df['Frequency'] = enhanced_df['indaco_sku'].apply(lambda sku: np.round(normalizedFreq_func(sku), 4))
# Minor normalization
#enhanced_df['indaco_sku'] = enhanced_df['indaco_sku'].str.capitalize()
enhanced_df['productId'] = enhanced_df['productId'].astype(str)
enhanced_df['Vendor'] = enhanced_df['Vendor'].fillna(value = "")
# Reorder columns
relevant_columns = ['Vendor', 'Title', 'Product Type', 'Type id', 'refrigerated', 'weight [grams]', 'warehouses',
'Seller', 'production_areas', 'inTrentino_source', 'Frequency', 'indaco_sku','biologic',
'isforadult','vegan','biodinamic','gluten_free','channel'] #,'productId'
enhanced_df = enhanced_df[relevant_columns]
enhanced_df = enhanced_df.astype({'isforadult': 'int32',
'biologic':'int32',
'vegan':'int32',
'biodinamic':'int32',
'gluten_free':'int32',
'channel':'str'})
return enhanced_df