"""Helpers for turning an eBay listing export into per-SKU listings and picture folders.

The functions read a spreadsheet of listings, expand each listing's JSON
``specifics`` into columns, extract a SKU number from the part-number columns,
map it onto a reference SKU table, and group listing pictures by SKU.
"""
import glob
import hashlib
import json
import math
import os
import re
import time
from shutil import copyfile

import numpy as np
import pandas as pd
from PIL import Image

from get_ebay_item_thread_db_specifics_api import Database

# eBay category IDs kept by specific_attribute_parser_df:
#   33709  Fog Lights
#   33710  Head Lights
#   33716  3rd Brake Light
category_filter = [33710, 33709, 33716]

# Columns searched, in priority order, for a SKU number.
_SKU_FIELDS = (
    'Manufacturer Part Number',
    'Interchange Part Number',
    'Other Part Number',
    'Sku',
)

# A 140xx / 141xx number followed only by non-digits up to the end of the value.
_SKU_PATTERN = re.compile(r"140\d{2}\D*$|141\d{2}\D*$", flags=re.I)


def specific_attribute_parser_df(infile):
    """Load a listing spreadsheet and expand the JSON ``specifics`` into columns.

    Rows are kept only when ``pCategoryID`` is in ``category_filter`` and
    ``specifics`` is not null. Each ``specifics`` cell is parsed as a JSON
    object; its keys become extra columns, ordered after the original columns
    by how many listings carry them (most common first).

    Args:
        infile: Path (or file-like object) accepted by ``pandas.read_excel``.

    Returns:
        A DataFrame sorted by ``sold`` descending, containing the original
        columns followed by the specifics columns.
    """
    df = pd.read_excel(infile)
    df.info()  # info() prints its own report and returns None
    keep = df["pCategoryID"].isin(category_filter) & df["specifics"].notnull()
    df = df[keep]
    df.info()

    # One dict per listing: the parsed specifics plus the join key.
    records = []
    for ebayno, specific in zip(df["ebayno"], df["specifics"]):
        item = json.loads(specific)
        item["ebayno"] = ebayno
        records.append(item)

    if not records:
        # Nothing matched the filter, so there are no specifics columns to add.
        return df.sort_values(by=["sold"], ascending=False)

    result = pd.DataFrame(records)
    # Non-null count per specifics column, most frequent first; the stable
    # sort keeps first-seen order among columns with equal counts.
    counts = result.count(axis='index').sort_values(ascending=False, kind="mergesort")

    columns = list(df.columns)
    extra = list(counts.index)
    extra.remove("ebayno")
    columns.extend(extra)

    # NOTE(review): a specifics key that equals an existing spreadsheet column
    # name would be suffixed by merge() and break the selection below.
    df = pd.merge(df, result, on="ebayno", how="left")
    df = df.sort_values(by=["sold"], ascending=False)
    return df[columns]


def default(obj, value):
    """Join the strings in *obj* with ``;``, or return *value* when there are none.

    Args:
        obj: An iterable of strings (for example the result of ``re.findall``),
            or ``None``.
        value: Fallback returned when *obj* is ``None`` or empty.

    Returns:
        The ``;``-joined string, or *value*.
    """
    if not obj:
        return value
    return ";".join(obj)


def get_sku_number(df):
    """Extract a SKU number for each listing into a new ``SKU_SUM`` column.

    The part-number columns are scanned in priority order; the first column
    that yields a match for a row wins. Rows without any match are dropped.

    Note:
        The ``SKU_SUM`` column is added to the passed-in frame itself.

    Args:
        df: Listing frame containing ``ebayno`` and, optionally, the
            part-number columns.

    Returns:
        The rows that have a SKU number, sorted by ``SKU_SUM``, with
        ``SKU_SUM`` placed immediately before ``ebayno``.
    """
    df["SKU_SUM"] = ''  # start with an empty SKU for every row
    for field in _SKU_FIELDS:
        if field not in df.columns:
            # No listing carried this attribute, so the column does not exist.
            continue
        for i in df[df[field].notnull()].index:
            if df.loc[i, 'SKU_SUM']:
                continue  # a higher-priority column already supplied the SKU
            values = df.loc[i, field]
            if not values:
                continue
            print("解析前 = ", values)
            # str() guards against numeric cells, which re cannot search.
            sku_number = default(_SKU_PATTERN.findall(str(values)), '')
            print("sku_number = ", sku_number)
            df.loc[i, 'SKU_SUM'] = sku_number

    df = df[df["SKU_SUM"] != '']
    df = df.sort_values(by='SKU_SUM')
    columns = list(df.columns)
    columns.remove("SKU_SUM")
    columns.insert(columns.index("ebayno"), "SKU_SUM")
    return df[columns]


def ensure_SKU_JX_ebay(ebay, vio):
    """Map each listing's ``SKU_SUM`` onto a SKU from the reference table.

    ``SKU_SUM`` values are prefixed with ``JX-`` and compared, ignoring
    hyphens, against the ``产品SKU`` column of *vio*. The matching reference
    SKU is stored in a new ``SKU`` column; unmatched rows get ``''``.

    Note:
        Both columns are written on the passed-in *ebay* frame.

    Args:
        ebay: Listing frame with a ``SKU_SUM`` column.
        vio: Reference frame with a ``产品SKU`` column.

    Returns:
        *ebay* with ``SKU`` placed immediately before ``SKU_SUM``.
    """
    # Hyphen-less SKU -> SKU as written in the reference table.
    sku_map = {str(sku).replace("-", ''): sku for sku in vio["产品SKU"]}
    print(sku_map)

    ebay["SKU_SUM"] = ['JX-' + str(i) for i in ebay["SKU_SUM"]]
    # A non-empty SKU means the listing matched a reference SKU.
    ebay["SKU"] = [
        sku_map.get(str(sku).replace("-", ''), '') for sku in ebay["SKU_SUM"]
    ]

    columns = list(ebay.columns)
    columns.remove("SKU")
    columns.insert(columns.index("SKU_SUM"), "SKU")
    return ebay[columns]


def sku_listing(df, leading=14):
    """Keep the listings that matched a SKU and drop all-empty columns.

    Args:
        df: Listing frame with a ``SKU`` column.
        leading: Number of leading columns that are always kept, in their
            original order.

    Returns:
        The rows whose ``SKU`` is not ``''``, with the leading columns first
        and the remaining non-empty columns ordered by non-null count,
        most populated first.
    """
    df = df[df["SKU"] != '']
    col = list(df.columns)[:leading]

    counts = df.count(axis='index').sort_values(ascending=False)
    counts = counts[counts > 0]  # discard columns with no values at all
    print(counts.index)

    seen = set(col)
    for c in counts.index:
        if c not in seen:
            seen.add(c)
            col.append(c)
    print(col)
    return df[col]


def picture_archive(item, infile):
    """Copy each SKU's listing pictures into a per-SKU folder, skipping duplicates.

    For every SKU a folder ``<infile>/<sku>`` is created, and the ``.jpg``
    files in *infile* whose names start with one of the SKU's eBay numbers
    are copied into it. Files with identical content (same MD5) are copied
    only once per SKU.

    Args:
        item: Mapping of SKU -> iterable of eBay item numbers.
        infile: Directory that holds the downloaded pictures.
    """
    for sku, ebaynos in item.items():
        print(sku)
        print(ebaynos)
        target_dir = os.path.join(infile, sku)
        os.makedirs(target_dir, exist_ok=True)

        seen_digests = set()
        for ebayno in ebaynos:
            pattern = os.path.join(infile, str(ebayno) + '*.jpg')
            # Sorted so the copy that survives de-duplication is deterministic.
            for path in sorted(glob.glob(pattern)):
                with open(path, 'rb') as fh:
                    digest = hashlib.md5(fh.read()).hexdigest()
                if digest in seen_digests:
                    continue
                seen_digests.add(digest)
                copyfile(path, os.path.join(target_dir, os.path.basename(path)))
        print(len(seen_digests))


def picture_archive_map(sku_to_listing):
    """Group eBay item numbers by SKU.

    Args:
        sku_to_listing: Frame with ``SKU`` and ``ebayno`` columns.

    Returns:
        A dict mapping each SKU, in order of first appearance, to a NumPy
        array of its ``ebayno`` values. Rows with a missing SKU are skipped.
    """
    pairs = sku_to_listing[["SKU", "ebayno"]]
    grouped = pairs.groupby("SKU", sort=False)["ebayno"]
    return {sku: ebaynos.values for sku, ebaynos in grouped}
图片转存二
猜你喜欢
转载自blog.csdn.net/zn505119020/article/details/79257102
今日推荐
周排行