# 图片转存二

import math
import pandas as pd
import re,json
import time
import numpy as np
from get_ebay_item_thread_db_specifics_api import Database
import os
import glob
from shutil import copyfile
from Lib import hashlib
import pandas as pd
from PIL import Image



# eBay category IDs handled by this script:
#   33709  Fog Lights
#   33710  Head Lights
#   33716  3rd Brake Light
category_filter = [
    33710,
    33709,
    33716,
]

def specific_attribute_parser_df(infile):
    """Expand the JSON "specifics" column of a listing sheet into columns.

    Reads ``infile`` with ``pandas.read_excel``, keeps the rows whose
    "pCategoryID" is in the module-level ``category_filter`` and whose
    "specifics" cell is non-null, parses every "specifics" cell as JSON and
    merges the parsed attributes back onto the sheet via "ebayno".

    Args:
        infile: Anything ``pandas.read_excel`` accepts. The sheet must have
            the columns "pCategoryID", "specifics", "ebayno" and "sold".

    Returns:
        A DataFrame sorted by "sold" (descending). The sheet's own columns
        come first, followed by the parsed attribute columns ordered by how
        many rows have a value for them (most populated first).
    """
    df = pd.read_excel(infile)
    # DataFrame.info() prints its report itself and returns None, so it must
    # not be wrapped in print() (that emitted a stray "None" line).
    df.info()
    # Keep only the wanted categories that actually carry specifics.
    df = df[df["pCategoryID"].isin(category_filter) & df["specifics"].notnull()]
    df.info()

    parsed_rows = []
    for i in df.index:
        # Assumes each cell holds a JSON object (dict) - TODO confirm.
        item = json.loads(df.loc[i, "specifics"])
        item["ebayno"] = df.loc[i, "ebayno"]
        parsed_rows.append(pd.Series(item))

    if not parsed_rows:
        # Nothing survived the filter: there are no attribute columns to add,
        # and the merge below would fail on the missing "ebayno" key.
        return df.sort_values(by=["sold"], ascending=False)

    result = pd.DataFrame(parsed_rows)
    # Non-null count per attribute, most populated attribute first.
    populated = result.count(axis='index').sort_values(ascending=False)

    columns = list(df.columns)
    attribute_columns = list(populated.index)
    attribute_columns.remove("ebayno")  # already part of the sheet columns
    columns.extend(attribute_columns)

    # NOTE(review): if a parsed attribute shares its name with a sheet column
    # other than "ebayno", merge() suffixes both and the selection below
    # raises KeyError - confirm this cannot happen with real data.
    df = pd.merge(df, result, on="ebayno", how="left")
    df = df.sort_values(by=["sold"], ascending=False)
    return df[columns]

def default(obj, value):
    """Join ``obj`` with ";" or fall back to ``value`` when ``obj`` is None.

    Only ``None`` triggers the fallback; an empty iterable is joined and
    therefore yields an empty string.
    """
    return value if obj is None else ";".join(obj)
def get_sku_number(df):
    """Extract a 140xx/141xx SKU number from the part-number columns.

    The columns 'Manufacturer Part Number', 'Interchange Part Number',
    'Other Part Number' and 'Sku' are scanned in that order. For every row
    the first column whose value ends with "140" or "141" followed by two
    digits (plus optional trailing non-digit characters) supplies the value
    stored in the new "SKU_SUM" column.

    Args:
        df: DataFrame with an "ebayno" column and any subset of the
            part-number columns. The "SKU_SUM" column is added to this
            frame in place.

    Returns:
        The rows for which a SKU number was found, sorted by "SKU_SUM",
        with "SKU_SUM" placed immediately before "ebayno".
    """
    part_number_fields = [
        'Manufacturer Part Number',
        'Interchange Part Number',
        'Other Part Number',
        'Sku',
    ]
    # Compiled once instead of on every cell.
    sku_pattern = re.compile(r"140\d{2}\D*$|141\d{2}\D*$", flags=re.I)

    df["SKU_SUM"] = ''
    for field in part_number_fields:
        # The part-number columns come from per-listing attributes, so any
        # of them may be absent; skip instead of raising KeyError.
        if field not in df.columns:
            continue
        for i in df[df[field].notnull()].index:
            if df.loc[i, 'SKU_SUM']:
                continue  # an earlier column already supplied the number
            values = df.loc[i, field]
            if not values:
                continue
            print("解析前 = ", values)
            # Spreadsheet cells holding a bare number arrive as int/float;
            # the regex needs text, and 14012.0 must read as "14012".
            if isinstance(values, float) and values.is_integer():
                values = int(values)
            sku_number = ";".join(sku_pattern.findall(str(values)))
            print("sku_number = ", sku_number)
            df.loc[i, 'SKU_SUM'] = sku_number

    df = df[df["SKU_SUM"] != '']
    df = df.sort_values(by='SKU_SUM')
    # Move SKU_SUM so that it sits right in front of ebayno.
    columns = list(df.columns)
    columns.remove("SKU_SUM")
    columns.insert(columns.index("ebayno"), "SKU_SUM")
    return df[columns]


def ensure_SKU_JX_ebay(ebay,vio):
    """Map the extracted eBay SKU numbers onto the product SKUs in ``vio``.

    Every value of ``ebay["SKU_SUM"]`` is prefixed with "JX-" and compared,
    with all "-" characters removed, against the values of ``vio["产品SKU"]``
    (also with "-" removed). The matching product SKU is written to a new
    "SKU" column; rows without a match get an empty string.

    Args:
        ebay: DataFrame with a "SKU_SUM" column. It is modified in place
            ("SKU_SUM" is rewritten and "SKU" is added).
        vio: DataFrame (or mapping) providing the "产品SKU" values.

    Returns:
        ``ebay`` with the "SKU" column placed immediately before "SKU_SUM".
    """
    # Dash-less SKU -> SKU exactly as spelled in ``vio``.
    sku_map = {str(sku).replace("-", ''): sku for sku in vio["产品SKU"]}
    print(sku_map)

    ebay["SKU_SUM"] = ['JX-' + str(i) for i in ebay["SKU_SUM"]]
    # Assign the whole column at once so that it also exists for an empty
    # frame, and look up with .get() so misses do not grow the map.
    ebay["SKU"] = [
        sku_map.get(str(sku).replace("-", ''), '') for sku in ebay["SKU_SUM"]
    ]

    columns = list(ebay.columns)
    columns.remove("SKU")
    columns.insert(columns.index("SKU_SUM"), "SKU")
    return ebay[columns]
def sku_listing(df):
    """Keep the rows with a mapped SKU and drop the all-empty extra columns.

    The first 14 columns are kept as they are. The remaining columns follow,
    ordered by their non-null count (highest first); columns beyond the first
    14 that hold no value at all are left out.
    """
    matched = df[df["SKU"] != '']
    head = list(matched.columns)[:14]

    # Non-null count per column, most populated first, empty columns removed.
    counts = matched.count(axis='index').sort_values(ascending=False)
    counts = counts[counts > 0]
    print(counts.index)

    # dict.fromkeys keeps the first occurrence of each label, in order.
    tail = [name for name in dict.fromkeys(counts.index) if name not in head]
    ordered = head + tail
    print(ordered)
    return matched[ordered]
def picture_archive(item,infile):
    """Copy the pictures of every SKU into a per-SKU sub-directory.

    For each SKU a directory ``<infile>/<sku>`` is created. All files in
    ``infile`` matching ``<ebayno>*.jpg`` for the SKU's listings are copied
    there, skipping files whose content (MD5 of the bytes) was already
    copied for the same SKU.

    Args:
        item: Mapping of SKU -> iterable of eBay item numbers.
        infile: Directory that holds the downloaded ``.jpg`` files; with or
            without a trailing path separator.
    """
    for sku, ebaynos in item.items():
        print(sku)
        print(ebaynos)
        target_dir = os.path.join(infile, sku)
        os.makedirs(target_dir, exist_ok=True)
        seen_digests = set()
        for ebayno in ebaynos:
            # Build the pattern with os.path.join, like the destination, so a
            # directory given without a trailing separator also works.
            # NOTE(review): the prefix match also picks up longer item
            # numbers starting with the same digits - confirm file naming.
            pattern = os.path.join(infile, str(ebayno) + '*.jpg')
            # glob order is unspecified; sort so the duplicate that gets
            # kept is always the same one.
            for picture in sorted(glob.glob(pattern)):
                # Close the handle right away instead of leaking it.
                with open(picture, 'rb') as handle:
                    digest = hashlib.md5(handle.read()).hexdigest()
                if digest not in seen_digests:
                    seen_digests.add(digest)
                    copyfile(picture, os.path.join(target_dir, os.path.basename(picture)))
        print(len(seen_digests))

def picture_archive_map(sku_to_listing):
    """Build the SKU -> eBay item numbers mapping from a listing frame.

    Keys follow the order in which the SKUs first appear in the "SKU"
    column; each value is the array of "ebayno" values of the rows carrying
    that SKU.
    """
    pairs = sku_to_listing[["SKU", "ebayno"]]
    return {
        sku: pairs.loc[pairs["SKU"] == sku, "ebayno"].values
        for sku in pairs["SKU"].unique()
    }

# 猜你喜欢
#
# 转载自 blog.csdn.net/zn505119020/article/details/79257102