抓取淘宝商品信息并制作商品信息比价表(以口红为例)

快速抓取淘宝上的口红信息,就可以很好地为女友服务,帮女友挑选心仪的商品喽~~~
反正小编是没有女朋友的(骄傲脸.jpg)

import requests
import re
import os

def getHtmlText(url):
    """Download ``url`` and return the response body as text.

    The request uses a 30 second timeout. The response encoding is set
    from ``apparent_encoding`` (guessed from the body) before decoding.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body.

    Raises:
        requests.RequestException: If the request fails or the server
            answers with an HTTP error status. The failure message is
            printed first and the original exception is re-raised so the
            caller can decide how to proceed.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # The original printed after ``raise``, so the message could never
        # appear; report first, then propagate the same exception.
        print("产生异常")
        raise
    r.encoding = r.apparent_encoding
    return r.text

def pageParser(ilt, html):
    """Extract ``[price, title]`` pairs from ``html`` and append them to ``ilt``.

    The page text is searched for ``"raw_title":"..."`` and
    ``"view_price":"..."`` fields. The n-th price is paired with the n-th
    title; if the counts differ, the surplus entries are ignored.

    Args:
        ilt: List that receives one ``[price, title]`` entry per product.
            It is mutated in place.
        html: Page source to scan.

    Returns:
        None.
    """
    import json

    def _decode(literal):
        # Decode the quoted value as a JSON string instead of ``eval``-ing
        # it: the text comes from a remote page and must never be executed.
        try:
            return json.loads(literal)
        except ValueError:
            # Not a valid JSON string; fall back to the text between quotes.
            return literal[1:-1]

    # Capture the quoted value directly rather than splitting on ":",
    # which broke any title that itself contained a colon.
    titles = re.findall(r'"raw_title":("(?:[^"\\\n]|\\.)*")', html)
    prices = re.findall(r'"view_price":("(?:[^"\\\n]|\\.)*")', html)

    # zip stops at the shorter list, so a count mismatch cannot raise.
    for price, title in zip(prices, titles):
        ilt.append([_decode(price), _decode(title)])


def printGoodsList(ilt, goods="口红"):
    """Print the price table and save it to ``file/infor.txt``.

    The ``file`` directory is created if it does not exist. Every table
    line (header, column titles, one row per product) is printed and also
    written to the file, one line each, encoded as UTF-8.

    Args:
        ilt: Sequence of ``[price, title]`` entries, in display order.
        goods: Product name shown in the table header.

    Returns:
        None.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("file", exist_ok=True)

    header = "\n\t\t\t\t淘宝{}商品信息比价表".format(goods)
    tplt = "{:4}\t{:8}\t{:16}"
    lines = [header, tplt.format("序号", "价格", "名称")]
    lines.extend(
        tplt.format(count, item[0], item[1])
        for count, item in enumerate(ilt, start=1)
    )

    # Explicit UTF-8 so the Chinese text is written regardless of the
    # platform's default encoding; ``with`` closes the file on any error.
    with open("file/infor.txt", "w", encoding="utf-8") as fp:
        for line in lines:
            print(line)
            # The original wrote rows without a separator, so the whole
            # table ended up on a single line in the file.
            fp.write(line + "\n")




def main():
    """Scrape several Taobao search result pages and output the price table.

    Builds the search URL for the configured product, fetches ``depth``
    result pages, collects ``[price, title]`` entries from each, and hands
    the combined list to ``printGoodsList``. A page that fails to download
    or parse is reported and skipped so the remaining pages are still
    processed.
    """
    goods = '口红'
    infoList = []
    depth = 10
    start_url = r"https://s.taobao.com/search?q=" + goods
    for i in range(depth):
        # The ``s`` query parameter advances by 44 per page — presumably
        # the number of items per result page; verify against the site.
        url = start_url + '&s=' + str(i * 44)
        try:
            html = getHtmlText(url)
            pageParser(infoList, html)
        except Exception as exc:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C and
            # SystemExit still stop the run; the failure is reported
            # instead of being silently dropped.
            print("第{}页抓取失败,已跳过: {!r}".format(i + 1, exc))
            continue
    printGoodsList(infoList)

# Run the scraper only when this file is executed as a script, not on import.
if __name__=="__main__":
    main()

这里写图片描述

猜你喜欢

转载自blog.csdn.net/weifuliu/article/details/80466534