# Imports
from urllib import request,parse
from piaot import *
import json
import pymysql
# yeshu is the number of pages to crawl, as supplied by the caller.
# Custom helper function for talking to the database.
def sql(sql_z, params=None):
    """Run one SQL statement against the ``xq`` MySQL database and commit it.

    A new connection is opened for every call and is always closed again,
    even when the statement fails.

    Args:
        sql_z: SQL text to execute. It may contain ``%s`` placeholders.
        params: Optional tuple/list/dict of values for the placeholders. The
            driver escapes them, so values never have to be pasted into the
            SQL text by hand.

    Returns:
        The first row of the result set, or ``None`` when the statement
        produces no rows (for example an ``INSERT``).
    """
    # Keyword arguments: newer PyMySQL releases no longer accept positional
    # connection parameters.
    db = pymysql.connect(
        host="192.168.43.128",
        user="root",
        password="123456",
        database="xq",
        charset="utf8",
    )
    try:
        with db.cursor() as cursor:
            cursor.execute(sql_z, params)
            data = cursor.fetchone()
        # PyMySQL does not autocommit by default; without this the write
        # would be discarded when the connection closes.
        db.commit()
        return data
    finally:
        db.close()


def _xq_fetch_page(max_id=None):
    """Download one page of the Xueqiu public timeline and return parsed JSON.

    Args:
        max_id: ``id`` of the last post already seen. A falsy value requests
            the newest page.

    Returns:
        The decoded JSON document returned by the endpoint.
    """
    if max_id:
        url = "https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=" + str(max_id) + "&count=15&category=-1"
    else:
        url = "https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=-1"
    headers = {
        # pa() comes from the star-imported ``piaot`` module; presumably it
        # returns a User-Agent string -- confirm against that module.
        "User-Agent": pa(),
        # NOTE(review): hard-coded browser session cookie; the tokens in it
        # presumably expire, so refresh it if requests start failing.
        "Cookie": "device_id=40472336a6cdeeadffefa199fa0bf24a; _ga=GA1.2.280017243.1531224612; s=do126sw0xe; __utmz=1.1531224738.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utma=1.280017243.1531224612.1531224738.1531234688.3; aliyungf_tc=AQAAAAGJygtrxwQAJ0V5akZpYkAxmbvB; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; u=811534314860368; Hm_lvt_1db88642e346389874251b5a1eded6e3=1533223538,1534314860; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534314860; _gid=GA1.2.1854937153.1534314882",
    }
    print(url)
    req = request.Request(url, headers=headers)
    # The context manager closes the HTTP response once the body is read.
    with request.urlopen(req) as ht:
        return json.loads(ht.read().decode('utf-8'))


def xq(xq_url=None, shu=0, yeshu=1):
    """Crawl pages of the Xueqiu public timeline and store each post in MySQL.

    Args:
        xq_url: ``id`` of the last post already processed, used as the
            ``max_id`` paging cursor. Leave as ``None`` to start from the
            newest page.
        shu: Number of pages already processed. Callers normally leave this
            at 0; it exists so a crawl can be resumed part-way.
        yeshu: Total number of pages to process.

    Returns:
        The string ``'循环结束'`` once crawling has finished.
    """
    insert_sql = "insert into xq_1(uid,title,description,target) values(%s,%s,%s,%s);"
    max_id = xq_url

    # The page budget is checked BEFORE downloading, so no request is wasted
    # on a page that would be thrown away, and shu > yeshu cannot loop forever.
    while shu < yeshu:
        page = _xq_fetch_page(max_id)
        items = page.get('list') or []
        if not items:
            # Nothing more to page through. Continuing would reset the
            # cursor and re-crawl the newest page.
            break

        for item in items:
            # Each entry carries its payload as a JSON-encoded string.
            b = json.loads(item['data'])
            print([[b['id']], [b['title']], [b['description']], [b['target']]])
            # Values are bound as parameters, never spliced into the SQL.
            sql(insert_sql, (b['id'], b['title'], b['description'], b['target']))

        # The last post's id becomes the cursor for the next page.
        max_id = items[-1]['id']
        shu += 1

    return '循环结束'
# Script entry point: call the crawler.
if __name__ == "__main__":
    # Crawl a single page and print the crawler's completion message.
    outcome = xq(yeshu=1)
    print(outcome)