# -*- coding: utf-8 -*-
import urllib.request
from bs4 import BeautifulSoup
import os
import time
import pymysql
import xlwt
def getDatas():
    """Scrape Douban Top-250 movie entries and store them in MySQL.

    Fetches a saved local HTML snapshot (see ``url`` below; the live site
    URL is kept in a comment), extracts each movie's rank, title and poster
    URL from the ``div.item`` containers, and inserts one row per movie
    into the ``doubanInfo`` table of the ``douban0424`` database.

    Side effects: network/file read via urllib, rows inserted + committed
    into MySQL, progress messages printed to stdout.

    Raises: propagates any urllib / parsing / pymysql error; on a database
    error the partial batch is rolled back before re-raising.
    """
    # url = "https://movie.douban.com/top250"
    url = "file:///E:/scrapy/2018-04-27/movie/movie.html"
    # Browser-like User-Agent so the (live) site does not reject the request.
    header = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"}
    ret = urllib.request.Request(url=url, headers=header)
    # Close the response as soon as the parse tree is built (original leaked it).
    with urllib.request.urlopen(ret) as res:
        # Parse into a navigable DOM tree.
        response = BeautifulSoup(res, 'html.parser')
    # Each movie entry lives in a <div class="item"> container.
    datas = response.find_all('div', {'class': 'item'})

    con = pymysql.connect(host="localhost", user="root", passwd="123456",
                          db="douban0424", port=3306, charset="utf8")
    # pymysql.connect either returns a live connection or raises, so no
    # truthiness check is needed.
    print("链接成功++++++++++++")
    try:
        # One cursor reused for every insert (original opened one per row);
        # pymysql cursors support the context-manager protocol.
        with con.cursor() as cur:
            print("游标获取成功=============")
            # Parameterized statement — scraped text never reaches the SQL string.
            sql = "insert into doubanInfo value(null,%s,%s,%s)"
            for item in datas:
                rank = item.find('div', {'class': 'pic'}).find('em').get_text()
                title = (item.find('div', {'class': 'info'})
                             .find('div', {'class': 'hd'})
                             .find('a')
                             .find('span', {'class': 'title'})
                             .get_text())
                pic_url = item.find('div', {'class': 'pic'}).find('a').find('img').get('src')
                cur.execute(sql, (rank, title, pic_url))
        # Commit the whole batch once (original committed on every row).
        con.commit()
    except Exception:
        # Don't leave a half-inserted batch behind.
        con.rollback()
        raise
    finally:
        con.close()
# if __name__=="__main__":
getDatas()