# -*- coding: utf-8 -*-
# Scrape one douguo.com recipe page: save its banner image to "1.jpg" and
# print the text fields selected from the page.
import io

from bs4 import BeautifulSoup
import requests

url = "https://www.douguo.com/cookbook/2029254.html"

header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}

# requests applies no timeout by default, so an unresponsive server would
# block the script forever; bound every request explicitly (seconds).
REQUEST_TIMEOUT = 10

html = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
# Fail early on 4xx/5xx instead of parsing an error page as if it were a recipe.
html.raise_for_status()
text = BeautifulSoup(html.content, "lxml")
img_title = text.select("#banner img")
# Raises IndexError if the page has no <img> under #banner.
imgg = img_title[0].get("src")


def get_img_data(ul):
    """Download the resource at ``ul`` and write its bytes to ``1.jpg``.

    The request reuses the module-level ``header`` (User-Agent) and
    ``REQUEST_TIMEOUT``. The output file is created in the current working
    directory and overwritten if it already exists.

    Args:
        ul: URL of the image to download.

    Returns:
        None.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so
            that an error page is never saved under an image file name.
        requests.RequestException: on connection failure or timeout.
    """
    htm = requests.get(ul, headers=header, timeout=REQUEST_TIMEOUT)
    htm.raise_for_status()
    # The context manager closes the file even if the write fails.
    with open("1.jpg", "wb") as f:
        f.write(htm.content)


# get_img_data returns None; the name is kept because the original script
# defines it at module level.
menu_img = get_img_data(imgg)

menu_title_0 = text.select('.title.text-lips')[0].text
menu_intro = text.select('.intro')[0].text
# The first and second ".mini-title" elements are used as section headings.
mini_titles = text.select('.mini-title')
menu_title_1 = mini_titles[0].text
menu_title_2 = mini_titles[1].text
# presumably ingredient names and their quantities - confirm against the page markup
menu_content_scname = text.find_all('span', class_='scname')
menu_content_scnum = text.find_all('span', class_='scnum')
menu_step = text.select('.stepinfo')

print(menu_title_0)
print(menu_intro)
print(menu_title_1)
# Pair each "scname" span with the "scnum" span at the same position. zip stops
# at the shorter list, so a count mismatch skips the unpaired items rather than
# raising IndexError in the middle of the output.
for scname, scnum in zip(menu_content_scname, menu_content_scnum):
    print(scname.text, " ", scnum.text)
print(menu_title_2)
for menu_step_i in menu_step:
    print(menu_step_i.text)
python-python爬取豆果网(菜谱信息)
猜你喜欢
转载自www.cnblogs.com/0526yao/p/10306119.html
今日推荐
周排行