# 学习编程就像学习骑自行车一样,对新手来说最重要的是持之以恒的练习。
# 在《汲取地下水》这一章节中看见的一句话:“别担心自己的才华或能力不足。持之以恒地练习,才华便会有所增长”,现在想来,真是如此。
'''
爬虫练习 ---下厨房
version:01
author:金鞍少年
date:2020-02-24
'''
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
class xiachufang():
    """Scraper for xiachufang.com (下厨房) category 52107.

    Walks the first five pages of the category listing, extracts each
    recipe's name, ingredient summary and detail URL, and writes one
    UTF-8 text file per recipe under ``./下厨房/``.
    """

    def __init__(self):
        self.count = 1  # next listing page number to fetch (1-based)
        # Strip every character that is NOT ASCII alphanumeric or a CJK
        # ideograph, so recipe names are safe to use as file names.
        # (The original pattern '[^A-^a-z^0-9^\\u4e00-\\u9fa5]' had stray
        # '^' chars: the A-^ range also kept [ \\ ] ^ — fixed here.)
        self.comp = re.compile('[^A-Za-z0-9\u4e00-\u9fa5]')
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
            'referer': 'http://www.xiachufang.com/category/52107/'
        }

    def get_url(self):
        """Yield a parsed BeautifulSoup document for each listing page (max 5).

        Advances ``self.count`` BEFORE the request, so by the time a page's
        recipes are processed the counter is one past that page's number
        (``Save_foods`` compensates with ``foods[0] - 1``).
        """
        while self.count <= 5:
            url = 'http://www.xiachufang.com/category/52107/?page=' + str(self.count)
            self.count += 1
            # Was named `re`, which shadowed the re module inside this scope.
            response = requests.get(url, headers=self.headers)
            if response.status_code == 200:
                yield BeautifulSoup(response.text, 'html.parser')
            else:
                print('链接失败!')

    def get_data(self, page_data):
        """Yield ``(page_counter, index, name, ingredients, url)`` per recipe.

        :param page_data: BeautifulSoup document of one listing page.
        """
        menu_table = page_data.find('div', class_="normal-recipe-list").findAll("div", class_="info pure-u")
        for index, menu in enumerate(menu_table):
            tag_a = menu.find('a')
            # NOTE(review): the fixed [17:-13] slice assumes the site's exact
            # markup whitespace around the anchor text — confirm it still holds.
            foods_name = tag_a.text[17:-13]
            foods_name = self.comp.sub('', foods_name)  # drop filename-unsafe chars
            foodstuff = menu.find('p', class_="ing ellipsis").text[1:-1]
            # urljoin avoids the double slash the old '.../'+href concatenation
            # produced when the href begins with '/'.
            foods_url = urljoin('http://www.xiachufang.com/', tag_a['href'])
            yield (self.count, index, foods_name, foodstuff, foods_url)

    def Save_foods(self, foods):
        """Write one recipe tuple to ``./下厨房/<page>-<index>-<name>.txt``.

        :param foods: tuple ``(page_counter, index, name, ingredients, url)``
                      as produced by :meth:`get_data`.
        """
        # count was already advanced past this page in get_url, hence -1.
        food_name = '%s-%s-%s' % ((foods[0] - 1), foods[1], foods[2])
        food_content = '食材:' + foods[3] + '\n链接:' + foods[4]
        path = r'./下厨房/'
        # Original crashed with FileNotFoundError if the directory was missing.
        os.makedirs(path, exist_ok=True)
        with open(path + food_name + '.txt', 'w', encoding='utf-8') as f:
            f.write(food_content)
        print('下载 %s 成功' % food_name)

    def fun(self):
        """Entry point: fetch every listing page and save every recipe found."""
        for page_data in self.get_url():
            for foods in self.get_data(page_data):
                self.Save_foods(foods)
# Script entry point: build the scraper and run the full crawl.
if __name__ == '__main__':
    scraper = xiachufang()
    scraper.fun()