"""Download every dish image from the meishij.net DIY listing and index it in SQLite.

Running this file creates (or reuses) the SQLite database ``msjDB`` in the
current directory, recreates an ``image`` directory, and stores each listing
page's pictures in ``image/Page<N>``.

NOTE: the original author remarked that the site has since been changed during
maintenance, so the CSS selectors below may no longer match the live pages.
"""
import os
import shutil  # high-level file operations; used to delete the whole image tree
import sqlite3
from urllib.parse import urljoin
from urllib.request import Request, urlopen, urlretrieve

from bs4 import BeautifulSoup


class Image_downLoad(object):
    """Crawler that walks the paginated listing and saves each page's images.

    Attributes:
        base_url: URL of the first listing page.
        current_page: 1-based number of the page that will be saved next; it
            names the ``Page<N>`` directory and is incremented after each page.
    """

    def __init__(self):
        self.base_url = 'https://www.meishij.net/chufang/diy/?&page=1'
        self.current_page = 1

    def stat_downLoad(self):
        """Reset the ``image`` directory, enter it, and crawl from ``base_url``.

        Side effect: the process working directory is changed to ``image``.
        """
        if os.path.exists('image'):
            # Remove the whole tree; ignore_errors=True keeps this best-effort.
            shutil.rmtree('image', True)
        # exist_ok covers the case where the best-effort removal above failed.
        os.makedirs('image', exist_ok=True)
        # All later file operations are relative to the image directory.
        os.chdir('image')
        self.get_page_code_with_url(self.base_url)

    def get_page_code_with_url(self, full_url):
        """Fetch ``full_url``, save its images, and keep following "next" links.

        The crawl is a loop rather than mutual recursion, so its depth does not
        grow with the number of listing pages. It stops when a request fails,
        when a page has no next link, or when the next link points back at the
        page that was just processed.

        Args:
            full_url: Absolute URL of the listing page to start from.
        """
        while full_url:
            code = self._fetch_page_code(full_url)
            if code is None:
                return
            soup = BeautifulSoup(code, 'lxml')
            self._save_page_images(soup, full_url)
            next_url = self._find_next_page_url(soup, full_url)
            if next_url == full_url:
                # A self-referencing "next" link would otherwise loop forever.
                return
            full_url = next_url

    def get_data_with_code(self, code):
        """Save the images found in ``code``, then continue with the next page.

        Args:
            code: HTML source of a listing page.
        """
        soup = BeautifulSoup(code, 'lxml')
        self._save_page_images(soup, self.base_url)
        next_url = self._find_next_page_url(soup, self.base_url)
        if next_url:
            self.get_page_code_with_url(next_url)

    def get_next_page(self, code):
        """Find the next-page link in ``code`` and crawl on from there.

        Does nothing when the page has no next link (e.g. the last page).

        Args:
            code: HTML source of the current listing page.
        """
        soup = BeautifulSoup(code, 'lxml')
        next_url = self._find_next_page_url(soup, self.base_url)
        if next_url:
            self.get_page_code_with_url(next_url)

    def _fetch_page_code(self, full_url):
        """Return the decoded HTML of ``full_url``, or None if the request fails."""
        headers = {
            'User-Agent': ' Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
        }
        try:
            request = Request(full_url, headers=headers)
            # The context manager closes the connection even if read() fails.
            with urlopen(request) as response:
                return response.read().decode()
        except Exception as exc:
            # Top-level boundary of the crawl: report the failure and stop.
            print('请求失败', full_url, exc)
            return None

    def _save_page_images(self, soup, page_url):
        """Download every listed image of one parsed page into ``Page<N>``.

        Each successfully downloaded image is also recorded through
        ``Sql.insert_info_to_table``. ``current_page`` is incremented afterwards.

        Args:
            soup: Parsed listing page.
            page_url: URL used to resolve relative image addresses.
        """
        print('正在下载第{}页'.format(self.current_page))
        page_name = 'Page{}'.format(self.current_page)
        os.mkdir(page_name)
        os.chdir(page_name)
        try:
            # Site-specific selectors; they must be updated if the markup changes.
            image_list = soup.select('div.listtyle1_list div a img')
            talk_list = soup.select('div.listtyle1_list div.c1 span')
            # Keeps characters [-8:-3] of each span's text; presumably a short
            # counter shown under the dish -- confirm against the page markup.
            tack_list = [span.text[-8:-3] for span in talk_list]

            for index, image in enumerate(image_list):
                src = image.get('src')
                if not src:
                    continue  # nothing to download for this tag
                image_src = urljoin(page_url, src)
                # Pages may list fewer spans than images; fall back to ''.
                num = tack_list[index] if index < len(tack_list) else ''
                title = (image.get('alt') or '').split('(')[0]
                image_alt = title + str(num) + '.jpg'
                print(image_alt, image_src)
                try:
                    urlretrieve(image_src, image_alt)  # the actual download
                except (OSError, ValueError) as exc:
                    # One broken image must not abort the whole crawl.
                    print('下载失败', image_src, exc)
                    continue
                Sql.insert_info_to_table(image_alt, image_src)
        finally:
            # Always step back to the parent directory, even after an error.
            os.chdir(os.path.pardir)
        self.current_page += 1

    def _find_next_page_url(self, soup, page_url):
        """Return the absolute URL of the next listing page, or None if absent."""
        # Site-specific selector for the "next page" anchor.
        next_links = soup.select('div.listtyle1_page_w a.next')
        if not next_links:
            return None
        href = next_links[0].get('href')
        if not href:
            return None
        return urljoin(page_url, href)


class Sql(object):
    """Class-level wrapper around one shared SQLite connection and cursor."""

    # Both are set by create_db_and_table() and reset by close_db().
    connect = None
    cursor = None

    @classmethod
    def create_db_and_table(cls):
        """Open the ``msjDB`` database and create ``qbTable`` if it is missing."""
        cls.connect = sqlite3.connect('msjDB')
        cls.cursor = cls.connect.cursor()
        cls.cursor.execute(
            'create table if not exists qbTable (name text,src text)')
        cls.connect.commit()

    @classmethod
    def insert_info_to_table(cls, image_alt, image_src):
        """Insert one (name, src) row and commit.

        Values are bound with placeholders, so quotes in scraped text can
        neither break the statement nor inject SQL.

        Args:
            image_alt: File name the image was saved under.
            image_src: URL the image was downloaded from.
        """
        cls.cursor.execute(
            'insert into qbTable (name,src) VALUES (?,?)',
            (image_alt, image_src))
        cls.connect.commit()

    @classmethod
    def close_db(cls):
        """Close the cursor and the connection if they are open."""
        if cls.cursor is not None:
            cls.cursor.close()
            cls.cursor = None
        if cls.connect is not None:
            cls.connect.close()
            cls.connect = None


Sql.create_db_and_table()
downLoad = Image_downLoad()
try:
    downLoad.stat_downLoad()
finally:
    # Release the database even when the crawl stops with an error.
    Sql.close_db()
案例精选:爬取美食杰所有图片批量下载并入库
猜你喜欢
转载自blog.csdn.net/qq_38059635/article/details/81229167
今日推荐
周排行