欢迎加入QQ学习交流群,与我们一起学习,一起进步吧!
群号:225361733
可以用QQ扫一扫加入群聊哦!
已更新源码
旧版本源码bilibili视频:https://www.bilibili.com/video/BV1cZ4y1s7am/
新版本源码bilibili视频:https://www.bilibili.com/video/BV19Q4y1P7Hg/
import requests
import re
import csv
import xlwt
# Startup banner: names the tool and its author, and lists likely reasons for
# an abnormal exit (anti-crawler measures on the site, or incorrect usage).
print(
    "欢迎使用多功能图片爬取器!作者Python狂MC小豆!\n"
    "如果异常退出(exe和Python终端)或报错(python编辑器)\n"
    "可能是由于图片或网站做了进一步的反爬虫\n"
    "本作品使用添加请求头的User-Agent来反防爬虫\n"
    "也有可能是你做了不正当的操作"
)
def gif():
    """Download every GIF image linked from the page at ``wangzhanurl``.

    Reads the module-level ``wangzhanurl`` (assigned by the interactive
    menu loop), fetches that page with a browser User-Agent header, and
    treats every shortest run of text from ``https`` to ``gif`` as an image
    address. Each match is downloaded and written, relative to the current
    working directory, to ``爬取gif图片<index>.gif`` where ``<index>`` is
    the zero-based position of the match.

    Returns:
        None. Progress is reported on standard output.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
    page_text = requests.get(wangzhanurl, headers=headers).text
    image_urls = re.compile(r'https.*?gif').findall(page_text)
    for index, image_url in enumerate(image_urls):
        response = requests.get(image_url, headers=headers)
        # The context manager closes the file even if the write fails.
        with open("爬取gif图片%s.gif" % index, "wb") as image_file:
            image_file.write(response.content)
        # File names stay zero-based; the message reports how many images
        # have been saved so far, hence index + 1.
        print("成功保存%s个图片\n" % (index + 1))
def png():
    """Download every PNG image linked from the page at ``wangzhanurl``.

    Reads the module-level ``wangzhanurl`` (assigned by the interactive
    menu loop), fetches that page with a browser User-Agent header, and
    treats every shortest run of text from ``https`` to ``png`` as an image
    address. Each match is downloaded and written, relative to the current
    working directory, to ``爬取png图片<index>.png`` where ``<index>`` is
    the zero-based position of the match.

    Returns:
        None. Progress is reported on standard output.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
    page_text = requests.get(wangzhanurl, headers=headers).text
    image_urls = re.compile(r'https.*?png').findall(page_text)
    for index, image_url in enumerate(image_urls):
        response = requests.get(image_url, headers=headers)
        # The context manager closes the file even if the write fails.
        with open("爬取png图片%s.png" % index, "wb") as image_file:
            image_file.write(response.content)
        # File names stay zero-based; the message reports how many images
        # have been saved so far, hence index + 1.
        print("成功保存%s个图片\n" % (index + 1))
def jpg():
    """Download every JPG image linked from the page at ``wangzhanurl``.

    Reads the module-level ``wangzhanurl`` (assigned by the interactive
    menu loop), fetches that page with a browser User-Agent header, and
    treats every shortest run of text from ``https`` to ``jpg`` as an image
    address. Each match is downloaded and written, relative to the current
    working directory, to ``爬取jpg图片<index>.jpg`` where ``<index>`` is
    the zero-based position of the match.

    Returns:
        None. Progress is reported on standard output.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
    page_text = requests.get(wangzhanurl, headers=headers).text
    # Non-greedy ``.*?`` (as in the gif/png patterns): a greedy ``.*`` would
    # run from the first "https" to the LAST "jpg" on a line and merge
    # several links into one unusable address.
    image_urls = re.compile(r'https.*?jpg').findall(page_text)
    for index, image_url in enumerate(image_urls):
        response = requests.get(image_url, headers=headers)
        # The context manager closes the file even if the write fails.
        with open("爬取jpg图片%s.jpg" % index, "wb") as image_file:
            image_file.write(response.content)
        # File names stay zero-based; the message reports how many images
        # have been saved so far, hence index + 1.
        print("成功保存%s个图片\n" % (index + 1))
def url_csv():
    """Write the PNG, JPG and GIF image addresses found on the page to CSV.

    Reads the module-level ``wangzhanurl`` (assigned by the interactive
    menu loop), fetches that page once with a browser User-Agent header,
    and writes ``爬取数据.csv`` relative to the current working directory.
    For each of png, jpg and gif (in that order) the file gets a title row,
    a column-header row, and one ``(zero-based index, address)`` row per
    match of the shortest run of text from ``https`` to the extension.

    Returns:
        None. Progress is reported on standard output.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
    # Fetch the page before opening the output file so that a failed request
    # does not truncate a CSV left over from an earlier run.
    page_text = requests.get(wangzhanurl, headers=headers).text
    # The context manager closes the file even if a request or write fails.
    with open('爬取数据.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for extension in ('png', 'jpg', 'gif'):
            image_urls = re.compile(r'https.*?' + extension).findall(page_text)
            writer.writerow(["%s图列表" % extension])
            writer.writerow(['图片序号', '图片网址'])
            for index, image_url in enumerate(image_urls):
                # Each image is requested so the recorded address is the
                # response's own ``url`` rather than the raw page text.
                response = requests.get(image_url)
                writer.writerow((index, response.url))
                # Rows keep the zero-based index; the message reports how
                # many rows have been written so far, hence index + 1.
                print("写入了%s个%s数据\n" % (index + 1, extension))
def url_excel():
    """Write the PNG, JPG and GIF image addresses found on the page to Excel.

    Reads the module-level ``wangzhanurl`` (assigned by the interactive
    menu loop), fetches that page once with a browser User-Agent header,
    and writes ``爬取数据.xls`` relative to the current working directory.
    The workbook gets one sheet per image type (png, jpg, gif, in that
    order); each sheet has a header row followed by one
    ``(zero-based index, address)`` row per match of the shortest run of
    text from ``https`` to the extension.

    Returns:
        None. Progress is reported on standard output.
    """
    workbook = xlwt.Workbook()
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
    # One fetch serves all three sheets; the request arguments are identical
    # for every image type.
    page_text = requests.get(wangzhanurl, headers=headers).text
    for extension in ('png', 'jpg', 'gif'):
        sheet = workbook.add_sheet("%s图列表" % extension)
        sheet.write(0, 0, '图片序号')
        sheet.write(0, 1, '图片网址')
        image_urls = re.compile(r'https.*?' + extension).findall(page_text)
        for index, image_url in enumerate(image_urls):
            # Each image is requested so the recorded address is the
            # response's own ``url`` rather than the raw page text.
            response = requests.get(image_url)
            # Row 0 holds the column headers, so data rows start at row 1.
            sheet.write(index + 1, 0, index)
            sheet.write(index + 1, 1, response.url)
            # Rows keep the zero-based index; the message reports how many
            # rows have been written so far, hence index + 1.
            print("写入了%s个%s数据\n" % (index + 1, extension))
        # Save after every finished sheet: completed sheets survive a later
        # failure, and the file is written even when a type has no matches.
        workbook.save("爬取数据.xls")
# Interactive menu. Each round asks for a page address and an action number,
# runs the matching crawler(s), and repeats until the user enters 4.

# Menu number -> crawler functions to run, in order. 4 (quit) is handled
# separately in the loop below.
menu_actions = {
    0: (gif,),
    1: (png,),
    2: (jpg,),
    3: (url_csv,),
    5: (gif, png, jpg),
    6: (url_excel,),
}

while True:
    # Must stay a module-level name: every crawler function reads it as a
    # global instead of taking the address as a parameter.
    wangzhanurl = input("你要在什么网站爬图片?输入url:")
    try:
        doingsth = int(input("你要:\n1.爬gif图请输0\n2.爬png图请输1\n3.爬jpg图请输2\n4.爬png,jpg,gif图片url并输出为csv表格请输3\n5.停止程序请输4\n6.一口气把jpg,png,gif全爬输5\n7.爬png,jpg,gif图片url并输出为Excel表格请输6\n请输入:"))
    except ValueError:
        # Non-numeric input gets the same message as an unknown number
        # instead of terminating the program with a traceback.
        print("输的不对哦")
        continue
    if doingsth == 4:
        break
    if doingsth not in menu_actions:
        print("输的不对哦")
        continue
    for action in menu_actions[doingsth]:
        action()
    print("完成")