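# 一:爬取百度图片 (Part 1: crawl images from Baidu Image Search)
# 注意:如果提示 requests 库不存在,命令行运行 pip install requests 即可
# (Note: if the requests library is reported missing, run `pip install requests`.)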
import json
import requests
import threading
def download_img(image_url, word, index):
    """Download one image and save it as ``images/<word>/<word>_<index>.jpg``.

    Args:
        image_url: URL of the image to fetch.
        word: Search keyword; used as the sub-directory under ``images/``
            and as the file-name prefix.
        index: Sequence number appended to the file name.

    Returns:
        None. Any failure is printed to stdout and swallowed, so one bad
        image does not abort the rest of the crawl.
    """
    img_name = word + "_" + str(index)
    try:
        response_img = requests.get(
            image_url,
            # NOTE(review): presumably needed to get past hotlink protection
            # on the image host -- confirm.
            headers={"Referer": "http://image.baidu.com"},
            # Without a timeout a stalled connection would block this
            # worker forever.
            timeout=10,
        )
        # Fail here on 4xx/5xx instead of saving an error page as a .jpg.
        response_img.raise_for_status()
        with open("images/" + word + "/" + str(img_name) + ".jpg", 'wb') as f:
            f.write(response_img.content)
    except Exception as e:
        # Broad on purpose: this is the outermost frame of a worker thread
        # and the download is best-effort.
        print("download_img")
        print(e)
def get_page(word, pn, rn):
    """Fetch one page of Baidu image-search results and download its images.

    The JSON response's ``data`` list is scanned; for every entry that has a
    ``middleURL`` key a new thread is started that runs ``download_img``.

    Args:
        word: Search keyword.
        pn: Offset of the first result to request (sent as ``pn``); images
            on this page are numbered starting at ``pn + 1``.
        rn: Number of results to request (sent as ``rn``).

    Returns:
        None. Any failure is printed to stdout and swallowed.
    """
    # Passing the query as a dict lets requests percent-encode the keyword,
    # and spells out the ``copyright`` parameter that had been mangled into
    # a literal copyright sign in the hand-built URL.
    params = {
        "tn": "resultjson_com",
        "ipn": "rj",
        "ct": "201326592",
        "is": "",
        "fp": "result",
        "queryWord": word,
        "cl": "2",
        "lm": "-1",
        "ie": "utf-8",
        "oe": "utf-8",
        "adpicid": "",
        "st": "",
        "z": "",
        "ic": "",
        "hd": "",
        "latest": "",
        "copyright": "",
        "word": word,
        "s": "",
        "se": "",
        "tab": "",
        "width": "",
        "height": "",
        "face": "",
        "istype": "",
        "qc": "",
        "nc": "1",
        "fr": "",
        "expermode": "",
        "force": "",
        "cg": "star",
        "pn": str(pn),
        "rn": str(rn),
        "gsm": "",
        "1568680507018": "",
    }
    try:
        index = pn + 1
        response = requests.get(
            "http://image.baidu.com/search/acjson",
            params=params,
            timeout=10,  # do not hang forever on a stalled connection
        )
        # Surface HTTP errors here rather than as a confusing JSON error.
        response.raise_for_status()
        obj = json.loads(response.text)
        items = obj['data']
        for item in items:
            # Entries without a 'middleURL' key are skipped and do not
            # consume an index.
            if 'middleURL' in item:
                url_img = item['middleURL']
                threading.Thread(target=download_img, args=[url_img, word, index]).start()
                index += 1
    except Exception as e:
        # Best-effort: report the failure and let the caller continue with
        # the next page.
        print("get_page")
        print(e)
def get_n_page(page_num, word, pn, rn):
    """Fetch ``page_num`` consecutive result pages for ``word``.

    Args:
        page_num: How many pages to fetch.
        word: Search keyword, passed through to ``get_page``.
        pn: Result offset of the first page.
        rn: Results per page; also the step between page offsets.
    """
    for page_idx in range(page_num):
        # Page k starts rn * k results after the initial offset.
        get_page(word, pn + page_idx * rn, rn)
def main():
    """Crawl a fixed number of result pages for a hard-coded keyword.

    Creates ``images/<word>`` if needed, then fetches ``page_count`` pages
    via ``get_n_page``. Edit the values below to change the crawl.
    """
    import os

    # --- edit here ---
    word = "苍老师"  # search keyword
    pn = 0  # result offset of the first page
    rn = 20  # results per page
    page_count = 3  # number of pages to fetch

    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs("images/" + word, exist_ok=True)

    # For a single page, call get_page(word, pn, rn) instead.
    get_n_page(page_count, word, pn, rn)


# Runs unconditionally: the crawl starts when this module is executed or imported.
main()
# 二:图片批量重命名 (Part 2: batch-rename images)
import os
def rename(dir_path, name):
    """Rename every regular file in ``dir_path`` to ``<name>.<index>.jpg``.

    Files are processed in sorted name order and numbered from 0.
    Sub-directories are left untouched.

    The rename runs in two passes (first to unique temporary names, then to
    the final names) so that a file which already carries one of the target
    names, e.g. from a previous run, is never overwritten.

    Args:
        dir_path: Directory whose files are renamed in place.
        name: Prefix for the new file names.

    Raises:
        OSError: If the directory cannot be listed or a rename fails. A
            failure part-way through can leave some files under their
            temporary ``__rename_*.tmp`` names.
    """
    import uuid

    # Sorted for a deterministic numbering; os.listdir order is arbitrary.
    file_names = sorted(
        entry
        for entry in os.listdir(dir_path)
        if os.path.isfile(os.path.join(dir_path, entry))
    )

    # Pass 1: move everything out of the way under collision-free names.
    token = uuid.uuid4().hex
    staged_paths = []
    for index, file_name in enumerate(file_names):
        staged_path = os.path.join(
            dir_path, "__rename_{}_{}.tmp".format(token, index)
        )
        os.rename(os.path.join(dir_path, file_name), staged_path)
        staged_paths.append(staged_path)

    # Pass 2: no regular file can occupy a target name any more.
    for index, staged_path in enumerate(staged_paths):
        new_file_path = os.path.join(dir_path, name + '.' + str(index) + '.jpg')
        os.rename(staged_path, new_file_path)
if __name__ == '__main__':
    # Edit this placeholder so it points at the folder holding the images
    # to rename.
    dir_path = r"图片所在文件夹的路径"
    rename(dir_path=dir_path, name='dj')