# Parsing tag content: use get_text() to obtain a tag's text, and get('<attr>') to read a tag attribute value.
import requests
from bs4 import BeautifulSoup
import re
# Page fetched by get_html(); its <img> tags are then scanned by getimage().
url = 'http://tieba.baidu.com/p/4178314700'
def get_html(page_url=None, timeout=10):
    """Fetch a page and return its decoded HTML text.

    Args:
        page_url: Address to fetch. When omitted, the module-level ``url``
            is used, which matches the original zero-argument call.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body decoded to ``str`` by ``requests``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx status (so an error page is never parsed as if it were
            the real content).
    """
    target = url if page_url is None else page_url
    response = requests.get(target, timeout=timeout)
    response.raise_for_status()
    return response.text
def getimage(html, save_dir='C:/Users/Lenovo/Desktop/Pic', timeout=10):
    """Download every HTTPS ``.jpg`` image referenced by ``<img>`` tags in *html*.

    The ``src`` attribute of each ``<img>`` tag is read with ``Tag.get``;
    values that start with ``https://`` and contain ``.jpg`` are printed and
    then downloaded.

    Args:
        html: HTML source of the page to scan.
        save_dir: Directory that receives the files; it is created when
            missing. Files are named ``<index>.jpg``, where ``index`` is the
            position of the URL in the list of matching URLs.
        timeout: Seconds to wait for each image request.

    Returns:
        None. Matching URLs are printed; images are written to ``save_dir``.
        A download that fails (network error, timeout, 4xx/5xx status) is
        reported on stdout and skipped, so no empty or bogus file is left
        behind for it.
    """
    import os  # local import: the file's import block does not provide os

    soup = BeautifulSoup(html, 'lxml')

    # Dots are escaped so only a literal ".jpg" qualifies; match() anchors
    # the pattern at the start of the string.
    pattern = re.compile(r'https://.*\.jpg')

    # Tag.get('src') yields None for <img> tags without a src attribute;
    # those must be skipped because re.match(None) raises TypeError.
    sources = (img.get('src') for img in soup.find_all('img'))
    jpg_urls = [src for src in sources if src and pattern.match(src)]

    for jpg_url in jpg_urls:
        print(jpg_url)

    os.makedirs(save_dir, exist_ok=True)
    for index, jpg_url in enumerate(jpg_urls):
        # Fetch before opening the file so a failed request cannot leave an
        # empty or truncated image on disk.
        try:
            response = requests.get(jpg_url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as exc:
            print('skipped %s: %s' % (jpg_url, exc))
            continue
        with open(os.path.join(save_dir, '%s.jpg' % index), 'wb') as file:
            file.write(response.content)
if __name__ == '__main__':
    # Script entry point: fetch the page, then hand its HTML to getimage().
    getimage(get_html())