BeautifulSoup 常见用法

import re

from bs4 import BeautifulSoup
# Sample document containing every element the lookups below rely on.
# With an empty document soup.title is None, so soup.title.string raises
# AttributeError before any of the later examples can run.
html = """
<html>
<head><title>The Dormouse's story</title></head>
<body>
<p class="title" name="dromouse"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<ul class="list" id="list-1">
<li class="element">Foo</li>
<li class="element">Hello, this is a link</li>
</ul>
</body>
</html>
"""
soup = BeautifulSoup(html, 'lxml')

# --- Selecting nodes by tag name ---
print(soup.title)
print(type(soup.title))
print(soup.title.string)
print(soup.head)
print(soup.p)  # attribute access returns only the first matching node

# --- Name, attributes and text of a node ---
print(soup.title.name)  # tag name
print(soup.p.attrs)  # dict of all attributes
print(soup.p.attrs['name'])  # soup.p['name'] is equivalent
print(soup.p.string)  # text content of the node

# --- Children and descendants ---
print(soup.p.contents)  # direct children, as a list
# .children and .descendants are lazy iterators; materialise them so the
# nodes themselves are printed rather than the iterator's repr.
print(list(soup.p.children))  # direct children
print(list(soup.p.descendants))  # all descendants, depth-first

# --- Parents and ancestors ---
print(soup.p.parent)  # direct parent
print(list(enumerate(soup.a.parents)))  # every ancestor, nearest first

# --- Adjacent siblings ---
print(soup.a.next_sibling)
print(soup.a.previous_sibling)

# --- All following / preceding siblings ---
print(list(enumerate(soup.a.next_siblings)))
print(list(enumerate(soup.a.previous_siblings)))

# --- find_all ---
# Signature: find_all(name, attrs, recursive, string, limit, **kwargs)
# `name` matches the tag name, `attrs` matches attributes and `string`
# matches text; a filter may be a plain string or a compiled regex.
print(soup.find_all(name='ul'))
print(soup.find_all(attrs={'id': 'list-1'}))
# `string=` is the current spelling of the older `text=` keyword.
print(soup.find_all(string=re.compile('link')))



猜你喜欢

转载自blog.csdn.net/qq_36718317/article/details/80979015