BeautifulSoup方法和属性的调用（二）

# -*- coding:utf-8 -*-
# Demo script: filtering BeautifulSoup ``find_all`` results by a compiled
# regular expression on the tag text, and restricting a search to direct
# children with ``recursive=False``.
import re

from bs4 import BeautifulSoup

# Sample document: four <p> tags directly under <body>, plus one <p>
# nested inside a <div> (a second-level descendant of <body>).
html = """  
<html><head><title>The Dormouse's story</title></head>  
<body>  
<p>12345</p>  
<p>123456</p> 
<p>abcde</p>
<p>abcdef</p>
<div><p>第二代p</p></div>
</body>  
"""

# Raw strings keep ``\d`` a regex escape instead of an invalid string escape
# (the non-raw form triggers a warning on modern Python versions).
reg = re.compile(r'\d{5}')         # five consecutive digits
reg1 = re.compile(r'(abcde){1}')   # the group "abcde" occurring once

soup = BeautifulSoup(html, 'html.parser')

# NOTE(review): newer bs4 releases document this keyword as ``string=``;
# ``text=`` is kept here so the script still runs on older versions.
# Expected output as recorded by the original author:
#   [<p>12345</p>, <p>123456</p>]
print(soup.find_all('p', text=reg))
#   [<p>abcde</p>, <p>abcdef</p>]
print(soup.find_all('p', text=reg1))

# recursive=False limits the search to direct children of <body>, so the
# <p> nested inside the <div> is not returned.
print(soup.find('body').find_all('p', recursive=False))

猜你喜欢

转载自blog.csdn.net/qq_42379006/article/details/80643508