# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup
import re

# Sample document used by the demo below: four <p> tags sit directly under
# <body>, and a fifth <p> is nested one level deeper inside a <div>.
html = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p>12345</p>
<p>123456</p>
<p>abcde</p>
<p>abcdef</p>
<div><p>第二代p</p></div>
</body>
"""

# Raw strings keep regex backslashes literal; a plain '\d' is an invalid
# string escape that newer Python versions warn about.
reg = re.compile(r'\d{5}')  # five consecutive digits
reg1 = re.compile(r'(abcde){1}')  # the whole group "abcde" occurring once

soup = BeautifulSoup(html, 'html.parser')

# Filter <p> tags by their text content. `string=` is the current name of the
# keyword formerly spelled `text=`. Longer texts that contain a match are
# returned as well, as the expected outputs show.
print(soup.find_all('p', string=reg))  # [<p>12345</p>, <p>123456</p>]
print(soup.find_all('p', string=reg1))  # [<p>abcde</p>, <p>abcdef</p>]

# recursive=False limits the search to direct children of <body>, so the <p>
# nested inside the <div> is not returned.
print(soup.find('body').find_all('p', recursive=False))
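# Related article: BeautifulSoup method and attribute calls (part 2)
# (page section: "You may also like")
# Reposted from blog.csdn.net/qq_42379006/article/details/80643508
# (page section: "Today's recommendations")
# (page section: "Weekly ranking")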