Xpath教程

from lxml import etree
import requests # urllib urllib2 urllib3

url = "https://www.qidian.com/free/all"
response = requests.get(url=url)
response.encoding = response.apparent_encoding

root = etree.HTML(response.content)

查找网页内某个标签,例如 title

root.xpath('//title')

获取某个标签的内容, 例如 <title>网页标题</title>

root.xpath('//title/text()')

获取某个标签及子标签下的内容, 例如 <title>网页标题</title>

root.xpath('//title//text()')

获取img标签的src属性, 例如 <img src="1.jpg" />

root.xpath('//img/@src')

根据id精确查找标签内容 <img src="1.jpg" id="img1"/>

root.xpath('//img[@id="img1"]/@src')

根据class查找标签内容 <a class="a1">标签</a>

root.xpath('//a[@class="a1"]/text()')

根据其它属性查找标签内容 <a data="cate" title="a" name="a">分类1</a>

root.xpath('//a[@data="cate"]/text()')

root.xpath('//a[@title="a"]/text()')

root.xpath('//a[@name="a"]/text()')

根据标签内容查找标签 <a>下一页</a>

root.xpath('//a[text()="下一页"]')

使用 string(.) 获取标签内的全部文本 <a class="a1">标签</a>

root.xpath('//a[@class="a1"]')[0].xpath('string(.)')

属性模糊定位 <a class="a1">123</a>

root.xpath('//a[contains(@class,"a")]')

文本模糊定位 <a href="http://www.baidu.com">百度搜索</a>

root.xpath('//a[contains(text(),"百度")]')

猜你喜欢