import requests
from lxml import etree
import time
import random
# Download a fixed range of chapter pages and append the text found in each
# page's <div id="content"> to a local UTF-8 text file.

# Browser-like User-Agent header sent with every request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3514.0 Safari/537.36'}
# Base URL; each chapter page is addressed as '<url><chapter id>.html'.
url = 'http://www.xbiquge.la/13/13959/'

for chapter_id in range(5939025, 5939028):
    chapter_url = url + str(chapter_id) + '.html'
    try:
        # Without an explicit timeout a stalled server would block the script
        # indefinitely.
        response = requests.get(chapter_url, headers=headers, timeout=10)
        # Turn HTTP error statuses into exceptions so an error page is not
        # parsed as if it were chapter text.
        response.raise_for_status()
    except requests.RequestException as exc:
        # One unreachable chapter should not abort the remaining downloads.
        print('Skipping', chapter_url, exc)
        continue

    # Decode the body as UTF-8 regardless of the encoding requests detected.
    response.encoding = 'utf-8'
    selector = etree.HTML(response.text)
    # The XPath selects the text nodes directly under <div id="content">.
    text_nodes = selector.xpath('//div[@id="content"]/text()')
    # Concatenate the fragments in document order, with no separator.
    chapter_text = ''.join(text_nodes)
    print(chapter_text)

    # Append mode keeps earlier chapters; newline='' writes line endings
    # exactly as they appear in the scraped text.
    with open('圣墟1.txt', 'a', encoding='utf-8', newline='') as out_file:
        out_file.write(chapter_text)
才开始学习爬虫，经验不足，自己也讲不太清楚，所以就没做注释。做得很粗糙。万望海涵。