python获取小说网站新笔趣阁小说(新手向)

import os
import re
import time

import requests
from bs4 import BeautifulSoup

# Scrape every chapter of one novel from xbiquge.la into per-chapter .txt files.
print("爬虫对应小说网站,新笔趣阁,地址:http://www.xbiquge.la")

novel_Code = input("请输入小说代码(格式:xx-xx ,例:0-69)(地址栏的后面的数字):")

# The code "A-B" maps to the index page http://www.xbiquge.la/A/B/
book_prefix, book_id = novel_Code.split("-")
url = "http://www.xbiquge.la/%s/%s/" % (book_prefix, book_id)

print("正在爬取,请稍等")

# Fetch the table-of-contents page. timeout prevents an indefinite hang;
# raise_for_status stops us from parsing an HTTP error page as if it were the index.
html_code = requests.get(url, timeout=10)
html_code.raise_for_status()
html_code.encoding = "utf-8"  # site serves UTF-8 but may not declare it in headers

soup_1 = BeautifulSoup(html_code.text, "html.parser")

# Novel title lives in <div id="info"><h1>...</h1>
novel_Name = soup_1.find(id="info").find("h1").get_text()

# Collect (chapter title, absolute chapter URL) from the <div id="list"> anchors.
chapter_Name = []
chapter_Link = []
for cN in soup_1.find(id="list").find_all("a"):
    chapter_Name.append(cN.get_text())
    chapter_Link.append("http://www.xbiquge.la" + cN.get("href"))

# One output directory per novel; exist_ok makes re-runs idempotent.
path = "./%s" % (novel_Name)
os.makedirs(path, exist_ok=True)

# Advertisement blurb the site injects into every chapter body; stripped below.
# (Invariant text — hoisted out of the download loop.)
adver = "亲,点击进去,给个好评呗,分数越高更新越快,据说给新笔趣阁打满分的最后都找到了漂亮的老婆哦!手机站全新改版升级地址:http://m.xbiquge.la,数据和书签与电脑站同步,无广告清新阅读!"

for i, (name, link) in enumerate(zip(chapter_Name, chapter_Link), start=1):
    print("%s - 章节名称:%s,章节地址:%s" % (str(i), name, link))

    novel_Content_code = requests.get(link, timeout=10)
    novel_Content_code.raise_for_status()
    novel_Content_code.encoding = "utf-8"

    novel_Content_1 = BeautifulSoup(novel_Content_code.text, "html.parser")

    # Chapter text is in <div id="content">; drop the ad and leftover HTML entities.
    novel_Content_2 = (
        novel_Content_1.find(id="content")
        .get_text()
        .replace(adver, "")
        .replace("<br />", "")
        .replace("&nbsp;", "")
    )

    # Chapter titles can contain characters that are illegal in filenames
    # (e.g. / : * ? " < > |) — replace them so open() cannot fail.
    safe_name = re.sub(r'[\\/:*?"<>|]', "_", name)
    with open("%s/%s-%s.txt" % (path, i, safe_name), "w", encoding="utf-8") as f:
        f.write(novel_Content_2)

    time.sleep(1)  # the site rate-limits scrapers; throttle to ~1 request/second

相关运行截图:

 

注意:由于这个小说网站设置了反爬,所以我设置了1秒爬取一节。

发布了7 篇原创文章 · 获赞 2 · 访问量 1114

猜你喜欢

转载自blog.csdn.net/Ferencz/article/details/104073250