基于Flask+requests+bs4的简单小说网页

这个算是一个简单的flask项目，主要用于熟悉flask的各种操作并综合了前段时间学的一个简单的爬虫的方法。虽然目前这个项目还非常简单，但是在我看来确实趣味十足。

在这里，我主要展示后台的代码，以及完成之后的效果。不过由于这个小项目会涉及数据库的一些操作，所以在最开始，我将接着上一篇，先介绍一下 models 数据模型层。

1. 安装flask-Sqlalchemy

pip install flask-sqlalchemy

2. 配置数据库信息

PandaInit/settings.py

import os

from PandaInit.function import get_uri

# Base path: two directory levels above this file.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Template directory
TEMPLATES_DIR = os.path.join(BASE_DIR, 'templates')
# Static file directory
STATIC_DIR = os.path.join(BASE_DIR, 'static')
# Database connection settings.
# Each value can be overridden with a PANDATXT_DB_* environment variable so
# that real credentials do not have to live in the source tree; the fallback
# values are the original development defaults.
DATABASE = {
    # Server address
    'HOST': os.environ.get('PANDATXT_DB_HOST', 'localhost'),
    # Server port
    'PORT': os.environ.get('PANDATXT_DB_PORT', '3306'),
    # Database user name
    'USER': os.environ.get('PANDATXT_DB_USER', 'root'),
    # Password
    'PASSWORD': os.environ.get('PANDATXT_DB_PASSWORD', '123456'),
    # Driver
    'DRIVER': os.environ.get('PANDATXT_DB_DRIVER', 'pymysql'),
    # Database dialect
    'DB': os.environ.get('PANDATXT_DB_DIALECT', 'mysql'),
    # Database (schema) name
    'NAME': os.environ.get('PANDATXT_DB_NAME', 'pandatxtdb'),
}

# Connection URI built from DATABASE by get_uri().
SQLALCHEMY_DATABASE_URI = get_uri(DATABASE)

PandaInit/function.py

from flask_sqlalchemy import SQLAlchemy

# Shared SQLAlchemy extension object; init_app() in this module binds it to the Flask app.
db = SQLAlchemy()


def get_uri(DATABASE):
    """
    Build a database connection URI from a settings dictionary.

    The user name and password are percent-encoded so that reserved
    characters such as ``@``, ``:`` or ``/`` cannot break the URI structure.

    :param DATABASE: dict with the keys HOST, PORT, USER, PASSWORD, DRIVER,
        DB (dialect) and NAME (database name)
    :return: URI of the form ``dialect+driver://user:password@host:port/name``
    :raises KeyError: if one of the required keys is missing
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    # safe='' also encodes '/', which quote() leaves untouched by default.
    user = quote(str(DATABASE['USER']), safe='')
    password = quote(str(DATABASE['PASSWORD']), safe='')
    return '{dialect}+{driver}://{user}:{password}@{host}:{port}/{name}'.format(
        dialect=DATABASE['DB'],
        driver=DATABASE['DRIVER'],
        user=user,
        password=password,
        host=DATABASE['HOST'],
        port=DATABASE['PORT'],
        name=DATABASE['NAME'],
    )


def init_app(app):
    """Bind the shared ``db`` extension object to the given app.

    :param app: application object passed on to ``db.init_app``
    """
    db.init_app(app)

PandaInit/App.py

from flask import Flask

from PandaInit.settings import TEMPLATES_DIR, STATIC_DIR, SQLALCHEMY_DATABASE_URI
from Pandatxt.views import pandatxt
from PandaInit.function import init_app


def create_app():
    """Application factory: build the Flask app, mount the blueprint and load the DB config.

    :return: the configured Flask application
    """
    flask_app = Flask(
        __name__,
        template_folder=TEMPLATES_DIR,
        static_folder=STATIC_DIR,
    )
    # Register the blueprint under the /pandatxt URL prefix.
    flask_app.register_blueprint(pandatxt, url_prefix='/pandatxt')
    # Database connection configuration.
    flask_app.config.update(
        SQLALCHEMY_DATABASE_URI=SQLALCHEMY_DATABASE_URI,
        SQLALCHEMY_TRACK_MODIFICATIONS=False,
    )
    # Hand the app to PandaInit.function.init_app, which binds the db extension.
    init_app(flask_app)
    return flask_app

3. models数据模型的设计

Pandatxt/models.py

from PandaInit.function import db


class XiaoShuoTitles(db.Model):
    """One row per book (its title and source URL)."""
    __tablename__ = 'xiaoshuotitle'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # Book title
    t_title = db.Column(db.String(32), unique=True)
    # Description of the book
    t_desc = db.Column(db.String(512))
    # URL of the book (currently scraped directly from the Biquge site)
    t_url = db.Column(db.String(64))
    # Cover image
    t_img = db.Column(db.String(64))
    # Relationship to the xiaoshuoeverychapter table; exposes ``titles`` on each chapter
    everychapters = db.relationship('XiaoShuoEveryChapter', backref='titles', lazy=True)

    def __init__(self, title, url, desc=None, img=None):
        """Store the title and URL plus the optional description and cover image."""
        self.t_title, self.t_url = title, url
        self.t_desc, self.t_img = desc, img


class XiaoShuoEveryChapter(db.Model):
    """One row per chapter of a book."""
    __tablename__ = 'xiaoshuoeverychapter'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # URL of the chapter
    ec_url = db.Column(db.String(64))
    # Foreign key to the xiaoshuotitle table
    c_id = db.Column(db.Integer, db.ForeignKey('xiaoshuotitle.id'))
    # Chapter name
    ec_title = db.Column(db.String(64))
    # Relationship to the xiaoshuocontent table; exposes ``every_chapter`` on each content row
    contents = db.relationship('XiaoShuoContents', backref='every_chapter', lazy=True)

    def __init__(self, url, c_id, title):
        """Store the chapter URL, the id of the owning book and the chapter name."""
        self.ec_url, self.c_id, self.ec_title = url, c_id, title


class XiaoShuoContents(db.Model):
    """Stored content of a single chapter."""
    __tablename__ = 'xiaoshuocontent'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # Content of the chapter
    c_content = db.Column(db.Text)
    # Foreign key to the xiaoshuoeverychapter table
    ec_id = db.Column(
        db.Integer,
        db.ForeignKey('xiaoshuoeverychapter.id'),
        nullable=True,
    )

    def __init__(self, content, t_id):
        """Store the chapter content together with the id of its chapter row."""
        self.ec_id = t_id
        self.c_content = content

4. views控制层

Pandatxt/views.py

from threading import Thread

from flask import Blueprint, render_template
import requests
from bs4 import BeautifulSoup as bs

from Pandatxt.models import db, XiaoShuoTitles, XiaoShuoEveryChapter, XiaoShuoContents

pandatxt = Blueprint('pandatxt', __name__)


@pandatxt.route('/createdb/')
def create_db():
    """Call ``db.create_all()`` and return a short confirmation message."""
    db.create_all()
    return '创建成功！'


@pandatxt.route('/getname/')
def get_name():
    """
    Fetch the book names from the category page, store the new ones and
    render the index page.

    Only anchors found inside ``span.s2`` within ``div.l`` are considered.
    A title that is already stored is skipped. If the remote page does not
    answer with a success status, nothing is scraped and the books already
    in the database are rendered.

    :return: the rendered index template listing every stored book
    """
    # The timeout keeps an unresponsive remote site from blocking the request forever.
    req = requests.get('https://www.biquge5200.cc/xuanhuanxiaoshuo/', timeout=10)
    if req.ok:
        page = bs(req.text, 'html5lib')
        # Walk the parsed tree directly instead of re-parsing str(ResultSet) at each level.
        links = [
            a
            for div in page.find_all('div', class_='l')
            for span in div.find_all('span', class_='s2')
            for a in span.find_all('a')
        ]
        for a in links:
            t_title = a.text
            t_url = a.get('href')
            if not t_title or not t_url:
                # An anchor without text or href cannot be opened later.
                continue
            already_stored = XiaoShuoTitles.query.filter(
                XiaoShuoTitles.t_title == t_title
            ).first()
            if already_stored is None:
                db.session.add(XiaoShuoTitles(t_title, t_url))
        db.session.commit()
    titles = XiaoShuoTitles.query.all()
    # No trailing slash: Flask decides on autoescaping from the template
    # name's extension, so the name has to end in ".html".
    return render_template('index/index.html', titles=titles)


@pandatxt.route('/getchapterurl/<int:id>/')
def get_every_chapter_url(id):
    """
    Collect the chapter URLs of one book (on first access) and render the
    chapter list.

    The chapter links are scraped from ``div#list`` of the book page only
    when no chapter is stored for the book yet. If the page cannot be
    fetched or has no such div, nothing is stored and the (empty) list is
    rendered.

    :param id: primary key of the book in the xiaoshuotitle table
    :return: the rendered chapter list template; 404 if the book is unknown
    """
    # Number of leading anchors in div#list that are skipped.
    # NOTE(review): presumably the "latest chapters" shortcut links that
    # precede the full list - confirm against the site markup.
    skip_links = 9

    # Respond with 404 instead of failing on None for an unknown book id.
    book = XiaoShuoTitles.query.get_or_404(id)
    first_chapter = XiaoShuoEveryChapter.query.filter(
        XiaoShuoEveryChapter.c_id == id
    ).first()
    if first_chapter is None:
        req = requests.get(book.t_url, timeout=10)
        list_div = bs(req.text, 'html5lib').find('div', id='list') if req.ok else None
        if list_div is not None:
            for a in list_div.find_all('a')[skip_links:]:
                db.session.add(XiaoShuoEveryChapter(a.get('href'), book.id, a.text))
            db.session.commit()
    chapters = XiaoShuoEveryChapter.query.filter(XiaoShuoEveryChapter.c_id == id)
    # No trailing slash: Flask decides on autoescaping from the template
    # name's extension, so the name has to end in ".html".
    return render_template('chapters/chapters.html', chapters=chapters)


@pandatxt.route('/getcontent/<int:id>/')
def get_content(id):
    """
    Render the content of one chapter, downloading and storing it on first access.

    :param id: primary key of the chapter in the xiaoshuoeverychapter table
    :return: the rendered content template; 404 if the chapter is unknown,
        502 if the chapter page cannot be fetched or has no ``div#content``
    """
    # Local import: the module-level flask import does not include abort.
    from flask import abort

    # Respond with 404 instead of failing on None for an unknown chapter id.
    chapter = XiaoShuoEveryChapter.query.get_or_404(id)
    content = XiaoShuoContents.query.filter(XiaoShuoContents.ec_id == id).first()
    if content is None:
        req = requests.get(chapter.ec_url, timeout=10)
        if not req.ok:
            abort(502)
        # Name the parser explicitly, consistent with the other views.
        div_content = bs(req.text, 'html5lib').find('div', id='content')
        if div_content is None:
            # Do not store the string "None" as the chapter content.
            abort(502)
        content = XiaoShuoContents(str(div_content), id)
        db.session.add(content)
        db.session.commit()
    return render_template('content/content.html', content=content)

5. templates视图层效果展示

章节展示页面

内容查看页面

最后，目前这个小项目只能算一个小说网站的雏形，后面我还会进一步优化这个项目，比如把数据库设计得更全面一点，加入小说类型分类、创建时间、更新时间、点击排名等字段；还会加入一些新的功能，比如小说下载功能等。