Node.js 知乎热榜爬虫 导出为JSON文件(正则表达式)
环境
node.js v12.16.3
[email protected]
[email protected]
npm安装
npm install axios cheerio
实现代码
const cheerio = require("cheerio");
const axios = require("axios");
const fs = require("fs");
// Download the Zhihu billboard page, collect every hot-list entry
// (title, excerpt, picture URL) and write the result to ZhihuBillboard.json.
axios.get(`https://www.zhihu.com/billboard`).then((response) => {
  const $ = cheerio.load(response.data);

  // Built once instead of per entry. `[^"]*` stops at the closing quote of
  // the src attribute; a greedy `.*` would run to the LAST `" alt=` on the
  // line and could capture several attributes or tags at once.
  const imgSrcPattern = /<img src="([^"]*)" alt=/;

  const hotList = [];
  $("a.HotList-item").each((index, element) => {
    const item = $(element);
    const match = imgSrcPattern.exec(String(item.html()));

    hotList.push({
      title: item.find(".HotList-itemTitle").text(),
      // The excerpt has its own element; reading the title selector here
      // would just duplicate the title.
      excerpt: item.find(".HotList-itemExcerpt").text(),
      // Entries without a picture keep the "null" string placeholder so the
      // shape of the exported JSON stays the same for existing consumers.
      picture_url: match != null ? match[1].trim() : "null",
    });
  });

  fs.writeFile('ZhihuBillboard.json', JSON.stringify(hotList, null, "\t"), (err) => {
    if (err == null) {
      console.log("Successfully!");
    } else {
      console.log(err);
    }
  });
}).catch((err) => {
  // Network/HTTP failures and parsing errors end up here instead of
  // surfacing as an unhandled promise rejection.
  console.log(err);
});
测试结果
最后
- 由于博主水平有限,不免有疏漏之处,欢迎读者随时批评指正,以免造成不必要的误解!