Js:F12 控制台
1.直接输出
- 一日一技 | Bilibili 视频合集信息提取
- B站页面的标签(class 名称)可能会变动;如果脚本失效,按 F12 打开开发者工具,查找最新的 class 名称并替换
- 从"稍后播放"进入,与从"收藏 / 搜索页"进入,打开的是两种不同的页面,标签(class 名称)也不一样
// Prints the title of every part (分P) of a Bilibili multi-part video to the
// console, one title per paragraph, so the output can be saved from DevTools.
//
// Two page layouts are supported:
//   * opened from "Watch later":  .multip-list  > .multip-list-item
//     e.g. https://www.bilibili.com/list/watchlater?bvid=BV1Kr4y1i7ru&oid=765670802
//   * opened from favourites or search:  .list-box > li
//     e.g. https://www.bilibili.com/video/BV1Kr4y1i7ru
var result = '';
var content = document.getElementsByClassName('multip-list')[0];
if (content) {
    content = content.querySelectorAll('.multip-list-item');
} else {
    // Guard against a missing container instead of throwing a TypeError when
    // neither layout is present (e.g. the class names changed).
    var partListBox = document.getElementsByClassName('list-box')[0];
    content = partListBox ? partListBox.querySelectorAll('li') : [];
}
if (content.length === 0) {
    console.warn('未找到分P列表:页面的 class 名称可能已变动,请按 F12 确认');
}
for (var i = 0; i < content.length; i++) {
    // innerText splits into either
    //   ["P184", "32. 运维-分库分表-Mycat管理工具", "09:53"]   (index, title, duration)
    // or
    //   ["32. 运维-分库分表-Mycat管理工具", "09:53"]           (title, duration)
    // Declared with `let` so it no longer leaks as an implicit global.
    let lines = content[i].innerText.split('\n');

    // Option 1: output title and duration separated by a tab:
    //   result += lines.join('\t');
    // Option 2 (active): output the title only; stripping whitespace is optional.
    let index = lines.length > 2 ? 1 : 0;
    result += lines[index].replace(/\s+/g, '');

    // Leave one empty line between titles.
    result += '\n\n';
}
// The text is printed unchanged: the former comma-to-tab replacement altered
// titles that legitimately contain commas.
console.log(result);
- 右键将console所有内容保存到文件
2.加强版:导出excel
// Reads the part (分P) list of the current Bilibili video page and downloads it
// as an HTML table served with the Excel MIME type, so that it can be opened
// in a spreadsheet application.
var listBox = document.getElementsByClassName("list-box")[0],
    liList = listBox ? listBox.getElementsByTagName("li") : [],
    headingElement = document.getElementsByTagName("h1")[0],
    title = headingElement ? headingElement.title : "",
    musicList = [],
    // Part of the same `var` statement now; previously a stray `;` turned this
    // into an implicit global.
    musicTimeList = [];

// Escapes text before it is interpolated into the generated HTML, so titles
// containing `<`, `>`, `&` or `"` cannot break the table markup.
var escapeHtml = function escapeHtml(text) {
    return String(text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;");
};

[].forEach.call(liList, (item) => {
    var link = item.getElementsByTagName("a")[0];
    var duration = item.getElementsByClassName("duration")[0];
    // Fall back to an empty string when an expected child element is missing.
    musicList.push(link ? link.title.replace(/\s+/g, "") : "");
    musicTimeList.push(duration ? duration.innerText : "");
});

// Builds the complete HTML document containing the table of parts.
var buildHtml = function buildHtml() {
    // Width of the table on screen, derived from the longest title
    // (of little relevance for the saved file).
    var maxWidth = musicList.reduce((widest, name) => Math.max(widest, name.length), 0);
    var fontSize = 20;

    var rows = musicList
        .map((name, index) => `<tr align="center">
<td>${index + 1}</td>
<td align="left">${escapeHtml(name)}</td>
<td>${escapeHtml(musicTimeList[index])}</td>
</tr>`)
        .join("");

    // <tbody> replaces the invalid <thbody> tag used previously.
    var tempDom = `<table id="music" border="1" cellspacing="0" cellpadding="20" width='${maxWidth * fontSize}px'>
<caption style="font-size: ${fontSize}px; font-weight: bold;">${escapeHtml(title)}</caption>
<thead style="background-color: darkorange;">
<tr align="center">
<th>序号</th>
<th>分P名称</th>
<th>时长</th>
</tr>
</thead>
<tbody>${rows}</tbody>
</table>`;

    // To preview in the page instead of downloading:
    //   document.body.innerHTML = tempDom;
    return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
</head>
<body>
${tempDom}
</body>
</html>`;
};

// Triggers a browser download of `content`, named after the video title.
var downloadText = function downloadText(content) {
    var blob = new Blob([content], {
        type: "application/vnd.ms-excel" });
    var url = window.URL.createObjectURL(blob);
    var a = document.createElement("a");
    a.download = title || "bilibili分P目录";
    a.href = url;
    a.click();
    window.URL.revokeObjectURL(url);
};

if (musicList.length === 0) {
    // Nothing to export: either the video has a single part or the class
    // names used above no longer match the page.
    console.warn("未找到分P列表:页面的 class 名称可能已变动,请按 F12 确认");
} else {
    downloadText(buildHtml());
}
java
1.主要代码
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.parser.Feature;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Exports the part (分P) list of a Bilibili video page to an Excel file.
 *
 * <p>The part list is not available as {@code <li>} elements in the HTML returned to
 * Jsoup; the page delivers it inside a {@code <script>} element that assigns a JSON
 * object to {@code window.__INITIAL_STATE__}. This class extracts that JSON and reads
 * {@code videoData.pages} from it.
 *
 * <p>Background reading:
 * https://blog.csdn.net/qq_37284798/article/details/125410786,
 * https://blog.csdn.net/jun123355/article/details/126459050,
 * https://blog.csdn.net/qq_23114831/article/details/122434465
 */
public class BilibiliUtil {

    /** Video page used when no URL is passed on the command line. */
    private static final String DEFAULT_VIDEO_URL = "https://www.bilibili.com/video/BV1Kr4y1i7ru/";

    /** Output directory used when none is passed on the command line. */
    private static final String DEFAULT_OUTPUT_DIR = "D:\\export_tmp";

    /** Start of the script text that carries the page state as JSON. */
    private static final String INITIAL_STATE_PREFIX = "window.__INITIAL_STATE__";

    /**
     * Downloads a video page and writes its part list to {@code <outputDir>/<title>.xlsx}.
     *
     * @param args optional: {@code args[0]} is the video URL, {@code args[1]} the output
     *             directory; both fall back to the built-in defaults
     * @throws Exception if the page cannot be fetched or the Excel file cannot be written
     */
    public static void main(String[] args) throws Exception {
        String videoUrl = args.length > 0 ? args[0] : DEFAULT_VIDEO_URL;
        String outputDir = args.length > 1 ? args[1] : DEFAULT_OUTPUT_DIR;

        Document doc = Jsoup.connect(videoUrl).get();

        JSONObject initialState = findInitialState(doc);
        if (initialState == null) {
            System.err.println("No window.__INITIAL_STATE__ script found in " + videoUrl);
            return;
        }

        // Expected shape: {"videoData": {"title": "...", "pages": [
        //   {"page": 1, "part": "01.MySQL课程介绍", "duration": 552, ...}, ...]}}
        JSONObject videoData = initialState.getJSONObject("videoData");
        JSONArray pages = videoData == null ? null : videoData.getJSONArray("pages");
        if (pages == null || pages.isEmpty()) {
            System.err.println("No videoData.pages entries found in " + videoUrl);
            return;
        }

        JSONObject workbookJson = new JSONObject();
        workbookJson.put("sheet", toSheetRows(pages));

        String fileName = sanitizeFileName(resolveTitle(doc, videoData)) + ".xlsx";
        String target = new java.io.File(outputDir, fileName).getPath();
        JsonToExcel.jsonToExcel(target, workbookJson);
        System.out.println("生成文件成功:" + target);
    }

    /**
     * Finds the script that assigns {@code window.__INITIAL_STATE__} and parses its JSON.
     *
     * @return the parsed state, or {@code null} when no such script exists
     */
    private static JSONObject findInitialState(Document doc) {
        for (Element script : doc.select("script")) {
            String data = script.data();
            if (data == null) {
                continue;
            }
            String text = data.trim();
            if (!text.startsWith(INITIAL_STATE_PREFIX)) {
                continue;
            }
            // Locate the opening brace rather than assuming the exact
            // "window.__INITIAL_STATE__=" prefix, so optional whitespace is tolerated.
            int jsonStart = text.indexOf('{', INITIAL_STATE_PREFIX.length());
            if (jsonStart < 0) {
                continue;
            }
            // NOTE(review): any script text following the JSON object is passed to the
            // parser unchanged, exactly as before - confirm the parser accepts it.
            // Feature.OrderedField keeps the key order of the parsed objects.
            return JSONObject.parseObject(text.substring(jsonStart), Feature.OrderedField);
        }
        return null;
    }

    /** Converts the {@code pages} array into ordered rows (集数, 标题, 时长) for export. */
    private static JSONArray toSheetRows(JSONArray pages) {
        JSONArray rows = new JSONArray();
        for (int i = 0; i < pages.size(); i++) {
            JSONObject page = pages.getJSONObject(i);
            if (page == null) {
                continue;
            }
            // LinkedHashMap keeps the column order stable in the exported sheet.
            Map<String, Object> row = new LinkedHashMap<>();
            row.put("集数", page.get("page"));
            String part = page.getString("part");
            row.put("标题", part == null ? "" : part.replaceAll("\\s+", ""));
            row.put("时长", secondConvertHourMinSecond(page.getLongValue("duration")));
            rows.add(row);
        }
        return rows;
    }

    /**
     * Picks the video title: the {@code title} attribute of the first {@code <h1>},
     * then {@code videoData.title}, then a fixed fallback.
     */
    private static String resolveTitle(Document doc, JSONObject videoData) {
        Element heading = doc.getElementsByTag("h1").first();
        String title = heading == null ? "" : heading.attr("title");
        if (title.trim().isEmpty()) {
            String jsonTitle = videoData.getString("title");
            title = jsonTitle == null ? "" : jsonTitle;
        }
        return title.trim().isEmpty() ? "bilibili" : title.trim();
    }

    /** Replaces characters that Windows does not allow in file names with {@code _}. */
    private static String sanitizeFileName(String name) {
        return name.replaceAll("[\\\\/:*?\"<>|]", "_");
    }

    /**
     * Formats a number of seconds as {@code mm:ss}, or {@code hh:mm:ss} once it reaches
     * one hour; for example {@code 552 -> "09:12"} and {@code 3661 -> "01:01:01"}.
     *
     * <p>See https://blog.51cto.com/u_11269274/5252232 and
     * https://www.ab62.cn/article/18304.html
     *
     * @param second the duration in seconds; must not be negative
     * @return the formatted duration
     * @throws IllegalArgumentException if {@code second} is negative
     */
    public static String secondConvertHourMinSecond(long second) {
        if (second < 0) {
            throw new IllegalArgumentException("second must not be negative: " + second);
        }
        // Stay in long arithmetic: narrowing to int would overflow for large values.
        long hh = second / 3600;
        long mm = (second % 3600) / 60;
        long ss = second % 60;
        return hh == 0
                ? String.format("%02d:%02d", mm, ss)
                : String.format("%02d:%02d:%02d", hh, mm, ss);
    }
}
2.pom
<!-- Parse HTML in Java -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.1</version>
</dependency>
<!-- JSON parsing (fastjson) -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>2.0.7</version>
</dependency>
<!-- Declaring poi-ooxml also brings in poi, so poi is not declared separately -->
<!-- <dependency>-->
<!-- <groupId>org.apache.poi</groupId>-->
<!-- <artifactId>poi</artifactId>-->
<!-- <version>4.1.0</version>-->
<!-- </dependency>-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
3.通用json转excel工具
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.parser.Feature;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
/**
 * Converts JSON to an Excel workbook; every top-level JSON array becomes one sheet.
 *
 * <p>Based on https://blog.csdn.net/hanne_lovegood/article/details/124351031
 */
public class JsonToExcel {

    /**
     * Reads a JSON file and writes it as an Excel file.
     *
     * @param args optional: {@code args[0]} is the JSON file path, {@code args[1]} the
     *             Excel file to create; both fall back to the built-in defaults
     * @throws Exception if the JSON cannot be parsed or the Excel file cannot be written
     */
    public static void main(String[] args) throws Exception {
        // Path of the JSON input file.
        String jsonFilePath = args.length > 0 ? args[0] : "D:\\export_tmp\\test4.txt";
        // Path and name of the generated file.
        String fileName = args.length > 1 ? args[1] : "D:\\export_tmp\\test4.xlsx";

        File file = new File(jsonFilePath);
        if (!file.exists()) {
            System.out.println("json文件不存在");
            // Stop here: there is nothing to read.
            return;
        }
        String jsonString = readFileContent(file);
        if (jsonString == null || jsonString.trim().length() == 0) {
            System.out.println("文件内容为null");
            // Stop here: parsing empty content cannot yield anything to export.
            return;
        }

        // The file may hold either an object of arrays or a bare array
        // (https://blog.csdn.net/weixin_45353083/article/details/109749473).
        // Feature.OrderedField keeps the key order of the parsed objects.
        Object parsed = JSONObject.parse(jsonString, Feature.OrderedField);
        JSONObject jsonObject;
        if (parsed instanceof JSONObject) {
            jsonObject = (JSONObject) parsed;
        } else if (parsed instanceof JSONArray) {
            // Wrap a bare array so that it is exported as a single sheet.
            jsonObject = new JSONObject();
            jsonObject.put("sheet", parsed);
        } else {
            System.out.println("文件格式错误");
            return;
        }

        jsonToExcel(fileName, jsonObject);
        System.out.println("生成文件成功:" + fileName);
    }

    /**
     * Exports every top-level, non-empty JSON array of {@code jsonObject} as a sheet.
     *
     * <p>Only the first level is processed: values that are not arrays are skipped and
     * nested JSON is written as its string form. The workbook format follows the file
     * name: names ending in {@code .xlsx} are written as OOXML, all others in the
     * binary {@code .xls} format, so the content matches the extension.
     *
     * @param fileName   path of the file to create, expected to be absolute
     * @param jsonObject object whose array-valued entries become sheets; when it has
     *                   more than one key each sheet is named after its key, otherwise
     *                   the sheet is named {@code sheet}
     * @throws Exception if the target directory or file cannot be created or written
     */
    public static void jsonToExcel(String fileName, JSONObject jsonObject) throws Exception {
        Set<String> arrayKeys = jsonObject.keySet();
        try (Workbook wb = createWorkbook(fileName)) {
            for (String arrayKey : arrayKeys) {
                Object value = jsonObject.get(arrayKey);
                if (!(value instanceof JSONArray)) {
                    // Not an array: skipped, nested JSON is not handled.
                    continue;
                }
                JSONArray jsonArray = (JSONArray) value;
                if (jsonArray.isEmpty()) {
                    continue;
                }
                Sheet sheet = wb.createSheet(arrayKeys.size() > 1 ? arrayKey : "sheet");
                writeSheet(sheet, jsonArray);
            }
            if (wb.getNumberOfSheets() == 0) {
                // Make sure the written file always contains at least one sheet.
                wb.createSheet("sheet");
            }

            File target = new File(fileName);
            File parent = target.getAbsoluteFile().getParentFile();
            if (parent != null && !parent.exists() && !parent.mkdirs()) {
                throw new IOException("Cannot create directory " + parent);
            }
            // try-with-resources closes the stream even when writing fails.
            try (FileOutputStream output = new FileOutputStream(target)) {
                wb.write(output);
            }
        }
    }

    /** Chooses the workbook implementation that matches the extension of {@code fileName}. */
    private static Workbook createWorkbook(String fileName) {
        boolean ooxml = fileName.regionMatches(true, fileName.length() - 5, ".xlsx", 0, 5);
        return ooxml ? new XSSFWorkbook() : new HSSFWorkbook();
    }

    /**
     * Writes a header row followed by one row per array element.
     *
     * <p>The header is the union of all element keys in first-seen order, and every value
     * is placed under its own key, so rows stay aligned with the header even when the
     * elements do not share the same keys or key order.
     */
    private static void writeSheet(Sheet sheet, JSONArray jsonArray) {
        List<JSONObject> items = new ArrayList<>(jsonArray.size());
        Set<String> columns = new LinkedHashSet<>();
        for (int i = 0; i < jsonArray.size(); i++) {
            JSONObject item = jsonArray.getJSONObject(i);
            items.add(item);
            if (item != null) {
                columns.addAll(item.keySet());
            }
        }

        Row header = sheet.createRow(0);
        int columnNo = 0;
        for (String column : columns) {
            header.createCell(columnNo++).setCellValue(column);
        }

        for (int i = 0; i < items.size(); i++) {
            // Data starts on the second row; a null element leaves its row empty.
            Row row = sheet.createRow(i + 1);
            JSONObject item = items.get(i);
            if (item == null) {
                continue;
            }
            columnNo = 0;
            for (String column : columns) {
                Cell cell = row.createCell(columnNo++);
                String text = item.getString(column);
                if (text != null) {
                    cell.setCellValue(text);
                }
            }
        }
    }

    /**
     * Reads a UTF-8 text file and returns its lines concatenated without line breaks.
     *
     * <p>Reading is best effort: on an I/O error the stack trace is printed and whatever
     * was read up to that point is returned.
     *
     * @param file the file to read
     * @return the concatenated lines; empty when nothing could be read
     */
    public static String readFileContent(File file) {
        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader exactly once, on success and on failure.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return content.toString();
    }
}
扩展:B站进入页面如何让它不要自动播放
- 取消勾选
扩展:B站分集为什么不连播
- 播放设置–更多播放设置–播放设置–自动切换