1. 导入第三方jar,3个都要下载
2. 引入jar,右键项目Build Path
3. 导入jar
4. 代码编写
使用File类加载文件,我使用的是绝对路径(完整代码如下)
package com.test.demo;

import java.io.File;
import java.util.ArrayList;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * Dumps the {@code doc-table} tables of a saved HTML page to standard output
 * as an object-literal-like text structure.
 *
 * <p>Each table is labelled with the text of an {@code <h4>} heading: the
 * first table uses the heading that follows {@code <h4>Usage Example</h4>},
 * the second table the heading after that, and so on. Every table row that
 * contains {@code <td>} cells becomes one {@code {key: "value",...},} line,
 * where the keys are the table's {@code <th>} texts.
 */
public class testDemoH4 {

    /** File parsed when no path is given on the command line. */
    private static final String DEFAULT_HTML_PATH = "D:/html/panel.html";

    /** Text of the {@code <h4>} heading that precedes the headings of the wanted tables. */
    private static final String ANCHOR_HEADING = "Usage Example";

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} is the path of the HTML file to parse.
     *             Without arguments the default path is used.
     * @throws Exception if the file cannot be read or does not have the expected layout
     */
    public static void main(String[] args) throws Exception {
        if (args.length > 0) {
            parseHtmlH4(new File(args[0]));
        } else {
            parseHtmlH4();
        }
    }

    /**
     * Parses the default HTML file and prints its tables.
     *
     * @throws Exception if the file cannot be read or does not have the expected layout
     */
    private static void parseHtmlH4() throws Exception {
        parseHtmlH4(new File(DEFAULT_HTML_PATH));
    }

    /**
     * Parses the given HTML file and prints every {@code doc-table} element.
     *
     * @param file the HTML file to read (decoded as UTF-8)
     * @throws Exception if the file cannot be read, or if there are fewer
     *                   {@code <h4>} headings than tables
     */
    private static void parseHtmlH4(File file) throws Exception {
        // Check existence first so nothing is printed for a missing file.
        if (!file.exists()) {
            System.err.println("File not found: " + file.getPath());
            return;
        }

        // File name without its extension; a name without a dot is used as-is.
        String fileName = file.getName();
        int dot = fileName.lastIndexOf('.');
        String baseName = dot >= 0 ? fileName.substring(0, dot) : fileName;

        System.out.println(baseName + ": ");
        System.out.println("{");

        Document doc = Jsoup.parse(file, "UTF-8");
        // Elements carrying class "doc-table" (expected to be the documentation tables).
        Elements tables = doc.getElementsByClass("doc-table");
        Elements h4s = doc.select("h4");

        // Collect all heading texts so they can be looked up by position.
        ArrayList<String> headings = new ArrayList<String>();
        for (int q = 0; q < h4s.size(); q++) {
            headings.add(h4s.get(q).text());
        }

        // The wanted headings start right after the anchor heading. When the anchor
        // is absent indexOf returns -1, so pairing starts at the first heading.
        int anchorPos = headings.indexOf(ANCHOR_HEADING);

        for (int i = 0; i < tables.size(); i++) {
            int headingPos = anchorPos + 1 + i;
            if (headingPos >= headings.size()) {
                throw new IllegalStateException(
                        "No <h4> heading available for table #" + i + " in " + file.getPath()
                        + " (found " + headings.size() + " headings, " + tables.size() + " tables)");
            }
            System.out.println(headings.get(headingPos) + ": [");
            printRows(tables.get(i).select("tr"), i);
            System.out.println("]" + ",");
        }
        System.out.println("}");
    }

    /**
     * Prints one line per table row that contains {@code <td>} cells.
     *
     * @param rows       the {@code <tr>} elements of one table
     * @param tableIndex zero-based position of the table, used only in error messages
     */
    private static void printRows(Elements rows, int tableIndex) {
        // Header texts of this table, accumulated across rows and looked up by column.
        ArrayList<String> headers = new ArrayList<String>();

        for (int j = 0; j < rows.size(); j++) {
            Elements th = rows.get(j).select("th");
            Elements td = rows.get(j).select("td");

            for (int w = 0; w < th.size(); w++) {
                headers.add(th.get(w).text());
            }
            if (td.isEmpty()) {
                continue; // header-only row: nothing to print
            }

            StringBuilder line = new StringBuilder("{");
            int lastCell = td.size() - 1;
            for (int w = 0; w <= lastCell; w++) {
                if (w >= headers.size()) {
                    throw new IllegalStateException(
                            "Table #" + tableIndex + ", row " + j + ": no <th> header for column " + w);
                }
                String value = td.get(w).text();

                // A last cell mentioning "Return" marks the entry as having a return value.
                if (w == lastCell && value.contains("Return")) {
                    line.append("hasReturn").append(": ").append("true").append(",");
                }
                line.append(headers.get(w)).append(": ").append(quote(value));
                // Close the object on the last cell; this also covers single-cell rows.
                line.append(w == lastCell ? "}," : ",");
            }
            System.out.println(line);
        }
    }

    /**
     * Wraps a value in double quotes, escaping backslashes and double quotes so
     * the emitted string literal stays well-formed.
     */
    private static String quote(String value) {
        return "\"" + value.replace("\\", "\\\\").replace("\"", "\\\"") + "\"";
    }
}
5. 下载的网页:
6. 最后成果