htmlPath是本地网页文件路径
读取文件时使用的字符编码要与网页本身的编码一致（代码中使用 UTF-8）
package Spider;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads a local HTML file as UTF-8 and prints the first region of its text
 * that matches an anchor-style regular expression.
 *
 * <p>Assign {@link #htmlPath} before calling {@link #doc()}.
 */
public class Doc {

    /**
     * Matches {@code <a}, then one or more arbitrary characters (whitespace
     * included), then {@code /a>}. The quantifier is greedy, so a match runs
     * from the first {@code <a} to the last {@code /a>} in the text. The
     * {@code <a} prefix is not limited to anchor tags; any tag whose name
     * starts with "a" also starts a match.
     */
    private static final Pattern ANCHOR_PATTERN = Pattern.compile("<a[\\s\\S]+/a>");

    /** Path of the local HTML file to read; must be set before {@link #doc()} runs. */
    String htmlPath = null;

    /**
     * Text of the file read by the most recent {@link #doc()} call. Lines are
     * appended without their line terminators, so the content is one long line.
     */
    StringBuilder sb = new StringBuilder();

    /**
     * Loads the file at {@link #htmlPath} into {@link #sb}, prints the first
     * match of the anchor pattern (if there is one) to standard output, and
     * then prints a completion marker.
     *
     * @throws NullPointerException if {@link #htmlPath} has not been set
     * @throws Exception if the file cannot be opened or read
     */
    public void doc() throws Exception {
        if (htmlPath == null) {
            throw new NullPointerException("htmlPath must be set before calling doc()");
        }
        File input = new File(htmlPath);

        // Start from an empty buffer; otherwise a second call would search the
        // previous file's text concatenated with the new one.
        sb.setLength(0);

        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(input), StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                // readLine() strips the terminator and none is re-added here.
                sb.append(line);
            }
        }

        Matcher matcher = ANCHOR_PATTERN.matcher(sb);
        // group() throws IllegalStateException after a failed find(), so the
        // match is printed only when one actually exists.
        if (matcher.find()) {
            System.out.println(matcher.group());
        }
        System.out.println("完毕");
    }
}
2019-05-16 02:05:07