import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small utility that reads a text/HTML file and extracts the {@code src}
 * value of the image tags it contains.
 */
public class PatternTest {

    /** File read by {@link #main(String[])} when no path argument is given. */
    private static final String DEFAULT_PATH = "d:/test.txt";

    /** Tag pattern used by {@link #main(String[])}: any {@code <img ...>} tag, case-insensitive. */
    private static final String IMG_TAG_REGEX = "<(?i)img.*?>";

    /**
     * Matches a {@code src} attribute and captures its value in group 1
     * (double-quoted), group 2 (single-quoted) or group 3 (unquoted).
     * The look-behind keeps attributes such as {@code data-src} or
     * {@code xsrc} from being mistaken for {@code src}.
     */
    private static final Pattern SRC_ATTRIBUTE = Pattern.compile(
            "(?<![\\w-])src\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))",
            Pattern.CASE_INSENSITIVE);

    /**
     * Reads a file and prints every image tag found in it.
     *
     * @param args optional; {@code args[0]} is the path of the file to scan,
     *             defaulting to {@code d:/test.txt}
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        PatternTest t = new PatternTest();
        String content = t.redFIle(path);
        t.replaceHref(content, IMG_TAG_REGEX);
    }

    /**
     * Reads the whole file as UTF-8 text, keeping line terminators intact.
     *
     * <p>This is best-effort: if the file cannot be opened or read, the stack
     * trace is printed and whatever was read so far (possibly the empty
     * string) is returned.
     *
     * @param path path of the file to read
     * @return the file content, or a partial/empty string on failure
     */
    public String redFIle(String path) {
        StringBuilder sb = new StringBuilder();
        // try-with-resources closes the stream even when reading fails.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8))) {
            // Read raw characters instead of readLine(): dropping the line
            // breaks would glue tokens from adjacent lines together.
            char[] buffer = new char[8192];
            int read;
            while ((read = br.read(buffer)) != -1) {
                sb.append(buffer, 0, read);
            }
        } catch (IOException | RuntimeException e) {
            // RuntimeException covers e.g. a null path, which previously also
            // resulted in an empty string rather than a crash.
            e.printStackTrace();
        }
        return sb.toString();
    }

    /**
     * Finds every tag matching {@code regex} in {@code hBody}, prints each
     * matched tag to standard output, and returns the {@code src} value of
     * the <em>last</em> matched tag.
     *
     * @param hBody text to scan; {@code null} is treated as empty
     * @param regex regular expression matching a whole tag (compiled with
     *              {@link Pattern#DOTALL})
     * @return the trimmed {@code src} value of the last matched tag, or the
     *         empty string when nothing matched or that tag has no
     *         {@code src} attribute
     */
    public String replaceHref(String hBody, String regex) {
        if (hBody == null) {
            return "";
        }
        String url = "";
        Matcher tags = Pattern.compile(regex, Pattern.DOTALL).matcher(hBody);
        while (tags.find()) {
            String tag = tags.group();
            System.out.println(tag); // print the matched img tag
            url = getContents(tag, SRC_ATTRIBUTE).trim();
        }
        return url;
    }

    /**
     * Extracts the attribute value captured by {@code attribute} from a tag.
     * When the attribute occurs several times the last occurrence wins.
     *
     * @param contents  text of a single tag
     * @param attribute pattern whose capturing groups hold the value
     * @return the captured value, or the empty string when there is no match
     */
    private String getContents(String contents, Pattern attribute) {
        String value = "";
        Matcher m = attribute.matcher(contents);
        while (m.find()) {
            value = firstCapturedGroup(m);
        }
        return value;
    }

    /** Returns the first participating capture group, or the whole match if the pattern has none. */
    private static String firstCapturedGroup(Matcher m) {
        for (int i = 1; i <= m.groupCount(); i++) {
            String captured = m.group(i);
            if (captured != null) {
                return captured;
            }
        }
        return m.group();
    }
}
提取页面、文件中的链接
猜你喜欢
转载自javapx.iteye.com/blog/1958294
今日推荐
周排行