*、去除HTML元素，并将结果截断到指定长度
public static String splitAndFilterString(String input, int length) { if (input == null || input.trim().equals("")) { return ""; } // 去掉所有html元素, //-------------------- String htmlStr = input; // 含html标签的字符串 String str = ""; java.util.regex.Pattern p_script; java.util.regex.Matcher m_script; java.util.regex.Pattern p_style; java.util.regex.Matcher m_style; java.util.regex.Pattern p_html; java.util.regex.Matcher m_html; java.util.regex.Pattern p_ba; java.util.regex.Matcher m_ba; try { String regEx_script = "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*? \\/[\\s]*?script[\\s]*?>"; // 定义script的正则表达式{或<script[^>]*?>[\\s\\S]*?<\\/script> // } String regEx_style = "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*? \\/[\\s]*?style[\\s]*?>"; // 定义style的正则表达式{或<style[^>]*?>[\\s\\S]*?<\\/style> // } String regEx_html = "<[^>]+>"; // 定义HTML标签的正则表达式 String patternStr = "\\s+"; p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE); m_script = p_script.matcher(htmlStr); htmlStr = m_script.replaceAll(""); // 过滤script标签 p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE); m_style = p_style.matcher(htmlStr); htmlStr = m_style.replaceAll(""); // 过滤style标签 p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE); m_html = p_html.matcher(htmlStr); htmlStr = m_html.replaceAll(""); // 过滤html标签 p_ba = Pattern.compile(patternStr, Pattern.CASE_INSENSITIVE); m_ba = p_ba.matcher(htmlStr); htmlStr = m_ba.replaceAll(" "); // 过滤空格 str = htmlStr; } catch (Exception e) { e.printStackTrace(); } int len = str.length(); if (len <= length) { str = str.replace("\'", "\''"); return str; } else { str = str.substring(0, length); str += "..."; str = str.replace("\'", "\''"); } return str; }
input：带有HTML元素的字符串（为 null 或空白时返回空字符串）
length：目标长度，即去除HTML后保留的最大字符数；超出时截断并在末尾追加"..."