xml文件如下:
<?xml version="1.0" encoding="UTF-8"?> <articles> <article category="xml"> <title>xml概述</title> <author>janet</author> <email>janet@example.com</email> <date>20080801</date> </article> <article category="java"> <title>Java基本语法</title> <author>janet</author> <email>janet@example.com</email> <date>20080802</date> </article> </articles>
注意:xml文件的顶部<?xml version="1.0" encoding="UTF-8"?>这句话之前不能有任何空格,空行之类的,否则会出错。
1、用纯DOM来做。
用Element root=document.getDocumentElement()拿到根后,然后不停的遍历即可。
DOM的特点是将XML映射成一个Document,是一次性将所有XML全部载入到内存中。
例子如下:
package testXmlParse.dom;

import java.io.File;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Plain-DOM example: loads the whole XML file into a {@link Document} and
 * walks the children of the root element, printing every {@code article}.
 */
public class TestXml_DOM {

    /** Child elements of an article that get printed as {@code name:text}. */
    private static final String[] PRINTED_FIELDS = {"title", "author", "email", "date"};

    /**
     * Parses the hard-coded articles file and prints the root element name
     * followed by the category and fields of each article.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File file = new File("D:\\temp\\Bosch\\articles.xml");
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document document = db.parse(file); // parse the file into a DOM tree

            // Start from the root element.
            Element root = document.getDocumentElement();
            System.out.println(root.getNodeName());

            // Visit every direct child of the root; only <article> nodes are of interest.
            NodeList topLevel = root.getChildNodes();
            int topLevelCount = topLevel.getLength();
            for (int i = 0; i < topLevelCount; i++) {
                Node candidate = topLevel.item(i);
                if (!"article".equals(candidate.getNodeName())) {
                    continue;
                }
                printArticle(candidate);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Prints the category attribute of one article node, then each known child field. */
    private static void printArticle(Node article) {
        String category = article.getAttributes().getNamedItem("category").getNodeValue();
        System.out.println("\r\n找到一本新书,书的分类是:" + category + ".");

        NodeList fields = article.getChildNodes();
        for (int j = 0; j < fields.getLength(); j++) {
            Node field = fields.item(j);
            String fieldName = field.getNodeName();
            for (int k = 0; k < PRINTED_FIELDS.length; k++) {
                if (PRINTED_FIELDS[k].equals(fieldName)) {
                    // The printed label is the element name itself.
                    System.out.println(PRINTED_FIELDS[k] + ":" + field.getTextContent());
                    break;
                }
            }
        }
    }
}
2、用SAX来做:
比纯DOM方便、快得多:SAX是事件驱动的流式解析,边读边回调,程序只需保留自己关心的数据,不像DOM那样把整个文档全部载入内存。
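下面的例子以File为载体;SAXParser.parse同样接受InputStream和InputSource,所以单纯的String也可以解析,例如:parser.parse(new InputSource(new StringReader(xml)), new MySaxHandler())。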
package testXmlParse.sax;

import java.io.File;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

/**
 * SAX example: streams an XML file through {@link MySaxHandler}, which prints
 * the articles it encounters.
 */
public class XmlParse {

    /**
     * Default input file. This is the same articles file the DOM and dom4j
     * examples read; the handler only recognises {@code article} elements.
     */
    private static final String DEFAULT_XML_PATH = "d:/temp/Bosch/articles.xml";

    /**
     * Parses an XML file with SAX.
     *
     * @param args optional; {@code args[0]} overrides the default file path
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_XML_PATH;
        File xmlFile = new File(path);
        SAXParserFactory factory = SAXParserFactory.newInstance();
        try {
            SAXParser parser = factory.newSAXParser();
            parser.parse(xmlFile, new MySaxHandler());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
package testXmlParse.sax; import java.text.DateFormat; import java.text.SimpleDateFormat; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; public class MySaxHandler extends DefaultHandler { static DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private String content; @Override public void characters(char[] ch, int start, int length) throws SAXException { content = new String(ch, start, length); } @Override public void endElement(String uri, String localName, String name) throws SAXException { if ("title".equals(name)) { System.out.println("标题:" + content); } else if ("author".equals(name)) { System.out.println("作者:" + content); } else if ("email".equals(name)) { System.out.println("电子邮件:" + content); } else if ("body".equals(name)) { System.out.println("内容:" + content); } else if ("date".equals(name)) { System.out.println("发表日期:" + content); } } @Override public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { if ("article".equals(name)) { System.out.println("\r\n找到一篇文章,所属分类:" + attributes.getValue("category")+". "); } } }
3、DOM4J
package testXmlParse.dom4j; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.log4j.Logger; import org.dom4j.Element; /** * @autor germmy * @date 2012-12-11 * @version */ @SuppressWarnings("unchecked") public class XmlUtil { protected static Logger logger = Logger.getLogger(Dom4jParser.class); public static final String[]NODENAMES={"title","author","email","date"}; /** * 采用指定的编码读取XML * * @param fileName * @return */ public static String readFile_encoding(String fileName) { String encoding = "UTF-8"; // 字符编码 File file = new File(fileName); BufferedReader reader = null; StringBuffer sb = new StringBuffer(""); try { // reader = new BufferedReader(new FileReader(file)); InputStreamReader read = new InputStreamReader(new FileInputStream( file), encoding); reader = new BufferedReader(read); String tempString = null; int line = 1; while ((tempString = reader.readLine()) != null) { sb.append(tempString); line++; } reader.close(); } catch (IOException e) { e.printStackTrace(); } finally { if (reader != null) { try { reader.close(); } catch (IOException e1) { } } } return sb.toString(); } public static List getOrderInfoList(String xml){ List list=new ArrayList(); Map map=new HashMap(); try { List articles = Dom4jParser.getElements(xml,"article");// 所有的Item节点 if(articles!=null){ logger.debug("article.length["+articles.size()+"]"); for (int i = 0; i < articles.size(); i++) { map=new HashMap(); list.add(map); Element element = (Element) articles.get(i); for(int j=0;j<NODENAMES.length;j++){ Dom4jParser.parseText(map,element,NODENAMES[j]); } } } } catch (Exception e) { list=null; logger.error("",e); } return list; } public static void main(String[] args) { String xml=readFile_encoding("d:/temp/Bosch/articles.xml"); XmlUtil.getOrderInfoList(xml); 
//getOrders(ConfigUtil.testXml); } }
package testXmlParse.dom4j; import java.util.List; import java.util.Map; import org.apache.log4j.Logger; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Element; /** * @autor germmy * @date 2012-12-11 * @version */ @SuppressWarnings("unchecked") public class Dom4jParser { protected static Logger logger = Logger.getLogger(Dom4jParser.class); public static Document getDom4JDocument(String xml){ Document doc = null; try { doc = DocumentHelper.parseText(xml); } catch (DocumentException e) { logger.error("",e); } return doc; } public static Element getRoot(String xml){ Document doc=getDom4JDocument(xml); if(doc!=null){ Element root = doc.getRootElement();// 指向根节点 return root; }else{ return null; } } public static List getElements(String xml,String nodeName){ Document doc=getDom4JDocument(xml); List elements=null; if(doc!=null){ Element root = doc.getRootElement();// 指向根节点 if(root!=null){ elements = root.elements(nodeName);// 所有的Item节点 } } if(elements==null){ logger.debug("can't find elements["+nodeName+"] in xml"); } return elements; } public static void parseText(Map map,Element element,String nodeName){ Element e=element.element(nodeName); if(e!=null){ System.out.println("nodeName:"+nodeName); System.out.println("value:"+e.getTextTrim()); map.put(nodeName, e.getTextTrim()); }else{ logger.debug("can't find Element["+nodeName+"] in xml"); } } }
4、SAXReader,以前写过,待整理。
refurl:http://developer.51cto.com/art/200903/117512.htm xml解析的4个方法