1. 如果 XML 内容中存在特殊字符(例如 URL 中未转义的 &),处理方法请参考:http://blog.csdn.net/zhutulang/article/details/37736407
2. 父子节点同名(例如外层的 <product> 与 <URL> 下的 <product>)造成的解析问题,可以通过 count 计数的方式避免,具体代码如下:
public static void main(String [] args){ try { FileInputStream input = new FileInputStream(tmpFileStr+"/"+"farfetch.xml"); List<HashMap<String, String>> list = _readXml(input, "product"); String starttime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()); System.out.println("---"+starttime+"--------Farfetch开始更新-----------");
/**
 * Parses an XML stream with SAX and returns the records collected by {@link SaxHandler}.
 *
 * <p>The stream is always closed before this method returns, whether parsing
 * succeeded or failed.
 *
 * @param input    the XML document to read; closed by this method
 * @param nodeName the element name that delimits one record (passed to {@code SaxHandler})
 * @return the list produced by {@code SaxHandler#getList()}, or {@code null} when the
 *         parser could not be created or the document could not be read/parsed
 *         (the failure is printed to standard error)
 */
public static List<HashMap<String, String>> _readXml(InputStream input, String nodeName) {
    try {
        // NOTE(review): if the feed comes from an untrusted source, consider disabling
        // DTDs / external entities on the factory before creating the parser.
        SAXParserFactory spf = SAXParserFactory.newInstance();
        SAXParser parser = spf.newSAXParser();
        SaxHandler handler = new SaxHandler(nodeName);
        parser.parse(input, handler);
        return handler.getList();
    } catch (ParserConfigurationException e) {
        e.printStackTrace();
    } catch (SAXException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close on every path; previously the stream leaked whenever parsing threw.
        if (input != null) {
            try {
                input.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return null;
}
核心代码:
package tools;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that flattens every top-level {@code nodeName} element into one
 * {@code HashMap}: attribute names and descendant tag names become keys, attribute
 * values and element text become values.
 *
 * <p>An element named {@code nodeName} may contain descendants with the same name
 * (for example {@code <product>} containing {@code <URL><product>}); nesting depth is
 * tracked so that only the outermost end tag finishes a record.
 *
 * <p>The {@code currency} attribute of {@code discount} and {@code price} elements is
 * stored as {@code discount_currency} / {@code price_currency} so the two do not
 * overwrite each other; other attributes of those two elements are not stored.
 */
public class SaxHandler extends DefaultHandler {

    /** Record currently being filled, or {@code null} when outside a record. */
    private HashMap<String, String> map = null;

    /** Finished records; created in {@link #startDocument()}, so {@code null} before parsing. */
    private List<HashMap<String, String>> list = null;

    /** Tag of the element whose text is currently being read, or {@code null}. */
    private String currentTag = null;

    /** Name of the element that delimits one record. */
    private String nodeName = null;

    /** Number of currently open elements named {@code nodeName} (nesting depth). */
    int count = 0;

    /** Accumulates the text of the current element, which may arrive in several chunks. */
    StringBuilder sb = new StringBuilder();

    /**
     * @param nodeName name of the element that delimits one record
     */
    public SaxHandler(String nodeName) {
        this.nodeName = nodeName;
    }

    /**
     * @return the records collected so far; {@code null} if no document has been started
     */
    public List<HashMap<String, String>> getList() {
        return list;
    }

    /** Called once at the beginning of the document; resets all parsing state. */
    @Override
    public void startDocument() throws SAXException {
        list = new ArrayList<HashMap<String, String>>();
        map = null;
        currentTag = null;
        count = 0;
        sb.setLength(0);
    }

    /**
     * Called for every start tag. Opens a new record on the outermost {@code nodeName}
     * element and copies the element's attributes into the current record.
     */
    @Override
    public void startElement(String uri, String localName, String name, Attributes attributes)
            throws SAXException {
        sb.setLength(0);
        if (name.equals(nodeName)) {
            if (count == 0) {
                map = new HashMap<String, String>();
            }
            count++;
        }
        currentTag = name;

        // Outside a record there is nowhere to store attributes.
        if (map == null || attributes == null) {
            return;
        }
        if ("discount".equals(name) || "price".equals(name)) {
            // Both elements carry a "currency" attribute; prefix the key to keep them apart.
            map.put(name + "_currency", attributes.getValue("currency"));
            return;
        }
        for (int i = 0; i < attributes.getLength(); i++) {
            map.put(attributes.getQName(i), attributes.getValue(i));
        }
    }

    /**
     * Called with a chunk of element text. The parser may split one text node into
     * several chunks (for example around entity references), so chunks are concatenated.
     * Whitespace-only chunks are ignored only until the first real text arrives, which
     * keeps container elements out of the record without losing interior spaces.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (currentTag == null || map == null) {
            return;
        }
        if (sb.length() == 0 && isBlank(ch, start, length)) {
            return;
        }
        sb.append(ch, start, length);
        map.put(currentTag, sb.toString());
    }

    /**
     * Called for every end tag. Closes the current record when the outermost
     * {@code nodeName} element ends.
     */
    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        if (name.equals(nodeName) && count > 0) {
            count--;
            if (count == 0) {
                list.add(map);
                map = null;
            }
        }
        currentTag = null;
        super.endElement(uri, localName, name);
    }

    /** Returns true when the range holds only characters that {@code String.trim()} would strip. */
    private static boolean isBlank(char[] ch, int start, int length) {
        for (int i = start; i < start + length; i++) {
            if (ch[i] > ' ') {
                return false;
            }
        }
        return true;
    }
}
xml文件具体内容如下:
<?xml version="1.0" encoding="utf-8"?> <merchandiser xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="merchandiser.xsd"> <header> <merchantId>37938</merchantId> <merchantName>FARFETCH.COM Australia</merchantName> <createdOn>12/05/2015 07:22:29</createdOn> </header> <product product_id="10100630" name="WERKSTATT:MÜNCHEN leather belt" sku_number="10100630" manufacturer_name="WERKSTATT:MÜNCHEN"> <category> <primary>Accessories</primary> <secondary>Belts</secondary> </category> <URL> <product>http://click.linksynergy.com/link?id=DGme2yum/2Y&offerid=389625.10100630&type=15&murl=http%3A%2F%2Fwww.farfetch.com%2Fshopping%2Fmen%2Fitem10100630.aspx</product> <productImage>http://cdn-images.farfetch.com/10/10/06/30/10100630_579509_800.jpg</productImage> </URL> <description> <short>Black leather belt from Werkstatt: Munchen featuring a silver buckle and silver loop.</short> </description> <discount currency="AUD"> <type>amount</type> </discount> <price currency="AUD"> <retail>1265.42</retail> </price> <brand>WERKSTATT:MÜNCHEN</brand> <shipping> <availability>in-stock</availability> </shipping> <pixel>http://ad.linksynergy.com/fs-bin/show?id=DGme2yum/2Y&bids=389625.10100630&type=15&subid=0</pixel> <attributeClass class_id="60"> <Product_Type>Accessories</Product_Type> <Size>S</Size> <Color>Black</Color> <Gender>Male</Gender> </attributeClass> </product> <product product_id="10100676" name="WERKSTATT:MÜNCHEN leather belt" sku_number="10100676" manufacturer_name="WERKSTATT:MÜNCHEN"> <category> <primary>Accessories</primary> <secondary>Belts</secondary> </category> <URL> <product>http://click.linksynergy.com/link?id=DGme2yum/2Y&offerid=389625.10100676&type=15&murl=http%3A%2F%2Fwww.farfetch.com%2Fshopping%2Fmen%2Fitem10100676.aspx</product> <productImage>http://cdn-images.farfetch.com/10/10/06/76/10100676_579578_800.jpg</productImage> </URL> <description> <short>Black leather belt from Werkstatt: Munchen featuring a silver buckle and leather 
loop.</short> </description> <discount currency="AUD"> <type>amount</type> </discount> <price currency="AUD"> <retail>1539.03</retail> </price> <brand>WERKSTATT:MÜNCHEN</brand> <shipping> <availability>in-stock</availability> </shipping> <pixel>http://ad.linksynergy.com/fs-bin/show?id=DGme2yum/2Y&bids=389625.10100676&type=15&subid=0</pixel> <attributeClass class_id="60"> <Product_Type>Accessories</Product_Type> <Size>S-M-L-XL</Size> <Color>Black</Color> <Gender>Male</Gender> </attributeClass> </product> <product product_id="10212594" name="SCUNZANI IVO toad skin belt" sku_number="10212594" manufacturer_name="SCUNZANI IVO"> <category> <primary>Accessories</primary> <secondary>Belts</secondary> </category> <URL> <product>http://click.linksynergy.com/link?id=DGme2yum/2Y&offerid=389625.10212594&type=15&murl=http%3A%2F%2Fwww.farfetch.com%2Fshopping%2Fmen%2Fitem10212594.aspx</product> <productImage>http://cdn-images.farfetch.com/10/21/25/94/10212594_1130649_800.jpg</productImage> </URL> <description> <short>Green and black toad skin belt from Scunzani Ivo featuring a silver-tone buckle. 
Please note that this item cannot be shipped outside the E.U.</short> </description> <discount currency="AUD"> <type>amount</type> </discount> <price currency="AUD"> <retail>461.25</retail> </price> <brand>SCUNZANI IVO</brand> <shipping> <availability>in-stock</availability> </shipping> <pixel>http://ad.linksynergy.com/fs-bin/show?id=DGme2yum/2Y&bids=389625.10212594&type=15&subid=0</pixel> <attributeClass class_id="60"> <Product_Type>Accessories</Product_Type> <Size>M-L</Size> <Color>Green</Color> <Gender>Male</Gender> </attributeClass> </product> <product product_id="11249583" name="JIL SANDER VINTAGE straight leg trousers" sku_number="11249583" manufacturer_name="JIL SANDER VINTAGE"> <category> <primary>Vintage & Archive</primary> <secondary>Trousers</secondary> </category> <URL> <product>http://click.linksynergy.com/link?id=DGme2yum/2Y&offerid=389625.11249583&type=15&murl=http%3A%2F%2Fwww.farfetch.com%2Fshopping%2Fwomen%2Fitem11249583.aspx</product> <productImage>http://cdn-images.farfetch.com/11/24/95/83/11249583_6003611_800.jpg</productImage> </URL> <description> <short>Sky grey linen-wool blend straight leg trousers from Jil Sander Vintage featuring a button and zip fly, side pockets and a back pocket. Circa 1990. Please note that vintage items are not new and therefore might have minor imperfections.</short> </description> <discount currency="AUD"> <type>amount</type> </discount> <price currency="AUD"> <retail>189.77</retail> </price> <brand>JIL SANDER VINTAGE</brand> <shipping> <availability>in-stock</availability> </shipping> <pixel>http://ad.linksynergy.com/fs-bin/show?id=DGme2yum/2Y&bids=389625.11249583&type=15&subid=0</pixel> <attributeClass class_id="60"> <Product_Type>Vintage & Archive</Product_Type> <Size>36</Size> <Color>Grey</Color> <Gender>Female</Gender> </attributeClass> </product> <trailer> <numberOfProducts>118204</numberOfProducts> </trailer> </merchandiser>