概述
以前的项目中用到了 xml_parser（对 boost.property_tree 的一层封装），它支持中文解析，其实现应该参考了以下两篇文章：
1、boost.property_tree解析xml的帮助类:https://blog.csdn.net/NCTU_to_prove_safety/article/details/73614348
2、The help class of boost.property_tree parsing xml and the solution of Chinese parsing problems:https://blog.krybot.com/a?ID=00950-284221e4-a6f8-420e-a7dc-ba233446b333
项目中使用的 xml_parser.hpp 如下：
/**
* @file xml_parser.hpp
*
* Declares the XML parser class
* This class encapsulates a collection of operations for property_tree .
*/
#ifndef XML_PARSER_INCLUDED
#define XML_PARSER_INCLUDED
#include <codecvt>
#include <cstdint>
#include <fstream>
#include <functional>
#include <iostream>
#include <locale>
#include <map>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/property_tree/detail/xml_parser_writer_settings.hpp>
using namespace boost;
using namespace boost::property_tree;
using namespace std;
// Reserved child keys under which the property_tree XML parser stores a
// node's attributes and comments.
const wstring XMLATTR(L"<xmlattr>");
const wstring XMLCOMMENT(L"<xmlcomment>");
// The same keys followed by the path separator, ready to be prefixed to an
// attribute or comment name when building a lookup path.
const wstring XMLATTR_DOT(L"<xmlattr>.");
const wstring XMLCOMMENT_DOT(L"<xmlcomment>.");
/** A class encapsulates a collection of operations for property_tree */
class CXMLParser
{
public:
/** Default constructor. */
CXMLParser()
{
}
/** Destructor. */
~CXMLParser()
{
}
/**
* Reads XML file.
*
* @param fileName Full path of the input file.
* @param [in,out] pt The wptree object.
*/
void ReadXMLFile(const string& fileName, wptree& pt)
{
ReadXMLFile(to_wstr(fileName), pt);
}
/**
* Loads a UTF-8 encoded XML file into a wide-character property tree.
*
* Whitespace-only text is dropped (trim_whitespace). A leading UTF-8 byte
* order mark is skipped so that files saved "with BOM" parse as well.
*
* @param fileName Full path of the input file (converted to UTF-8 with to_str()
*                 before being handed to the file stream).
* @param [in,out] pt The wptree object that receives the parsed document.
*
* @throws xml_parser_error if the file cannot be opened or is not well-formed XML.
*/
void ReadXMLFile(const wstring& fileName, wptree& pt)
{
    const std::string narrowPath = to_str(fileName);
    std::wifstream f(narrowPath);
    // Report an unopenable file explicitly, with its name, instead of letting
    // the parser fail later with an anonymous "read error".
    if (!f.is_open())
        throw property_tree::xml_parser_error("cannot open file", narrowPath, 0);
    // consume_header makes the facet swallow a UTF-8 BOM; otherwise U+FEFF
    // reaches the parser as the first character of the document.
    std::locale utf8Locale(std::locale(), new std::codecvt_utf8<wchar_t, 0x10ffff, std::consume_header>);
    f.imbue(utf8Locale);
    property_tree::read_xml(f, pt, property_tree::xml_parser::trim_whitespace);
    // The stream closes itself when it goes out of scope, also on exceptions.
}
/**
* Serialises a property tree to a UTF-8 encoded XML file.
*
* The output is indented with one tab per nesting level and the XML
* declaration announces the "utf-8" encoding.
*
* @param fileName Full path of the output file.
* @param [in,out] pt The wptree object to write (it is not modified).
*
* @throws xml_parser_error if the file cannot be written.
*/
void WriteXMLFile(const string& fileName, wptree& pt)
{
    // Only the UTF-8 conversion facet matters for the output. Building on the
    // classic locale avoids std::locale("") which throws std::runtime_error
    // when the environment names a locale that is not installed.
    const std::locale utf8Locale(std::locale::classic(), new std::codecvt_utf8<wchar_t>);
    // One tab per indentation level.
    auto settings = property_tree::xml_writer_make_settings<wstring>(L'\t', 1, L"utf-8");
    property_tree::write_xml(fileName, pt, utf8Locale, settings);
}
/**
* Writes XML file.
*
* @param fileName Full path of the input file.
* @param [in,out] pt The wptree object
*/
void WriteXMLFile(const wstring& fileName, wptree& pt)
{
WriteXMLFile(to_str(fileName), pt);
}
/**
* Parses XML held in memory.
*
* Unlike ReadXMLFile() no parser flags are passed, so whitespace is kept
* as the parser delivers it by default.
*
* @param content The XML document as wide text.
* @param [in,out] pt The wptree object that receives the parsed document.
*/
void ReadXMLContent(const wstring& content, wptree& pt)
{
    std::wistringstream source(content);
    property_tree::read_xml(source, pt);
}
/**
* Reads XML content.
*
* @param content The input stringsream content.
* @param [in,out] pt The wptree object
*/
void ReadXMLContent(const string& content, wptree& pt)
{
ReadXMLContent(to_wstr(content), pt);
}
/**
* Serialises a property tree to a narrow (UTF-8) string.
*
* The tree is first written as wide text, then converted with to_str().
*
* @param [out] content Receives the XML text; any previous value is replaced.
* @param [in,out] pt The wptree object to write.
*/
void WriteXMLContent(string& content, wptree& pt)
{
    wstring wideXml;
    WriteXMLContent(wideXml, pt);
    content = to_str(wideXml);
}
/**
* Serialises a property tree to a wide string.
*
* Uses the writer's default settings (no custom indentation).
*
* @param [out] content Receives the XML text; any previous value is replaced.
* @param [in,out] pt The wptree object to write.
*/
void WriteXMLContent(wstring& content, wptree& pt)
{
    std::wostringstream sink;
    property_tree::write_xml(sink, pt);
    content = sink.str();
}
/**
* Converts a UTF-8 string to a wstring.
*
* As a legacy side effect the global C++ locale is switched to a Chinese
* locale ("chs" on Windows, "zh_CN.utf8" elsewhere) when that locale is
* available. The conversion itself does not depend on it, so a missing
* locale no longer makes the call fail.
*
* @param str The UTF-8 encoded string to convert.
*
* @return Str as a wstring.
*
* @throws std::range_error if str is not valid UTF-8.
*/
std::wstring to_wstr(const std::string& str)
{
    try
    {
        // _WIN32 is defined by the compiler; WIN32 only by some headers and
        // project settings, so accept either.
#if defined(_WIN32) || defined(WIN32)
        std::locale::global(std::locale("chs"));
#else
        std::locale::global(std::locale("zh_CN.utf8"));
#endif
    }
    catch (const std::runtime_error&)
    {
        // The named locale is not installed on this machine. Keep the current
        // global locale; the UTF-8 facet below works regardless.
    }
    std::wstring_convert<std::codecvt_utf8<wchar_t> > conv;
    return conv.from_bytes(str);
}
/**
* Converts a wstring to a UTF-8 encoded string.
*
* @param str The wide string to convert.
*
* @return Str as a UTF-8 string.
*
* @throws std::range_error if a character cannot be encoded.
*/
std::string to_str(const std::wstring& str)
{
    typedef std::codecvt_utf8<wchar_t> Utf8Facet;
    return std::wstring_convert<Utf8Facet>().to_bytes(str);
}
/**
* Converts a str to an uint16_t.
*
* Leading and trailing whitespace is ignored; an empty or all-whitespace
* string yields 0.
*
* @param str The wstring to convert.
*
* @return Str as an uint16_t, or 0 when str is blank.
*
* @throws boost::bad_lexical_cast if the trimmed text is not a valid uint16_t.
*/
uint16_t to_uint16(const std::wstring& str)
{
    const std::wstring trimmed = boost::trim_copy(str);
    if (trimmed.empty())
        return 0;
    // Convert the trimmed text; lexical_cast rejects surrounding whitespace.
    return boost::lexical_cast<uint16_t>(trimmed);
}
/**
* Converts a str to an uint32_t.
*
* Leading and trailing whitespace is ignored; an empty or all-whitespace
* string yields 0.
*
* @param str The wstring to convert.
*
* @return Str as an uint32_t, or 0 when str is blank.
*
* @throws boost::bad_lexical_cast if the trimmed text is not a valid uint32_t.
*/
uint32_t to_uint32(const std::wstring& str)
{
    const std::wstring trimmed = boost::trim_copy(str);
    if (trimmed.empty())
        return 0;
    // Convert the trimmed text; lexical_cast rejects surrounding whitespace.
    return boost::lexical_cast<uint32_t>(trimmed);
}
/**
* Converts a str to an uint64_t.
*
* Leading and trailing whitespace is ignored; an empty or all-whitespace
* string yields 0.
*
* @param str The wstring to convert.
*
* @return Str as an uint64_t, or 0 when str is blank.
*
* @throws boost::bad_lexical_cast if the trimmed text is not a valid uint64_t.
*/
uint64_t to_uint64(const std::wstring& str)
{
    const std::wstring trimmed = boost::trim_copy(str);
    if (trimmed.empty())
        return 0;
    // Convert the trimmed text; lexical_cast rejects surrounding whitespace.
    return boost::lexical_cast<uint64_t>(trimmed);
}
/**
* Converts a str to a double.
*
* The text is converted as-is: it is not trimmed and an empty string is
* not special-cased.
*
* @param str The wstring to convert.
*
* @return Str as a double.
*
* @throws boost::bad_lexical_cast if str is not a valid double.
*/
double to_double(const std::wstring& str)
{
    const double parsed = to_type<double>(str);
    return parsed;
}
/**
* Converts a wide string to an arbitrary type via boost::lexical_cast.
*
* @param str The wstring to convert.
*
* @return String as a T.
*
* @throws boost::bad_lexical_cast if the conversion fails.
*/
template<typename T>
T to_type(const std::wstring& str)
{
    T converted = boost::lexical_cast<T>(str);
    return converted;
}
/**
* Splits a delimited wide string into doubles.
*
* Thin wrapper over to_vector<double>().
*
* @param str The wstring to convert.
* @param any_of The set of delimiter characters.
*
* @return The parsed values, in input order.
*/
vector<double> to_doubleVector(const std::wstring& str, const std::wstring& any_of)
{
    vector<double> values = to_vector<double>(str, any_of);
    return values;
}
/**
* Splits a delimited wide string into UTF-8 strings.
*
* Tokens are neither trimmed nor filtered: empty tokens produced by
* adjacent delimiters are kept.
*
* @param str The wstring to split.
* @param any_of The set of delimiter characters.
*
* @return The tokens converted with to_str(), in input order.
*/
vector<std::string> to_stringVector(const std::wstring& str, const std::wstring& any_of)
{
    vector<wstring> tokens;
    boost::split(tokens, str, boost::is_any_of(any_of));
    vector<std::string> result;
    result.reserve(tokens.size());
    for (const auto& token : tokens)
        result.push_back(to_str(token));
    return result;
}
/**
* Splits a delimited wide string and converts every token to T.
*
* Each token is trimmed; tokens that are empty after trimming are skipped.
*
* @param wstr The wstring to convert.
* @param any_of The set of delimiter characters.
*
* @return The converted values, in input order.
*
* @throws boost::bad_lexical_cast if a non-empty token cannot be converted.
*/
template<typename T>
vector<T> to_vector(const std::wstring& wstr, const std::wstring& any_of)
{
    vector<wstring> tokens;
    boost::split(tokens, wstr, boost::is_any_of(any_of));
    vector<T> values;
    values.reserve(tokens.size());
    // Iterate by reference: the token is only read, so no copy is needed.
    for (const auto& token : tokens)
    {
        const auto trimmed = boost::trim_copy(token);
        if (trimmed.empty())
            continue;
        values.push_back(to_type<T>(trimmed));
    }
    return values;
}
/**
* Converts vector to a wstring.
*
* @param vec The vector need to convert.
* @param any_of The split strings.
*
* @return The converted string.
*/
template<typename T>
wstring to_wstr(const vector<T>& vec, const string& split)
{
stringstream result;
std::copy(vec.begin(), vec.end(), std::ostream_iterator<T>(result, split.c_str()));
string str = result.str();
boost::trim_right_if(str, boost::is_any_of(split));
return to_wstr(str);
}
/**
* Joins unsigned 32-bit values into a delimited wstring of signed values.
*
* Every element is cast to int32_t before formatting, so values that do not
* fit in int32_t are written as negative numbers. Formatting uses the
* classic "C" locale, independent of the global locale.
*
* @param vec The values to convert.
* @param split The delimiter placed between two elements.
*
* @return The joined string; empty for an empty vector.
*/
wstring int32_to_wstr(const vector<uint32_t>& vec, const string& split){
    stringstream result;
    // Keep the output free of locale-specific digit grouping.
    result.imbue(std::locale::classic());
    bool first = true;
    for (const auto v : vec)
    {
        if (!first)
            result << split;
        result << static_cast<int32_t>(v);
        first = false;
    }
    return to_wstr(result.str());
}
/**
* Joins the elements of a C array into a delimited wstring.
*
* @param arr The array to convert; may be null, which is treated as empty.
* @param len The number of elements in arr; values <= 0 are treated as empty.
* @param split The delimiter placed between two elements.
*
* @return The joined string; empty when there are no elements.
*/
template<typename T>
wstring to_wstr(const T* arr, int32_t len, const string& split)
{
    vector<T> vec;
    // Guard against a null pointer or a negative length, both of which would
    // make the iterator range [arr, arr + len) invalid.
    if (arr != nullptr && len > 0)
        vec.assign(arr, arr + len);
    return to_wstr<T>(vec, split);
}
/**
* Looks up the subtree stored under a path.
*
* @param root The root wptree object.
* @param key The path of the node, as accepted by get_child_optional().
*
* @return An optional reference to the subtree; empty when the path does not
*         exist. Iterate the referenced tree to visit its child nodes.
*/
auto Descendants(const wptree& root, const wstring& key)->decltype(root.get_child_optional(key))
{
    auto subtree = root.get_child_optional(key);
    return subtree;
}
/**
* Get list of child nodes based on child node attributes
*
* @param parant The parant wptree object.
* @param tagName Name of the tag.
* @param attrName Name of the attribute.
* @param attrVal The attribute value.
*
* @return The childs by attribute.
*/
template<typename T>
vector<wptree> GetChildsByAttr(const wptree& parant, const wstring& tagName, const wstring& attrName, const T& attrVal)
{
vector<wptree> v;
for (auto& child : parant)
{
if (child.first != tagName)
continue;
auto attr = Attribute<T>(child, attrName);
if (attr&&*attr == attrVal)
v.push_back(child.second);
}
return v;
}
/**
* Gets attribute value of a node.
*
* @param node The wptree object.
* @param attrName Name of the attribute.
*
* @return Attribute value of a node;
*/
template<typename R>
optional<R> Attribute(const wptree& node, const wstring& attrName)
{
return node.get_optional<R>(XMLATTR_DOT + attrName);
}
/**
* Gets attribute value of a node, default is wstring.
*
* @param node The wptree object.
* @param attrName Name of the attribute.
*
* @return Attribute value of a node;
*/
optional<wstring> Attribute(const wptree& node, const wstring& attrName)
{
return Attribute<wstring>(node, attrName);
}
/**
* Gets attribute value of value_type
*
* @param pair The pair.
* @param attrName Name of the attribute.
*
* @return Attribute value of value_type
*/
template<typename R>
optional<R> Attribute(const wptree::value_type& pair, const wstring& attrName)
{
if (pair.first == XMLATTR)
return pair.second.get_optional<R>(attrName);
else if (pair.first == XMLCOMMENT)
return optional<R>();
else
return pair.second.get_optional<R>(XMLATTR_DOT + attrName);
}
/**
* Get property value of value_type, default is string.
*
* @param pair The pair.
* @param attrName Name of the attribute.
*
* @return Attribute value of value_type
*/
optional<wstring> Attribute(const wptree::value_type& pair, const wstring& attrName)
{
return Attribute<wstring>(pair, attrName);
}
/**
* Gets filed value of value_type
*
* @param pair The pair.
* @param fieldName Name of the field.
*
* @return Attribute value of value_type
*/
template<typename R>
optional<R> FieldValue(const wptree::value_type& pair, const wstring& fieldName)
{
if (pair.first == fieldName)
return pair.second.data();
else
return optional<R>();
}
/**
* Get filed value of value_type, default is string.
*
* @param pair The pair.
* @param fieldName Name of the field.
*
* @return Attribute value of value_type
*/
optional<wstring> FieldValue(const wptree::value_type& pair, const wstring& fieldName)
{
return FieldValue<wstring>(pair, fieldName);
}
/**
* Generate a Multimap of <string, ptree> based on a property
*
* @param root The root.
* @param key The key.
* @param attrName Name of the attribute.
*
* @return A multimap<wstring,wptree>
*/
template<class F = std::function<bool(wstring&)>>
multimap<wstring, wptree> MakeMapByAttr(const wptree& root, const wstring& key, const wstring& attrName, F predict = [](wstring& str){
return true; })
{
multimap<wstring, wptree> resultMap;
auto list = Descendants(root, key);
if (!list)
return resultMap;
for (auto& item : *list)
{
auto attr = Attribute(item, attrName);
if (attr&&predict(*attr))
resultMap.insert(std::make_pair(*attr, item.second));
}
return resultMap;
}
/**
* Reads file to string.
*
* @param path Full path of the file.
*
* @return The file to string.
*/
string ReadFileToString(const string& path)
{
wstring wstr = ReadFileToString(to_wstr(path));
return to_str(wstr);
}
/**
* Reads a whole UTF-8 encoded file into a wstring.
*
* @param path Full pathname of the file (converted to UTF-8 with to_str()
*             before being handed to the file stream).
*
* @return The file content; empty when the file cannot be opened.
*/
wstring ReadFileToString(const wstring& path)
{
    std::wifstream fin;
    // Install the UTF-8 facet first, then open exactly once. Opening in the
    // constructor and again with open() makes the second call fail and
    // leaves failbit set on the stream.
    fin.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>));
    fin.open(to_str(path), std::ios::in);
    if (!fin.is_open())
        return wstring();
    wstringstream ss;
    ss << fin.rdbuf();
    return ss.str();
}
};
#endif
测试代码 test_xml_parser.cpp
原文章中提供了测试代码，项目中也据此进行了单元测试：
#include "xml_parser.hpp"
#include "gtest/gtest.h"
#include "gtest/gtest-spi.h"
#include <vector>
using namespace testing;
/*
<?xml version="1.0" encoding="UTF-8"?>
<Root>
<Scenes>
<Scene Name="测试1">
<Name1>"测试2"</Name1>
</Scene>
</Scenes>
</Root>
*/
/** Path of the XML fixture loaded by the tests (presumably the sample document shown in the comment above). */
const std::string filePath = "test1.xml";
class CXMLParserTest : public Test
{
public:
CXMLParser parser;
wptree pt;
private:
virtual void SetUp()
{
parser.ReadXMLFile(filePath, pt);
}
virtual void TearDown()
{
}
};
// The path Root.Scenes must exist in the fixture document.
TEST_F(CXMLParserTest, TestDescendants)
{
    const bool found = static_cast<bool>(parser.Descendants(pt, L"Root.Scenes"));
    EXPECT_EQ(true, found);
}
// Checks the Name attribute of every child of Root.Scenes; for children
// without that attribute, checks the text of their Name1 fields instead.
TEST_F(CXMLParserTest, XMLParserAttributeAndFieldValue)
{
    auto elements = parser.Descendants(pt, L"Root.Scenes");
    // Fatal assertion: dereferencing an empty optional below would be
    // undefined behaviour.
    ASSERT_TRUE(static_cast<bool>(elements));
    for (auto& element : *elements)
    {
        auto name = parser.Attribute(element, L"Name");
        if (name)
        {
            EXPECT_EQ(L"测试1", *name);
            continue;
        }
        for (auto& child : element.second)
        {
            auto field = parser.FieldValue(child, L"Name1");
            if (field)
                EXPECT_EQ(L"测试2", *field);
        }
    }
}
// Joining a vector of whole-number doubles yields the delimited text.
TEST_F(CXMLParserTest, test_Vector_to_wstring)
{
    const string split(";");
    const vector<double> vec{ 1, 2, 3, 4, 5, 6 };
    const wstring joined = parser.to_wstr<double>(vec, split);
    EXPECT_EQ(L"1;2;3;4;5;6", joined);
}
// Joining a C array of doubles yields the delimited text.
TEST_F(CXMLParserTest, test_Array_to_wstring)
{
    const string split(";");
    const double values[6] = { 1.1, 2.2, 3.3, 4.4, 5.5, 6.6 };
    const wstring joined = parser.to_wstr<double>(values, 6, split);
    EXPECT_EQ(L"1.1;2.2;3.3;4.4;5.5;6.6", joined);
}
// Splitting delimited text yields every value, in order.
TEST_F(CXMLParserTest, test_wstring_to_vector)
{
    wstring wstr = L"1;2;3;4;5;6";
    wstring split(L";");
    vector<double> vec = parser.to_vector<double>(wstr, split);
    // Unsigned literal avoids a signed/unsigned comparison; fatal so the
    // element checks below never index out of range.
    ASSERT_EQ(6u, vec.size());
    for (size_t i = 0; i < vec.size(); ++i)
        EXPECT_DOUBLE_EQ(static_cast<double>(i + 1), vec[i]);
}
// A plain decimal string converts to the matching uint16_t.
TEST_F(CXMLParserTest, test_wstring_to_uint16)
{
    const wstring text(L"11");
    const uint16_t parsed = parser.to_uint16(text);
    EXPECT_EQ(11, parsed);
}
// A decimal string converts to the matching double.
TEST_F(CXMLParserTest, test_wstring_to_double)
{
    wstring wstr = L"11.22";
    double u = parser.to_double(wstr);
    // Floating-point results are compared with a tolerance, not with ==.
    EXPECT_DOUBLE_EQ(11.22, u);
}
// Same checks as XMLParserAttributeAndFieldValue, but the document is first
// read into a string and parsed from memory.
TEST(CXMLParserTest2, TestReadXMLContent)
{
    CXMLParser parser;
    wptree pt;
    wstring wstr = parser.to_wstr(filePath);
    // ReadFileToString is a member of CXMLParser; no CUtility class is
    // declared by the headers this file includes.
    wstring content = parser.ReadFileToString(wstr);
    parser.ReadXMLContent(content, pt);
    auto elements = parser.Descendants(pt, L"Root.Scenes");
    // Fatal assertion: dereferencing an empty optional below would be
    // undefined behaviour.
    ASSERT_TRUE(static_cast<bool>(elements));
    for (auto& element : *elements)
    {
        auto name = parser.Attribute(element, L"Name");
        if (name)
        {
            EXPECT_EQ(L"测试1", *name);
            continue;
        }
        for (auto& child : element.second)
        {
            auto field = parser.FieldValue(child, L"Name1");
            if (field)
                EXPECT_EQ(L"测试2", *field);
        }
    }
}