libxml2入门和中文支持
libxml2是gnome做的一个xml的库,支持SAX和DOM。
在解析xml的时候,libxml2会将xml中不属于标签的部分作为text节点插入。如果是属性,则添加一个属性节点到父节点上,一个文字节点到属性节点下。那么整个xml就变成了一棵单纯的树。
支持多语言的问题上,libxml2的内核只支持UTF-8。但是可以通过注册编码句柄来添加语言支持,一般是配合iconv[2]使用的,因为libxml2的编译依赖就是iconv。下面是代码。
#include <errno.h>
#include <iconv.h>

/*
 * Conversion descriptors shared by the two libxml2 encoding callbacks below.
 * They must be opened with iconv_open() before the callbacks are used.
 * The names follow iconv_open()'s (tocode, fromcode) argument order:
 * iconv_utf8_gbk converts *to* UTF-8 *from* GBK, iconv_gbk_utf8 the reverse.
 */
iconv_t iconv_utf8_gbk;
iconv_t iconv_gbk_utf8;

/*
 * Run one iconv() conversion using libxml2's handler calling convention.
 *
 * cd      open conversion descriptor
 * out     destination buffer
 * outlen  in: capacity of `out` in bytes; out: number of bytes produced
 * in      source buffer; NULL resets the converter to its initial state
 * inlen   in: number of bytes available in `in`; out: number of bytes consumed
 *
 * Returns the number of bytes written on success (also when the input merely
 * ends in an incomplete multibyte sequence, so the caller can supply the rest
 * later), -1 on bad arguments or when `out` is too small, and -2 when the
 * input cannot be transcoded. *inlen and *outlen are updated in every case
 * except bad arguments.
 */
static int gbk_convert (iconv_t cd, unsigned char *out, int *outlen,
                        const unsigned char *in, int *inlen)
{
    if (out == NULL || outlen == NULL || inlen == NULL
        || *outlen < 0 || *inlen < 0) {
        return -1;
    }

    char *outbuf = (char *) out;
    char *inbuf = (char *) in;   /* iconv() never writes through inbuf */

    /*
     * iconv() counts in size_t while libxml2 counts in int. Use real size_t
     * temporaries: casting the int pointers to size_t * makes iconv() read
     * and write past the ints wherever size_t is wider than int.
     */
    const size_t in_total = (in != NULL) ? (size_t) *inlen : 0;
    const size_t out_total = (size_t) *outlen;
    size_t in_left = in_total;
    size_t out_left = out_total;

    /*
     * The (void *) cast accepts both the POSIX prototype (char **) and old
     * libiconv builds that declare the parameter as const char **.
     */
    const size_t rslt = iconv (cd, (void *) &inbuf, &in_left,
                               &outbuf, &out_left);
    const int saved_errno = errno;

    *inlen = (int) (in_total - in_left);
    *outlen = (int) (out_total - out_left);

    /* iconv() returns an unsigned size_t; failure is (size_t) -1, never < 0. */
    if (rslt == (size_t) -1) {
        switch (saved_errno) {
        case EINVAL:
            /* Truncated trailing sequence: report the progress made so far. */
            break;
        case E2BIG:
            return -1;
        default:
            /* EILSEQ, or an unusable descriptor. */
            return -2;
        }
    }
    return *outlen;
}

/*
 * libxml2 input handler: transcode GBK/GB2312 bytes in `in` to UTF-8 in `out`
 * using the global iconv_utf8_gbk descriptor.
 * See gbk_convert() for the meaning of the arguments and the return value.
 */
int gbk_input (unsigned char *out, int *outlen, const unsigned char *in,
               int *inlen)
{
    return gbk_convert (iconv_utf8_gbk, out, outlen, in, inlen);
}

/*
 * libxml2 output handler: transcode UTF-8 bytes in `in` to GBK/GB2312 in
 * `out` using the global iconv_gbk_utf8 descriptor.
 * See gbk_convert() for the meaning of the arguments and the return value.
 */
int gbk_output (unsigned char *out,
                int *outlen, const unsigned char *in, int *inlen)
{
    return gbk_convert (iconv_gbk_utf8, out, outlen, in, inlen);
}
/*
 * Recursively print a sibling list of nodes, and the subtree below each
 * element node, to stdout.
 *
 * doc     owning document; only forwarded to the recursive calls
 * a_node  first node of the sibling list to print; may be NULL
 * n       nesting depth, printed as `n` leading spaces before each node line
 *
 * Each node is printed as "node <type>: <name> = <content>", followed by one
 * "attr <name> = <value>" line per attribute. Missing strings are shown as
 * "(null)".
 */
static void print_element_names (xmlDocPtr doc, xmlNode * a_node,
                                 int n)
{
    for (xmlNode *cur_node = a_node; cur_node != NULL;
         cur_node = cur_node->next) {
        for (int i = 0; i < n; ++i) {
            printf (" ");
        }

        /* name/content may be NULL, and passing NULL to %s is undefined. */
        printf ("node %d: %s = %s\n", (int) cur_node->type,
                cur_node->name != NULL
                    ? (const char *) cur_node->name : "(null)",
                cur_node->content != NULL
                    ? (const char *) cur_node->content : "(null)");

        for (xmlAttr *cur_attr = cur_node->properties; cur_attr != NULL;
             cur_attr = cur_attr->next) {
            /* An attribute may have no child node holding its value. */
            const xmlNode *value = cur_attr->children;

            printf ("attr %s = %s\n",
                    cur_attr->name != NULL
                        ? (const char *) cur_attr->name : "(null)",
                    (value != NULL && value->content != NULL)
                        ? (const char *) value->content : "(null)");
        }

        if (cur_node->type == XML_ELEMENT_NODE) {
            print_element_names (doc, cur_node->children, n + 1);
        }
    }
}
/*
 * Demo entry point: register GBK/GB2312 encoding handlers with libxml2,
 * parse "Q.xml" and print its node tree.
 *
 * argc, argv  unused
 *
 * Returns 0 on success and 1 if the iconv converters cannot be opened or the
 * file cannot be parsed.
 */
int _tmain (int argc, _TCHAR * argv[])
{
    xmlDoc *doc = NULL;
    xmlNode *root_element = NULL;
    int exit_code = 1;

    (void) argc;
    (void) argv;

    LIBXML_TEST_VERSION;

    /* iconv_open() takes (tocode, fromcode). */
    iconv_utf8_gbk = iconv_open ("utf-8", "gbk");
    iconv_gbk_utf8 = iconv_open ("gbk", "utf-8");
    if (iconv_utf8_gbk == (iconv_t) -1 || iconv_gbk_utf8 == (iconv_t) -1) {
        printf ("Failed to open the GBK/UTF-8 iconv converters\n");
        goto cleanup;
    }

    /* Add gb2312 encoding support. */
    xmlNewCharEncodingHandler ("gb2312", gbk_input, gbk_output);
    /* Add gbk encoding support. */
    xmlNewCharEncodingHandler ("gbk", gbk_input, gbk_output);

    doc = xmlReadFile ("Q.xml", NULL, 0);
    if (doc == NULL) {
        printf ("Failed to parse %s\n", "Q.xml");
        goto cleanup;
    }

    root_element = xmlDocGetRootElement (doc);
    print_element_names (doc, root_element, 0);
    exit_code = 0;

cleanup:
    /* Single exit path so the failure cases release everything as well. */
    if (doc != NULL) {
        xmlFreeDoc (doc);
    }
    xmlCleanupParser ();
    xmlMemoryDump ();
    if (iconv_gbk_utf8 != (iconv_t) -1) {
        iconv_close (iconv_gbk_utf8);
    }
    if (iconv_utf8_gbk != (iconv_t) -1) {
        iconv_close (iconv_utf8_gbk);
    }
    return exit_code;
}
在XML无指定编码的时候没有测试过,不过按照XML标准,这时候应该是UTF-8编码。不符合标准的话……自己想办法吧。
Reference:
[1] The XML C parser and toolkit of Gnome: http://xmlsoft.org/
[2] libiconv: http://www.gnu.org/savannah-checkouts/gnu/libiconv/