最近使用libxml2想做点东西,翻看一些example后还是有些疑问,去segmentfault问了下,感谢@pingjiang的热心解答,问题解决,记录如下
(一)如下是一个XML文件,p为根结点
<p>
<one>1</one>
<two>2</two>
<three>3</three>
</p>
我想在根节点之外再添加个标签如下:
<main>
<p>
<one>1</one>
<two>2</two>
<three>3</three>
</p>
</main>
(二)还有就是有一个子结构如下
<p>
<a1>
<one>1</one>
<two>2</two>
<three>3</three>
</a1>
<a2>
<one>1</one>
<two>2</two>
<three>3</three>
</a2>
......
</p>
想去掉最外层的p标签,而保留里面的内容(我找到的删除节点的示例,会把里面的子节点也一并删除)。
完整的实现:
/**
* section: Tree
* synopsis: Navigates a tree to print element names
* purpose: Parse a file to a tree, use xmlDocGetRootElement() to
* get the root element, then walk the document and print
* all the element name in document order.
* usage: tree1 filename_or_URL
* test: tree1 test2.xml > tree1.tmp && diff tree1.tmp $(srcdir)/tree1.res
* author: Dodji Seketeli
* copy: see Copyright for the status of this software.
*/
#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
/*
*To compile this file using gcc you can type
* gcc `xml2-config --cflags --libs` -o tree1 tree1.c
*Run this program
* ./tree1 test.xml
*/
/*
 * Indentation prefixes used by printTree(): entry i is printed in front of
 * every node found at depth i. The table has 8 entries (depths 0..7).
 * NOTE(review): all non-empty entries are a single space here; presumably the
 * upstream source used progressively wider strings and the whitespace was
 * collapsed when the listing was published - confirm before relying on it.
 */
static const char* LEVELS[] = {"", " ", " ", " ", " ", " ", " ", " " };
/**
 * printTree:
 * @a_node: first node of the sibling list to print.
 * @level: depth of @a_node, used as an index into LEVELS for the line prefix.
 *
 * Prints @a_node and its following siblings one per line (name and numeric
 * node type), descending into the children of element nodes.
 */
static void printTree(xmlNode * a_node, int level);
/**
 * print_element_names:
 * @a_node: the initial xml node to consider.
 * @msg: optional heading printed once before the names (skipped when NULL or empty).
 *
 * Prints the names of all element nodes among @a_node, its following
 * siblings and their descendants.
 */
static void print_element_names(xmlNode * a_node, const char* msg);
/*
 * Looks up an element by tag name: @rootNode itself is checked first, then its
 * direct children. The parent of the match is reported through @parentNode
 * (NULL when @rootNode itself is the match). More elaborate lookup logic could
 * be implemented here.
 */
static xmlNode *getNode(xmlNode *rootNode, const char* tag, xmlNode **parentNode);
/* Deletes @nodeToDelete but keeps its children by re-linking them under @parentNode. */
static void removeNode(xmlNode *parentNode, xmlNode *nodeToDelete);
/* Wraps @node in @newNode and adds @newNode as a child of @parentNode. */
static void wrapWithNode(xmlNode *parentNode, xmlNode *node, xmlNode *newNode);
/* Adds @newNode as a child of @parentNode. */
static void appendNewChildNode(xmlNode *parentNode, xmlNode *newNode);
/**
 * test_removeNode:
 * @filepath: path of the XML file to parse.
 *
 * Demo: parses @filepath, looks up the "p" element with getNode(), removes it
 * with removeNode() while keeping its children, then prints the element names
 * of the resulting tree.
 *
 * Returns 0 on success; -1 when the "p" element is not found or is the
 * root element itself.
 */
static int test_removeNode(const char* filepath);
/**
 * test_wrapWithNode:
 * @filepath: path of the XML file to parse.
 *
 * Demo: parses @filepath, wraps the current root element in a newly created
 * "main" element, installs "main" as the document root, then prints the
 * element names of the resulting tree.
 *
 * Returns 0 on success.
 */
static int test_wrapWithNode(const char* filepath);
int main(int argc, char **argv)
{
if (argc != 2) {
printf("error: invalid arguments");
return -1;
}
/*
* this initialize the library and check potential ABI mismatches
* between the version it was compiled for and the actual shared
* library used.
*/
LIBXML_TEST_VERSION
printf("test: removeNode:
");
test_removeNode(argv[1]);
printf("
test: wrapWithNode
");
test_wrapWithNode(argv[1]);
/*
*Free the global variables that may
*have been allocated by the parser.
*/
xmlCleanupParser();
return 0;
}
/**
 * print_element_names:
 * @a_node: first node of the sibling list to walk (may be NULL).
 * @msg: optional heading; printed once as "print: <msg>" unless NULL or empty.
 *
 * Walks @a_node and its following siblings in document order, printing the
 * name of every element node and recursing into each node's children. The
 * recursive calls pass an empty heading so the heading appears only once.
 */
void print_element_names(xmlNode * a_node, const char* msg)
{
    xmlNode *cur_node = NULL;

    if (msg != NULL && msg[0] != '\0') {
        printf("print: %s\n", msg);
    }

    for (cur_node = a_node; cur_node != NULL; cur_node = cur_node->next) {
        if (cur_node->type == XML_ELEMENT_NODE) {
            printf("node type: Element, name: %s\n", cur_node->name);
        }
        print_element_names(cur_node->children, "");
    }
}
/**
 * printTree:
 * @a_node: first node of the sibling list to print (may be NULL).
 * @level: depth of @a_node; selects the indentation prefix from LEVELS.
 *
 * Prints one line per node: "<indent><name> <type>" for elements, and
 * "<indent>#<name> <type>" for every other node kind. Only element nodes are
 * descended into.
 *
 * @level is clamped to the bounds of LEVELS, so trees deeper than the table
 * reuse the last prefix instead of reading past the end of the array.
 */
void printTree(xmlNode * a_node, int level)
{
    const int deepest = (int)(sizeof LEVELS / sizeof LEVELS[0]) - 1;
    const char *indent;
    xmlNode *cur_node = NULL;

    if (level < 0) {
        level = 0;
    } else if (level > deepest) {
        level = deepest;
    }
    indent = LEVELS[level];

    for (cur_node = a_node; cur_node != NULL; cur_node = cur_node->next) {
        /* Some node kinds carry no name; passing NULL to %s is undefined. */
        const char *name = (cur_node->name != NULL)
                               ? (const char *)cur_node->name
                               : "(null)";

        if (cur_node->type == XML_ELEMENT_NODE) {
            printf("%s%s <%d>\n", indent, name, (int)cur_node->type);
            printTree(cur_node->children, level + 1);
        } else {
            printf("%s#%s <%d>\n", indent, name, (int)cur_node->type);
        }
    }
}
/**
 * getNode:
 * @rootNode: node where the lookup starts (NULL yields NULL).
 * @tag: element name to look for (NULL yields NULL).
 * @parentNode: out parameter receiving the parent of the match; may be NULL
 *              when the caller does not need it.
 *
 * Looks for an element named @tag. @rootNode itself is tested first; if it
 * does not match, its direct children are scanned in document order. Deeper
 * descendants are not searched.
 *
 * Returns the matching node, or NULL when nothing matches. *@parentNode is
 * set to @rootNode when a child matched, and to NULL when @rootNode itself
 * matched or when nothing was found.
 */
xmlNode *getNode(xmlNode *rootNode, const char* tag, xmlNode **parentNode) {
    xmlNode *child = NULL;

    if (parentNode != NULL) {
        *parentNode = NULL;
    }
    if (rootNode == NULL || tag == NULL) {
        return NULL;
    }

    if (rootNode->type == XML_ELEMENT_NODE &&
        xmlStrcmp(rootNode->name, (const xmlChar *)tag) == 0) {
        return rootNode;
    }

    for (child = rootNode->children; child != NULL; child = child->next) {
        if (child->type == XML_ELEMENT_NODE &&
            xmlStrcmp(child->name, (const xmlChar *)tag) == 0) {
            /* Every scanned node is a direct child, so the parent is always
             * rootNode - never one of the siblings that were skipped. */
            if (parentNode != NULL) {
                *parentNode = rootNode;
            }
            return child;
        }
    }
    return NULL;
}
// 删除当前节点,但是保留子节点
void removeNode(xmlNode *parentNode, xmlNode *nodeToDelete) {
if (nodeToDelete == NULL) {
printf("error: nodeToDelete is null");
return;
}
xmlNodePtr siblingNode = nodeToDelete->next;
while (siblingNode != NULL) {
if (siblingNode->type == XML_ELEMENT_NODE) {
printf("debug: found sibling: %s
", siblingNode->name);
break;
}
siblingNode = siblingNode->next;
}
printf("debug: parentNode: %s, nodeToDelete: %s
", parentNode->name, nodeToDelete->name);
printTree(parentNode, 0);
xmlNode *childrenNode = nodeToDelete->children;
if (childrenNode == NULL) {
printf("warn: childrenNode is null
");
}
//xmlUnlinkNode(nodeToDelete->children);
xmlNodePtr nextChildNode = NULL;
while (childrenNode != NULL) {
printf("debug: childrenNode: %s
", childrenNode->name);
nextChildNode = childrenNode->next;
xmlUnlinkNode(childrenNode);
if (siblingNode != NULL) {
printf("debug: addPreSibling: %s, sibling is %s
", childrenNode->name, siblingNode->name);
xmlAddPrevSibling(siblingNode, nextChildNode);
} else {
printf("debug: addChild: %s, parent is %s
", childrenNode->name, parentNode->name);
printTree(childrenNode, 0);
xmlAddChild(parentNode, childrenNode);
}
childrenNode = nextChildNode;
}
xmlUnlinkNode(nodeToDelete);
xmlFreeNode(nodeToDelete);
}
/**
 * wrapWithNode:
 * @parentNode: node that will own the wrapper.
 * @node: node to be wrapped; it is detached from its current position.
 * @newNode: wrapper node that becomes the parent of @node.
 *
 * Wraps @node in @newNode: @node is unlinked and added as a child of
 * @newNode, then @newNode is added as a child of @parentNode (xmlAddChild
 * appends, so the wrapper ends up after @parentNode's existing children).
 *
 * All three arguments are validated before anything is modified, so a NULL
 * argument can no longer leave @node detached from the tree.
 */
void wrapWithNode(xmlNode *parentNode, xmlNode *node, xmlNode *newNode) {
    if (parentNode == NULL || node == NULL || newNode == NULL) {
        printf("error: wrapWithNode requires non-null arguments\n");
        return;
    }

    xmlUnlinkNode(node);
    xmlAddChild(newNode, node);
    xmlAddChild(parentNode, newNode);
}
/**
 * appendNewChildNode:
 * @parentNode: node that receives the new child.
 * @newNode: node to add.
 *
 * Adds @newNode as a child of @parentNode by delegating to xmlAddChild().
 * The result of xmlAddChild() is deliberately not propagated.
 */
void appendNewChildNode(xmlNode *parentNode, xmlNode *newNode) {
    (void)xmlAddChild(parentNode, newNode);
}
/**
 * test_removeNode:
 * @filepath: path of the XML file to parse.
 *
 * Demo for removeNode(): parses @filepath, looks up the "p" element (the root
 * or one of its direct children), removes it while keeping its children, and
 * prints the element names of the resulting tree.
 *
 * Returns 0 on success, -1 when the file cannot be parsed, the document has
 * no root element, "p" is not found, or "p" is the root element itself.
 * The parsed document is freed on every path.
 */
int test_removeNode(const char* filepath) {
    int rc = -1;
    xmlDoc *doc = NULL;
    xmlNode *root_element = NULL;
    xmlNode *parentNode = NULL;
    xmlNode *curNode = NULL;

    /* parse the file and get the DOM */
    doc = xmlReadFile(filepath, NULL, 0);
    if (doc == NULL) {
        printf("error: could not parse file %s\n", filepath);
        return -1;
    }

    /* Get the root element node */
    root_element = xmlDocGetRootElement(doc);
    if (root_element == NULL) {
        printf("error: document has no root element\n");
        goto done;
    }

    /* Delete the node but keep its children. */
    curNode = getNode(root_element, "p", &parentNode);
    if (curNode == NULL) {
        printf("error: p node is not found");
        goto done;
    }
    if (parentNode == NULL) {
        /* A document can have only one root element, so removing the root
         * while keeping several children is not handled here. */
        printf("error: This is root node, should treat specially. root node should have only one node");
        goto done;
    }
    removeNode(parentNode, curNode);

    /* Fetch the root element again before printing the modified tree. */
    root_element = xmlDocGetRootElement(doc);
    print_element_names(root_element, "after delete");
    rc = 0;

done:
    /* free the document */
    xmlFreeDoc(doc);
    return rc;
}
/**
 * test_wrapWithNode:
 * @filepath: path of the XML file to parse.
 *
 * Demo for wrapping the root element: parses @filepath, creates a "main"
 * element, moves the current root element underneath it, installs "main" as
 * the new document root, and prints the element names of the resulting tree.
 *
 * Returns 0 on success, -1 when the file cannot be parsed, the document has
 * no root element, or the wrapper element cannot be created.
 * The parsed document is freed on every path.
 */
int test_wrapWithNode(const char* filepath) {
    int rc = -1;
    xmlDoc *doc = NULL;
    xmlNode *root_element = NULL;
    xmlNode *newNode = NULL;

    /* parse the file and get the DOM */
    doc = xmlReadFile(filepath, NULL, 0);
    if (doc == NULL) {
        printf("error: could not parse file %s\n", filepath);
        return -1;
    }

    /* Get the root element node */
    root_element = xmlDocGetRootElement(doc);
    if (root_element == NULL) {
        printf("error: document has no root element\n");
        goto done;
    }

    /* Create the wrapper before touching the tree, so an allocation failure
     * leaves the document intact. */
    newNode = xmlNewNode(NULL, BAD_CAST "main");
    if (newNode == NULL) {
        printf("error: could not create main node\n");
        goto done;
    }

    /* The root needs special handling: detach it, hang it under the wrapper,
     * then make the wrapper the document root. */
    xmlUnlinkNode(root_element);
    xmlAddChild(newNode, root_element);
    xmlDocSetRootElement(doc, newNode);

    /* Fetch the root element again; it should now be "main". */
    root_element = xmlDocGetRootElement(doc);
    print_element_names(root_element, "after wrap");
    rc = 0;

done:
    /* free the document */
    xmlFreeDoc(doc);
    return rc;
}
示例使用的是如下XML文件:
<parent> <p> <a1> <one>1</one> <two>2</two> <three>3</three> </a1> <a2> <one>1</one> <two>2</two> <three>3</three> </a2> </p> </parent>
结果:
print: after delete
**删除节点p后的节点树**
node type: Element, name: parent
node type: Element, name: a1
node type: Element, name: one
node type: Element, name: two
node type: Element, name: three
node type: Element, name: a2
node type: Element, name: one
node type: Element, name: two
node type: Element, name: three

test: wrapWithNode
**增加一个main节点后的节点树**
print: after wrap
node type: Element, name: main
node type: Element, name: parent
node type: Element, name: p
node type: Element, name: a1
node type: Element, name: one
node type: Element, name: two
node type: Element, name: three
node type: Element, name: a2
node type: Element, name: one
node type: Element, name: two
node type: Element, name: three
代码还没仔细看,略看下,写的非常不错,真乃大神,看来得细细的啃一下libxml2的源码才行….
想要完成全部的XML处理还需自己多多的了解。