zoukankan      html  css  js  c++  java
  • libxml2的xpath检索中文

    ZC: 当 XPath 字符串中包含中文时,xmlXPathEvalExpression(...) 返回 NULL,暂时不知道该怎么处理...

    ZC: 下面是测试的一些代码/文件,留着以后再研究吧...

    1、Qt5.3.2

    2、XML 的节点的属性中包含中文(XML保存成 UTF-8的格式)

    <?xml version="1.0" encoding="utf-8" ?>
    <root>
        
        <newNode2>content changed</newNode2>
        <newNode3 newAttr="YES">newNode3 content</newNode3>
        <ceshi attribute="测试">测试一下</ceshi>
        <node2 attribute="no">NODE CONTENT</node2>
        
        <son>
            <grandson>This is a grandson node</grandson>
        <newGrandSon>new content</newGrandSon></son>
    </root>

    3、测试代码:

      ZC: 分别尝试了 UTF-8 编码的字符串和本地编码的字符串,都检索不到我要的节点...

    #include <libxml/parser.h>
    #include <libxml/tree.h>
    #include <libxml/xpath.h>
    //#include <iconv.h>
    
    #include <QDebug>
    #include <QTextCodec>
    
    
    // Constructs the window: forwards `parent` to QMainWindow, allocates the
    // Ui::MainWindow helper, and lets it populate this window via setupUi().
    // The helper allocated here is released in the destructor.
    MainWindow::MainWindow(QWidget *parent)
        : QMainWindow(parent)
        , ui(new Ui::MainWindow)
    {
        ui->setupUi(this);
    }
    
    // Destroys the window and releases the Ui::MainWindow helper that the
    // constructor allocated with `new`.
    MainWindow::~MainWindow()
    {
        delete ui;
    }
    
    
    
    
    
    namespace
    {
        // Lets a single iconv() call compile against both prototypes in
        // circulation: POSIX declares the input buffer as `char**`, while some
        // libiconv builds declare it as `const char**`. The parameter type of
        // the iconv() being called selects the matching conversion operator.
        struct IconvInput
        {
            char** ptr;
            operator char**() const { return ptr; }
            operator const char**() const { return const_cast<const char**>(ptr); }
        };
    }

    // Converts `inlen` bytes starting at `inbuf` from `from_charset` to
    // `to_charset` using iconv, writing the result into `outbuf`.
    //
    // `outbuf` is zero-filled over its whole `outlen` bytes before converting,
    // so the result is NUL-terminated only when it is shorter than `outlen`;
    // callers that need a C string must pass a buffer with spare room.
    //
    // Returns 0 on success. Returns -1 when an argument is NULL or a length is
    // negative, when the charset pair is not supported, or when iconv reports
    // an error (invalid input sequence, or `outbuf` too small).
    int code_convert(char* from_charset, char* to_charset, char* inbuf,
                   int inlen, char* outbuf, int outlen)
    {
        if (from_charset == NULL || to_charset == NULL ||
            inbuf == NULL || outbuf == NULL || inlen < 0 || outlen < 0)
            return -1;

        // iconv_open signals failure with (iconv_t)-1, not with a null handle.
        iconv_t cd = iconv_open(to_charset, from_charset);
        if (cd == (iconv_t)-1)
            return -1;

        memset(outbuf, 0, static_cast<size_t>(outlen));

        // iconv advances the pointers and decrements the counters it is given,
        // so it works on local copies. The counters are genuine size_t objects
        // because reinterpreting an int as size_t is wrong wherever size_t is
        // wider than int.
        char* in = inbuf;
        char* out = outbuf;
        size_t inLeft = static_cast<size_t>(inlen);
        size_t outLeft = static_cast<size_t>(outlen);
        IconvInput input = { &in };
        const size_t rc = iconv(cd, input, &inLeft, &out, &outLeft);

        // Close the descriptor on every path, including conversion failure.
        iconv_close(cd);

        return (rc == static_cast<size_t>(-1)) ? -1 : 0;
    }
    
    // Converts a NUL-terminated UTF-8 string to GB2312.
    // On success returns a heap-allocated, NUL-terminated buffer that the
    // caller must release with free(). Returns NULL when `inbuf` is NULL, the
    // input is too long, allocation fails, or the conversion fails.
    char* u2g(char *inbuf)
    {
        if (inbuf == NULL)
            return NULL;

        const size_t inLen = strlen(inbuf);
        // code_convert takes int lengths; refuse inputs whose doubled size
        // would not fit rather than letting the value wrap.
        if (inLen > 0x3FFFFFFFu)
            return NULL;

        // Twice the input length is a generous bound for the converted text.
        // One extra, zero-initialised byte is allocated but not offered to
        // code_convert, so the result is NUL-terminated even when the
        // conversion fills the whole capacity (and for empty input).
        const size_t capacity = 2 * inLen;
        char* szOut = static_cast<char*>(calloc(capacity + 1, 1));
        if (szOut == NULL)
            return NULL;

        // Writable copies of the charset names: code_convert takes char*.
        char fromCharset[] = "utf-8";
        char toCharset[] = "gb2312";
        if (-1 == code_convert(fromCharset, toCharset, inbuf,
                               static_cast<int>(inLen), szOut,
                               static_cast<int>(capacity)))
        {
            free(szOut);
            szOut = NULL;
        }
        return szOut;
    }
    
    // Converts a NUL-terminated GB2312 string to UTF-8.
    // On success returns a heap-allocated, NUL-terminated buffer that the
    // caller must release with free(). Returns NULL when `inbuf` is NULL, the
    // input is too long, allocation fails, or the conversion fails.
    char* g2u(char *inbuf)
    {
        if (inbuf == NULL)
            return NULL;

        const size_t inLen = strlen(inbuf);
        // code_convert takes int lengths; refuse inputs whose doubled size
        // would not fit rather than letting the value wrap.
        if (inLen > 0x3FFFFFFFu)
            return NULL;

        // A two-byte GB2312 character becomes at most three UTF-8 bytes, so
        // twice the input length always suffices. One extra, zero-initialised
        // byte is allocated but not offered to code_convert, so the result is
        // NUL-terminated even when the conversion fills the whole capacity
        // (and for empty input).
        const size_t capacity = 2 * inLen;
        char* szOut = static_cast<char*>(calloc(capacity + 1, 1));
        if (szOut == NULL)
            return NULL;

        // Writable copies of the charset names: code_convert takes char*.
        char fromCharset[] = "gb2312";
        char toCharset[] = "utf-8";
        if (-1 == code_convert(fromCharset, toCharset, inbuf,
                               static_cast<int>(inLen), szOut,
                               static_cast<int>(capacity)))
        {
            free(szOut);
            szOut = NULL;
        }
        return szOut;
    }
    
    // Evaluates the XPath expression `szXpath` against `_pDoc`.
    //
    // `szXpath` must be UTF-8 encoded, as libxml2 expects for every xmlChar
    // string; an expression in a local code page (e.g. GBK) will not match
    // non-ASCII content.
    //
    // Returns the XPath result when it holds at least one node; the caller
    // owns it and must release it with xmlXPathFreeObject(). Returns NULL when
    // an argument is NULL, the context cannot be created, the expression fails
    // to evaluate, or the resulting node set is empty -- these cases are not
    // distinguishable from the return value.
    xmlXPathObject* Get_NodeSet(xmlDoc* _pDoc, const xmlChar *szXpath)
    {
        if (_pDoc == NULL || szXpath == NULL)
            return NULL;

        xmlXPathContextPtr context = xmlXPathNewContext(_pDoc);
        if (context == NULL)
        {
            //printf("context is NULL\n");
            return NULL;
        }

        // The context is only needed for the evaluation itself, so it is
        // released immediately afterwards.
        xmlXPathObjectPtr result = xmlXPathEvalExpression(szXpath, context);
        xmlXPathFreeContext(context);
        if (result == NULL)
        {
            //printf("xmlXPathEvalExpression return NULL\n");
            return NULL;
        }

        if (xmlXPathNodeSetIsEmpty(result->nodesetval))
        {
            xmlXPathFreeObject(result);
            //printf("nodeset is empty\n");
            return NULL;
        }

        return result;
    }
    
    void MainWindow::on_pbtnXPath_clicked()
    {
        xmlDocPtr doc = NULL;             //定义解析文档指针
        xmlNodePtr curNode = NULL;         //定义结点指针(你需要它为了在各个结点间移动)
    
        char *szDocName = "F:/ZZ_Qt5/Qt532_vs2010/build-libxml2_zz-z-Debug/debug/ChangedXml.xml";
    
        doc = xmlReadFile(szDocName, "GB2312", XML_PARSE_RECOVER);  //解析文件
        //doc = xmlReadFile(szDocName, "UTF-8", XML_PARSE_RECOVER);
    
        if (NULL == doc)
        {
            qDebug() << "Document not parsed successfully.";
            return;
        }
    
        char* pcCeShi = "测试";
        QTextCodec *pCodec = QTextCodec::codecForName("GBK");
        QString strCeShi = pCodec->toUnicode(pcCeShi);
    
        //QString str = "/root/node2[@attribute='no']";
        QString str = "/root/node2[@attribute='"+strCeShi+"']";
        QByteArray ba = str.toUtf8();
        //QByteArray ba = str.toLocal8Bit();
        char pc[256] = {0};
        memcpy(&pc[0], ba.data(), ba.length());
        //pc[ba.length()] = '';
    
        char *p0 = "/root/node2[@attribute='测试']";
        char* p1 = g2u(p0);
        char pc1[256] = {0};
        memcpy(&pc1[0], p1, strlen(p1));
    
        //xmlChar *szXpath =BAD_CAST ("/root/node2[@attribute='no']");
        xmlChar *szXpath = BAD_CAST (p1);
        xmlXPathObjectPtr app_result = Get_NodeSet(doc, szXpath);  //查询并得到结果
    
        if (NULL == app_result)
        {
            qDebug() << "app_result is NULL";
            return;
        }
        xmlChar *szValue = NULL;
        if(app_result)
        {
            xmlNodeSetPtr nodeset = app_result->nodesetval;
            for (int i = 0; i < nodeset->nodeNr; i++)
            {
                curNode = nodeset->nodeTab[i];
                if(curNode != NULL)
                {
                    szValue = xmlGetProp(curNode,BAD_CAST "attribute");
                    if (szValue != NULL)
                    {
                        qDebug() << "attribute = " << (char*)szValue;
                        xmlFree(szValue);
                    }
    
                    szValue = xmlNodeGetContent(curNode);
                    if (szValue != NULL)
                    {
                        qDebug() << "content = " << (char*)szValue;
                        xmlFree(szValue);
                    }
                }
            }
            xmlXPathFreeObject (app_result);
        }
        xmlFreeDoc(doc);
    
        free(p1);
    }

    4、

    5、

    6、

  • 相关阅读:
    web.xml报错
    mysql字符集问题汇总
    查询所有表中的某个数存储过程脚本
    SQL Server生成数据库的数据字典存储过程
    浏览器无法访问虚拟机的服务器
    搭建lnmp环境,nginx的配置文件/etc/nginx/nginx.conf
    在centos6.5下搭建lnmp
    Linux服务器关联Git,通过执行更新脚本实现代码同步
    CentOS yum 安装时错误 Errno 14 Couldn't resolve host 解决办法
    LinqToSQL3
  • 原文地址:https://www.cnblogs.com/cppskill/p/7919995.html
Copyright © 2011-2022 走看看