zoukankan      html  css  js  c++  java
  • 在Swift中使用libxml2

    //
    //  main.swift
    //  C150805_libxml2r2
    //  http://git.oschina.net/yao_yu/Swift2015/tree/master/C150805_libxml2r2?dir=1&filepath=C150805_libxml2r2&oid=f80a7498226526b991e7913298c15cd38480aea5&sha=c073af33d0534a10098bb8fcc0706c2fd489dc3f
    //
    //  Created by yao_yu on 15/8/5.
    //  Copyright © 2015年 yao_yu. All rights reserved.
    //
    
    import Foundation
    
    /* ---------- 扩展 ---------- */
    
    extension NSString {
        /// Creates a string from the contents of the resource at `urlString`.
        ///
        /// - parameter urlString: Textual URL of the resource to load.
        /// - parameter encoding: Encoding used to decode the loaded bytes.
        ///
        /// Fails (returns `nil`) when `urlString` cannot be parsed as a URL or
        /// when loading/decoding the contents throws.
        convenience init?(urlString:String, encoding:NSStringEncoding) {
            // An unparsable URL string is an initialization failure, not a crash.
            guard let url = NSURL(string: urlString) else { return nil }
            do {
                try self.init(contentsOfURL: url, encoding: encoding)
            } catch {
                // Report the load/decoding error through the failable initializer
                // instead of silently falling out of the `do` block.
                return nil
            }
        }
    }
    
    extension String {
        /// Creates a string from a NUL-terminated libxml2 `xmlChar` buffer.
        ///
        /// - parameter char: Pointer to the bytes to decode; the buffer is only read.
        ///
        /// Fails (returns `nil`) when `char` is a nil pointer or when
        /// `String.fromCString` cannot decode the bytes, so callers that test the
        /// result with `if let` / `!= nil` observe the failure instead of a crash.
        init?(XMLChar char: UnsafePointer<xmlChar>) {
            guard char != nil else { return nil }
            guard let decoded = String.fromCString(UnsafePointer<CChar>(char)) else {
                return nil
            }
            self = decoded
        }
    }
    
    /* ---------- XML节点 ---------- */
    
    /// Thin wrapper around a libxml2 node pointer and the document pointer it
    /// was created with.
    ///
    /// The wrapper only stores the two raw pointers; nothing in this class
    /// releases them.
    class XMLNode {
        var xmlDoc: xmlDocPtr = nil
        var xmlNode: xmlNodePtr = nil

        /// Wraps `node` and remembers `document` for later XPath queries.
        init(node: xmlNodePtr, document: xmlDocPtr) {
            xmlNode = node
            xmlDoc = document
        }

        /// Wraps the root element of `document`.
        convenience init(document: xmlDocPtr) {
            let rootElement = xmlDocGetRootElement(document)
            self.init(node: rootElement, document: document)
        }

        /// Content of the node as reported by `XMLNodeGetContent`, computed on
        /// first access.
        lazy var rawContent: String? = {
            // Alternative kept from the original author:
            // XMLNodeGetString(self.xmlDoc, xmlNode: self.xmlNode)
            let content = XMLNodeGetContent(self.xmlNode)
            return content
        }()

        /// Wrapped child nodes as reported by `XMLNodeGetChildren`, computed on
        /// first access.
        lazy var children: [XMLNode] = {
            let childPointers = XMLNodeGetChildren(self.xmlNode)
            return self.xmlNodes2XMLNodes(childPointers)
        }()

        /// Attribute name/value pairs as reported by `XMLNodeGetAttributes`,
        /// computed on first access.
        lazy var attributes: [String: String] = {
            let pairs = XMLNodeGetAttributes(self.xmlNode)
            return pairs
        }()

        /// Looks up the attribute named `key`; `nil` when it is not present.
        subscript(key: String) -> String? {
            return self.attributes[key]
        }

        /// Wraps each raw node pointer in an `XMLNode` that shares this node's
        /// document pointer.
        private func xmlNodes2XMLNodes(nodes: [xmlNodePtr]) -> [XMLNode] {
            // Kept as an explicit loop: the equivalent
            //   nodes.map{[unowned self] in XMLNode(node:$0, document:self.xmlDoc)}
            // made the compiler fail with "Command failed due to signal: Abort trap: 6".
            var wrapped: [XMLNode] = []
            for pointer in nodes {
                let wrapper = XMLNode(node: pointer, document: xmlDoc)
                wrapped.append(wrapper)
            }
            return wrapped
        }
    }
    
    extension XMLNode {
        /// Evaluates `xpath` through `XMLFindXPath` and wraps every match.
        ///
        /// Only this node's document pointer is passed to `XMLFindXPath`; the
        /// node pointer itself is not part of the query.
        func xPath(xpath: String) -> [XMLNode] {
            let matches = XMLFindXPath(xmlDoc, xPath: xpath)
            return xmlNodes2XMLNodes(matches)
        }
    }
    
    /* ---------- libxml2读取工具函数 ---------- */
    
    /// Returns the string libxml2 builds for `xmlNode` via `xmlNodeListGetString`.
    ///
    /// - parameter doc: Document the node belongs to.
    /// - parameter xmlNode: Node handed to `xmlNodeListGetString` as the list head.
    /// - returns: The decoded string, or `nil` when libxml2 returns no buffer or
    ///   the buffer cannot be converted to a `String`.
    func XMLNodeGetString(doc:xmlDocPtr, xmlNode:xmlNodePtr) -> String? {
        let contentChars = xmlNodeListGetString(doc, xmlNode, 1)
        guard contentChars != nil else { return nil }
        // The buffer was allocated by libxml2, so it has to be released with
        // libxml2's own deallocator (which may differ from `free` when a custom
        // allocator is installed). `defer` releases it on every exit path.
        defer { xmlFree(contentChars) }

        let contentString = String(XMLChar: contentChars)
        assert(contentString != nil, "XMLNodeGetString: 值转换不成功")
        return contentString
    }
    
    /// Returns the content of `xmlNode` as reported by `xmlNodeGetContent`.
    ///
    /// - parameter xmlNode: Node whose content is read.
    /// - returns: The decoded content, or `nil` when libxml2 returns no buffer or
    ///   the buffer cannot be converted to a `String`.
    func XMLNodeGetContent(xmlNode:xmlNodePtr) -> String? {
        let contentChars = xmlNodeGetContent(xmlNode)
        guard contentChars != nil else { return nil }
        // The buffer was allocated by libxml2, so it has to be released with
        // libxml2's own deallocator (which may differ from `free` when a custom
        // allocator is installed). `defer` releases it on every exit path.
        defer { xmlFree(contentChars) }

        let contentString = String(XMLChar: contentChars)
        assert(contentString != nil, "XMLNodeGetContent: 值转换不成功")
        return contentString
    }
    
    /// Collects the direct children of `xmlNode` for which `xmlNodeIsText`
    /// reports 0.
    ///
    /// - parameter xmlNode: Parent node; a nil pointer yields an empty array.
    /// - returns: The matching child pointers in sibling order.
    func XMLNodeGetChildren(xmlNode: xmlNodePtr) -> [xmlNodePtr] {
        var children = [xmlNodePtr]()
        // A nil node (e.g. the root element of an empty document) has no
        // children; dereferencing it would crash.
        if xmlNode == nil { return children }

        // Walk the sibling chain starting at the first child.
        var child = xmlNode.memory.children
        while child != nil {
            if xmlNodeIsText(child) == 0 {
                children.append(child)
            }
            child = child.memory.next
        }

        return children
    }
    
    /// Builds a dictionary of the attributes attached to `xmlNode`.
    ///
    /// - parameter xmlNode: Node whose property list is read; a nil pointer
    ///   yields an empty dictionary.
    /// - returns: Attribute names mapped to their content. An attribute whose
    ///   content cannot be read is stored with an empty string.
    func XMLNodeGetAttributes(xmlNode: xmlNodePtr) -> [String: String] {
        var result = [String: String]()
        // A nil node has no property list; dereferencing it would crash.
        if xmlNode == nil { return result }

        var attribute: xmlAttrPtr = xmlNode.memory.properties
        while attribute != nil {
            if let key = String(XMLChar: attribute.memory.name) {
                // Missing content is recorded as an empty value.
                result[key] = XMLNodeGetContent(attribute.memory.children) ?? ""
            } else {
                // The attribute name could not be converted; log it and move on.
                print((">>>>>>>>>>>>>>>>>>>>>>>>错误:", String(XMLChar: attribute.memory.name)))
            }
            attribute = attribute.memory.next
        }
        return result
    }
    
    /// Finds the first attribute of `xmlNode` named `key` and returns its content.
    ///
    /// - parameter xmlNode: Node whose property list is scanned.
    /// - parameter key: Attribute name to look for.
    /// - returns: The attribute content from `XMLNodeGetContent`, or `nil` when
    ///   no attribute with that name is found.
    func XMLNodeGetAttribute(xmlNode: xmlNodePtr, key: String) -> String? {
        var current: xmlAttrPtr = xmlNode.memory.properties
        while current != nil {
            let name = String(XMLChar: current.memory.name)
            if key == name {
                return XMLNodeGetContent(current.memory.children)
            }
            current = current.memory.next
        }
        return nil
    }
    
    /// Evaluates an XPath expression against `xmlDoc` and returns the matched nodes.
    ///
    /// - parameter xmlDoc: Document to query.
    /// - parameter xPath: XPath expression text.
    /// - returns: The node pointers of the resulting node set; empty when the
    ///   context cannot be created, the evaluation fails, or nothing matches.
    ///   The pointers refer to nodes of `xmlDoc`, not to copies.
    func XMLFindXPath(xmlDoc:xmlDocPtr, xPath: String) -> [xmlNodePtr] {
        let xPathContext = xmlXPathNewContext(xmlDoc)
        if xPathContext == nil {
            return []
        }
        defer { xmlXPathFreeContext(xPathContext) }

        xPathContext.memory.node = nil

        // `withCString` guarantees the UTF-8 buffer stays valid for the whole
        // evaluation; a pointer taken from a temporary array gives no such
        // guarantee once the conversion expression has finished.
        let xPathObject = xPath.withCString { expression in
            xmlXPathEvalExpression(UnsafePointer<xmlChar>(expression), xPathContext)
        }
        if xPathObject == nil {
            return []
        }
        // Released on every exit path below; only node pointers are copied out.
        defer { xmlXPathFreeObject(xPathObject) }

        let nodeSet = xPathObject.memory.nodesetval
        // `<= 0` also protects the range below against a negative count.
        if nodeSet == nil || nodeSet.memory.nodeNr <= 0 || nodeSet.memory.nodeTab == nil {
            return []
        }

        var resultNodes = [xmlNodePtr]()
        for i in 0 ..< Int(nodeSet.memory.nodeNr) {
            resultNodes.append(nodeSet.memory.nodeTab[i])
        }
        return resultNodes
    }
    
    /// Parses an in-memory buffer with libxml2's XML or HTML reader.
    ///
    /// - parameter data: Bytes to parse; `nil` or empty data yields `nil`.
    /// - parameter encoding: Currently not forwarded to libxml2 — the readers are
    ///   called with a nil encoding name. An earlier attempt to derive the IANA
    ///   charset name through `CFStringConvertEncodingToIANACharSetName` was left
    ///   disabled by the original author.
    /// - parameter isXML: `true` selects `xmlReadMemory`, `false` selects
    ///   `htmlReadMemory`.
    /// - returns: The parsed document, or `nil` when there is nothing to parse,
    ///   the buffer is too large for a `CInt` length, or the reader returns no
    ///   document. The caller is responsible for releasing the document.
    func XMLReadNSData(data:NSData?, encoding:NSStringEncoding = NSUTF8StringEncoding, isXML:Bool = false) -> xmlDocPtr?  {
        // Reject missing/empty input and lengths that would trap in `CInt(...)`.
        guard let data = data where data.length > 0 && data.length <= Int(CInt.max) else {
            return nil
        }

        let cBuffer = UnsafePointer<CChar>(data.bytes)
        let cSize = CInt(data.length)

        let document: xmlDocPtr
        if isXML {
            let options = CInt(XML_PARSE_RECOVER.rawValue)
            document = xmlReadMemory(cBuffer, cSize, nil, nil, options)
        } else {
            let options = CInt(HTML_PARSE_RECOVER.rawValue | HTML_PARSE_NOWARNING.rawValue | HTML_PARSE_NOERROR.rawValue)
            document = htmlReadMemory(cBuffer, cSize, nil, nil, options)
        }

        // A nil pointer from the reader must become Optional.None; otherwise an
        // `if let` at the call site would succeed with a nil document pointer.
        if document == nil {
            return nil
        }
        return document
    }
    
    // NSStringEncoding value converted from CoreFoundation's GB_18030_2000 encoding constant.
    let GB18030_2000_Encoding = CFStringConvertEncodingToNSStringEncoding(CFStringEncoding(CFStringEncodings.GB_18030_2000.rawValue))
    
    /* ---------- 测试代码 ---------- */
    
    /// Stopwatch that remembers its creation time and a label.
    class CElapseTime {
        /// Moment the timer was created.
        var startTime:NSDate
        /// Label placed in front of the elapsed time in `newprompt`.
        var prompt:String
        // Not read anywhere in this file; kept so the class interface is unchanged.
        var unsed:Bool = false

        /// Starts timing now and stores `prompt` as the label.
        init(prompt:String) {
            self.startTime = NSDate()
            self.prompt = prompt
        }

        /// The label followed by the seconds elapsed since `startTime`.
        var newprompt:String {
            // The interpolation markers are required; without the backslashes the
            // text "(prompt)" and the expression source are emitted literally.
            return "\(prompt)耗时:\(NSDate().timeIntervalSinceDate(startTime))"
        }
    }
    
    /// Timing experiment: reads a local sample file, parses it as HTML, touches
    /// the attributes and content of every `div`, and prints the elapsed times.
    func testParseSina() {
        // Only printed as a banner; the download variant below is disabled.
        let sURL = "http://www.baidu.com"
        print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\(sURL)")

        var timer = CElapseTime(prompt: "读取网页")
        //let sContent = NSString(urlString:sURL, encoding: NSUTF8StringEncoding)
        var sContent:NSString? = nil
        do {
            sContent = try NSString(contentsOfFile: "/Volumes/Data/Document/Test/sample.txt", encoding: NSUTF8StringEncoding)
        } catch {
            // Keep going with no content (nothing is parsed below), but say why.
            print("Failed to read sample file: \(error)")
        }
        print(timer.newprompt)
        let sTimer1 = timer.newprompt
        timer = CElapseTime(prompt: "数据解析")

        if let doc = XMLReadNSData(sContent?.dataUsingEncoding(NSUTF8StringEncoding)) {
            // The parsed document is owned by this scope; release it when done.
            // xmlFreeDoc accepts a nil pointer, so this is safe either way.
            defer { xmlFreeDoc(doc) }

            let rootNode = XMLNode(document: doc)
            let findNodes = rootNode.xPath("//div")
            for childNode in findNodes {
                autoreleasepool {
                    // Force the lazy properties so their cost is part of the timing.
                    let _ = (childNode.attributes, childNode.rawContent)
                }
            }
            print(findNodes.count)
        }
        print(sTimer1)
        print(timer.newprompt)
    }
    
    // Script entry point: run the parsing experiment when the file is executed.
    testParseSina()
  • 相关阅读:
    【动态规划】数的划分
    【动态规划】开心的小明
    【动态规划】回文字符串
    【动态规划】skiing_深度搜索_动态规划
    【动态规划】最大子串和
    JDBC中 execute 与 executeUpdate的区别
    poj 2449 Remmarguts' Date 求第k短路 Astar算法
    ACM-ICPC 2018 徐州赛区网络预赛 J. Maze Designer 最大生成树 lca
    hdu 2586 How far away ? 倍增求LCA
    acm模板生成
  • 原文地址:https://www.cnblogs.com/yaoyu126/p/4709988.html
Copyright © 2011-2022 走看看