zoukankan      html  css  js  c++  java
  • go解析xml的三种方式

    go解析xml的三种方式

    之前项目中用到过xml解析,在这里记录一下。

    小文件简单解析

    demo.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <config>
       <smtpServer>smtp.163.com</smtpServer>
       <smtpPort>25</smtpPort>
       <sender>user@163.com</sender>
       <senderPasswd>123456</senderPasswd>
       <receivers flag="true">
         <age>16</age>
         <user>Mike_Zhang@live.com</user>
         <user>test1@qq.com</user>
         <script>
         <![CDATA[
            function matchwo(a,b) {
                if (a < b && a < 0) then {
                    return 1;
                } else {
                    return 0;
                }
            }
            ]]>
         </script>
      </receivers>
     </config>
    

    main.go

    package main
    
    import (
    	"fmt"
    	"io/ioutil"
    	"encoding/xml"
    )
    /*
    https://studygolang.com/static/pkgdoc/pkg/encoding_xml.htm
    */
    // SConfig maps the <config> root element of demo.xml onto Go fields.
    type SConfig struct {
    	// XMLName requires the outermost element to be named "config".
    	XMLName xml.Name `xml:"config"`
    	// SmtpServer receives the text of the <smtpServer> child element.
    	SmtpServer string `xml:"smtpServer"`
    	// SmtpPort receives the <smtpPort> child element as an integer.
    	SmtpPort int `xml:"smtpPort"`
    	// Sender receives the text of the <sender> child element.
    	Sender string `xml:"sender"`
    	// SenderPasswd receives the text of the <senderPasswd> child element.
    	SenderPasswd string `xml:"senderPasswd"`
    	// Receivers receives the nested <receivers> element as a struct.
    	Receivers SReceivers `xml:"receivers"`
    }
       
      // SReceivers maps the <receivers> element: its flag attribute, the <age>
      // value, every <user> entry and the text of <script>.
      type SReceivers struct {
      	// Age receives the <age> child element as an integer.
      	Age int `xml:"age"`
      	// Flag receives the value of the flag attribute on <receivers>.
      	Flag string `xml:"flag,attr"`
      	// User collects the text of every <user> child element, in order.
      	User []string `xml:"user"`
      	// Script receives the character data of <script>, including CDATA content.
      	Script string `xml:"script"`
      }
    
    // readXml reads the whole XML file at path into memory, decodes it into an
    // SConfig and prints every decoded field to standard output. A read or decode
    // failure is reported on standard output and ends the function early.
    func readXml(path string) {
    	// ioutil.ReadFile opens, reads and closes the file on its own.
    	raw, readErr := ioutil.ReadFile(path)
    	if readErr != nil {
    		fmt.Println("读文件出错!", readErr)
    		return
    	}

    	var cfg SConfig
    	if decodeErr := xml.Unmarshal(raw, &cfg); decodeErr != nil {
    		fmt.Printf("error: %v", decodeErr)
    		return
    	}

    	// Top-level settings first, then the nested receivers section.
    	fmt.Println("SmtpServer : ", cfg.SmtpServer)
    	fmt.Println("SmtpPort : ", cfg.SmtpPort)
    	fmt.Println("Sender : ", cfg.Sender)
    	fmt.Println("SenderPasswd : ", cfg.SenderPasswd)

    	rcv := cfg.Receivers
    	fmt.Println("Receivers.Flag : ", rcv.Flag)
    	fmt.Println("Receivers.Age : ", rcv.Age)
    	fmt.Println("Receivers.Script : ", rcv.Script)
    	for idx := range rcv.User {
    		fmt.Println(idx, rcv.User[idx])
    	}
    }
    
    // main runs the struct-mapping demo against demo.xml in the working directory.
    func main() {
    	const configPath = "demo.xml"
    	readXml(configPath)
    }
    

    输出:

    SmtpServer :  smtp.163.com
    SmtpPort :  25
    Sender :  user@163.com
    SenderPasswd :  123456
    Receivers.Flag :  true
    Receivers.Age :  16
    Receivers.Script :
    
            function matchwo(a,b) {
                if (a < b && a < 0) then {
                    return 1;
                } else {
                    return 0;
                }
            }
    
    
    0 Mike_Zhang@live.com
    1 test1@qq.com
    

    参考博客

    大文件解析

    对于超大xml文件的读取采用事件驱动的方式节省内存提高效率:

    demo.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <config>
       <smtpServer>smtp.163.com</smtpServer>
       <smtpPort>25</smtpPort>
       <sender>user@163.com</sender>
      <senderPasswd>123456</senderPasswd>
       <receivers flag="true">
         <age>16</age>
         <user>Mike_Zhang@live.com</user>
         <user>test1@qq.com</user>
         <script>
         <![CDATA[
            function matchwo(a,b) {
                if (a < b && a < 0) then {
                    return 1;
                } else {
                    return 0;
                }
            }
            ]]>
         </script>
      </receivers>
     </config>
    

    main.go

    package main
    
    import (
    	"fmt"
    	"encoding/xml"
    	"bufio"
    	"os"
    	"io"
    )
    /*
    解析超大 xml 文件
    https://studygolang.com/static/pkgdoc/pkg/encoding_xml.htm
    */
    // SConfig maps the <config> root element of demo.xml onto Go fields.
    type SConfig struct {
    	// XMLName requires the decoded element to be named "config".
    	XMLName xml.Name `xml:"config"`
    	// SmtpServer receives the text of the <smtpServer> child element.
    	SmtpServer string `xml:"smtpServer"`
    	// SmtpPort receives the <smtpPort> child element as an integer.
    	SmtpPort int `xml:"smtpPort"`
    	// Sender receives the text of the <sender> child element.
    	Sender string `xml:"sender"`
    	// SenderPasswd receives the text of the <senderPasswd> child element.
    	SenderPasswd string `xml:"senderPasswd"`
    	// Receivers receives the nested <receivers> element as a struct.
    	Receivers SReceivers `xml:"receivers"`
    }
       
      // SReceivers maps the <receivers> element: its flag attribute, the <age>
      // value, every <user> entry and the text of <script>.
      type SReceivers struct {
      	// Age receives the <age> child element as an integer.
      	Age int `xml:"age"`
      	// Flag receives the value of the flag attribute on <receivers>.
      	Flag string `xml:"flag,attr"`
      	// User collects the text of every <user> child element, in order.
      	User []string `xml:"user"`
      	// Script receives the character data of <script>, including CDATA content.
      	Script string `xml:"script"`
      }
    
    // readXml streams the XML file at path token by token, printing the name of
    // every start element it meets. When it reaches a <config> element it decodes
    // that element into an SConfig, prints the result (or the decode error) and
    // returns.
    //
    // The function also returns when the input ends without a <config> element,
    // and it reports any tokenizer error on standard output instead of dropping it.
    func readXml(path string) {
    	file, errOpen := os.Open(path)
    	if errOpen != nil {
    		fmt.Println("打开文件异常!", errOpen)
    		return
    	}
    	defer file.Close()

    	// Read through a buffered reader so the decoder pulls the file in chunks.
    	decoder := xml.NewDecoder(bufio.NewReader(file))

    	for {
    		t, err := decoder.Token()
    		if err == io.EOF {
    			// End of input: stop here. Token keeps returning io.EOF on every
    			// later call, so looping on it would never terminate.
    			return
    		}
    		if err != nil {
    			fmt.Println("解析错误:")
    			fmt.Println(err)
    			return
    		}

    		// Only start elements matter; skip text, comments, end tags, etc.
    		start, isStart := t.(xml.StartElement)
    		if !isStart {
    			continue
    		}

    		name := start.Name.Local
    		fmt.Println(name)
    		if name != "config" {
    			continue
    		}

    		// Decode the whole <config> element, starting from the token just read.
    		var sConfig SConfig
    		if configErr := decoder.DecodeElement(&sConfig, &start); configErr != nil {
    			fmt.Println("解析错误:")
    			fmt.Println(configErr)
    		} else {
    			fmt.Println(sConfig)
    		}
    		return
    	}
    }
    
    // main runs the streaming-decoder demo against demo.xml in the working directory.
    func main() {
    	const configPath = "demo.xml"
    	readXml(configPath)
    }
    

    输出:

    config
    {{ config} smtp.163.com 25 user@163.com 123456 {16 true [Mike_Zhang@live.com test1@qq.com]
    
            function matchwo(a,b) {
                if (a < b && a < 0) then {
                    return 1;
                } else {
                    return 0;
                }
            }
    
         }}
    

    复杂结构解析

    有的时候xml文件很复杂,嵌套很深,这个时候如果我们使用struct来映射就会很麻烦,好在有一个开源的、很方便的解析工具etree。这个etree和python的etree api几乎一样,用起来简单好用。

    bookstores.xml

    <bookstore xmlns:p="urn:schemas-books-com:prices">
    
      <book category="COOKING">
        <title lang="en">Everyday Italian</title>
        <author>Giada De Laurentiis</author>
        <year>2005</year>
        <p:price>30.00</p:price>
      </book>
    
      <book category="CHILDREN">
        <title lang="en">Harry Potter</title>
        <author>J K. Rowling</author>
        <year>2005</year>
        <p:price>29.99</p:price>
      </book>
    
      <book category="WEB">
        <title lang="en">XQuery Kick Start</title>
        <author>James McGovern</author>
        <author>Per Bothner</author>
        <author>Kurt Cagle</author>
        <author>James Linn</author>
        <author>Vaidyanathan Nagarajan</author>
        <year>2003</year>
        <p:price>49.99</p:price>
      </book>
    
      <book category="WEB">
        <title lang="en">Learning XML</title>
        <author>Erik T. Ray</author>
        <year>2003</year>
        <p:price>39.95</p:price>
      </book>
    
    </bookstore>
    

    main.go

    package main
    /*
    
    使用 etree 解析复杂结构的 xml 文件
    https://godoc.org/github.com/beevik/etree
    https://pkg.go.dev/github.com/beevik/etree?tab=doc
    https://github.com/beevik/etree
    */
    
    import (
    	"fmt"
    	"github.com/beevik/etree"// go get github.com/beevik/etree
    )
    
    func readXml(path string) {
    	doc := etree.NewDocument()
    	if err := doc.ReadFromFile(path); err != nil {
    		panic(err)
    	}
    
    	root := doc.SelectElement("bookstore")
    	fmt.Println("ROOT element:", root.Tag)
    
    	for _, book := range root.SelectElements("book") {
    		fmt.Println("CHILD element:", book.Tag)
    		if title := book.SelectElement("title"); title != nil {
    			lang := title.SelectAttrValue("lang", "unknown")
    			fmt.Printf("  TITLE: %s (%s)
    ", title.Text(), lang)
    		}
    		for _, attr := range book.Attr {
    			fmt.Printf("  ATTR: %s=%s
    ", attr.Key, attr.Value)
    		}
    	}
    }
    
    // main runs the etree demo against bookstores.xml in the working directory.
    func main() {
    	const storePath = "bookstores.xml"
    	readXml(storePath)
    }
    

    输出:

    ROOT element: bookstore
    CHILD element: book
      TITLE: Everyday Italian (en)
      ATTR: category=COOKING
    CHILD element: book
      TITLE: Harry Potter (en)
      ATTR: category=CHILDREN
    CHILD element: book
      TITLE: XQuery Kick Start (en)
      ATTR: category=WEB
    CHILD element: book
      TITLE: Learning XML (en)
      ATTR: category=WEB
    
  • 相关阅读:
    java web项目防止多用户重复登录解决方案
    通过Google浏览器Cookie文件获取cookie信息,80以上版本有效
    js实现json数据导出为Excel下载到本地
    golang 搭建web服务器
    typescript笔记
    canvas屏幕动画
    canvas鼠标特效
    博客皮肤分享
    HTML的背景色和背景图、图片
    HTML表格头部、主体、页脚
  • 原文地址:https://www.cnblogs.com/bartggg/p/13067153.html
Copyright © 2011-2022 走看看