zoukankan      html  css  js  c++  java
  • nodeName,nodeValue未知 xml 入库方案 The ElementTree iterparse Function

    import xml.etree.ElementTree as ET
    from lxml.html import *
    from  xmljson import badgerfish as bf
    from pymongo import *
    
    # Flat tag -> value mapping filled in by RecursionDict().
    xmlDict = {}

    def RecursionDict(dict_a):
        """Collect leaf values from a nested dict into the global ``xmlDict``.

        For every key whose value is a dict holding exactly one entry, and
        whose sole entry is not itself a dict, ``xmlDict[key]`` is set to that
        entry's value.  Every value is then visited recursively.

        Non-dict arguments (including lists) are ignored, and a key that
        occurs more than once in the tree keeps only the last value seen.

        :param dict_a: nested dict to walk; any other type is a no-op.
        :return: None -- results are written to ``xmlDict``.
        """
        if not isinstance(dict_a, dict):
            return
        # Iterating items() avoids indexing keys(), which rebuilds the key
        # list on each pass in Python 2 and is not indexable in Python 3.
        for temp_key, temp_value in dict_a.items():
            if isinstance(temp_value, dict) and len(temp_value) == 1:
                # Single-entry dict: take its only value, whatever the key is.
                w = next(iter(temp_value.values()))
                if not isinstance(w, dict):
                    xmlDict[temp_key] = w
            RecursionDict(temp_value)
    
    
    # Load the saved response file and re-serialize its root element.
    tree = ET.parse('listorderitems1493779131.xml')
    root = tree.getroot()
    xmlstr = ET.tostring(root, "us-ascii", "xml")
    print(xmlstr)

    # `fromstring` arrives via a star import (presumably lxml.html's -- confirm);
    # bf.data turns the parsed element into a nested dict.
    res = bf.data(fromstring(xmlstr))
    print(res)
    # Flatten the nested dict into the module-level xmlDict.
    RecursionDict(res)
    print(xmlDict)
    client = MongoClient()
    db = client.apixmldict
    collection = db.col
    # Collection.save() is deprecated since PyMongo 3.0 and removed in 4.0;
    # xmlDict has no "_id" here, so insert_one() performs the same insert.
    collection.insert_one(xmlDict)
    wuser@ubuntu:~/apiamzpy$ cat  wxmljsondict.py
    import xml.etree.ElementTree as ET
    from lxml.html import *
    from  xmljson import badgerfish as bf
    from pymongo import *
    
    # Flat tag -> value mapping filled in by RecursionDict().
    xmlDict = {}

    def RecursionDict(dict_a):
        """Collect leaf values from a nested dict into the global ``xmlDict``.

        For every key whose value is a dict holding exactly one entry, and
        whose sole entry is not itself a dict, ``xmlDict[key]`` is set to that
        entry's value.  Every value is then visited recursively.

        Non-dict arguments (including lists) are ignored, and a key that
        occurs more than once in the tree keeps only the last value seen.

        :param dict_a: nested dict to walk; any other type is a no-op.
        :return: None -- results are written to ``xmlDict``.
        """
        if not isinstance(dict_a, dict):
            return
        # Iterating items() avoids indexing keys(), which rebuilds the key
        # list on each pass in Python 2 and is not indexable in Python 3.
        for temp_key, temp_value in dict_a.items():
            if isinstance(temp_value, dict) and len(temp_value) == 1:
                # Single-entry dict: take its only value, whatever the key is.
                w = next(iter(temp_value.values()))
                if not isinstance(w, dict):
                    xmlDict[temp_key] = w
            RecursionDict(temp_value)
    
    
    # Load the saved response file and re-serialize its root element.
    tree = ET.parse('listorderitems1493779131.xml')
    root = tree.getroot()
    xmlstr = ET.tostring(root, "us-ascii", "xml")
    print(xmlstr)

    # `fromstring` arrives via a star import (presumably lxml.html's -- confirm);
    # bf.data turns the parsed element into a nested dict.
    res = bf.data(fromstring(xmlstr))
    print(res)
    # Flatten the nested dict into the module-level xmlDict.
    RecursionDict(res)
    print(xmlDict)
    client = MongoClient()
    db = client.apixmldict
    collection = db.col
    # Collection.save() is deprecated since PyMongo 3.0 and removed in 4.0;
    # xmlDict has no "_id" here, so insert_one() performs the same insert.
    collection.insert_one(xmlDict)
    wuser@ubuntu:~/apiamzpy$ cat listorderitems1493779131.xml
    <?xml version="1.0"?>
    <ListOrderItemsResponse xmlns="https://mws.amazonservices.com/Orders/2013-09-01">
      <ListOrderItemsResult>
        <AmazonOrderId>123-1239963-8862642</AmazonOrderId>
        <OrderItems>
          <OrderItem>
            <ASIN>B01M123ABC</ASIN>
            <SellerSKU>ABCEHM054AWUS-USAS2</SellerSKU>
            <OrderItemId>12325810562154</OrderItemId>
            <Title>wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags</Title>
            <QuantityOrdered>1</QuantityOrdered>
            <QuantityShipped>0</QuantityShipped>
            <PromotionIds/>
          </OrderItem>
        </OrderItems>
      </ListOrderItemsResult>
      <ResponseMetadata>
        <RequestId>8cc6b5dc-f79e-4da4-b914-9f14388c0bbf</RequestId>
      </ResponseMetadata>
    </ListOrderItemsResponse>
    wuser@ubuntu:~/apiamzpy$ 
    > db.col.save({'w':123})
    WriteResult({ "nInserted" : 1 })
    > db.col.find().pretty()
    { "_id" : ObjectId("590b35877511f2683d345653"), "w" : 123 }
    > db.col.find().pretty()
    { "_id" : ObjectId("590b35877511f2683d345653"), "w" : 123 }
    {
            "_id" : ObjectId("590b35ab1d41c832e2b6048b"),
            "exception" : "<ns0:ListOrderItemsResponse xmlns:ns0="https://mws.amazonservices.com/Orders/2013-09-01">
      <ns0:ListOrderItemsResult>
        <ns0:AmazonOrderId>123-1239963-8862642</ns0:AmazonOrderId>
        <ns0:OrderItems>
          <ns0:OrderItem>
            <ns0:ASIN>B01M123ABC</ns0:ASIN>
            <ns0:SellerSKU>ABCEHM054AWUS-USAS2</ns0:SellerSKU>
            <ns0:OrderItemId>12325810562154</ns0:OrderItemId>
            <ns0:Title>wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags</ns0:Title>
            <ns0:QuantityOrdered>1</ns0:QuantityOrdered>
            <ns0:QuantityShipped>0</ns0:QuantityShipped>
            <ns0:PromotionIds />
          </ns0:OrderItem>
        </ns0:OrderItems>
      </ns0:ListOrderItemsResult>
      <ns0:ResponseMetadata>
        <ns0:RequestId>8cc6b5dc-f79e-4da4-b914-9f14388c0bbf</ns0:RequestId>
      </ns0:ResponseMetadata>
    </ns0:ListOrderItemsResponse>"
    }
    {
            "_id" : ObjectId("590b35ab1d41c832e2b6048c"),
            "asin" : "B01M123ABC",
            "sellersku" : "ABCEHM054AWUS-USAS2",
            "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
            "amazonorderid" : "123-1239963-8862642",
            "quantityshipped" : 0,
            "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
            "quantityordered" : 1,
            "orderitemid" : NumberLong("12325810562154")
    }
    > db.col.find().pretty()
    { "_id" : ObjectId("590b35877511f2683d345653"), "w" : 123 }
    {
            "_id" : ObjectId("590b35ab1d41c832e2b6048b"),
            "exception" : "<ns0:ListOrderItemsResponse xmlns:ns0="https://mws.amazonservices.com/Orders/2013-09-01">
      <ns0:ListOrderItemsResult>
        <ns0:AmazonOrderId>123-1239963-8862642</ns0:AmazonOrderId>
        <ns0:OrderItems>
          <ns0:OrderItem>
            <ns0:ASIN>B01M123ABC</ns0:ASIN>
            <ns0:SellerSKU>ABCEHM054AWUS-USAS2</ns0:SellerSKU>
            <ns0:OrderItemId>12325810562154</ns0:OrderItemId>
            <ns0:Title>wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags</ns0:Title>
            <ns0:QuantityOrdered>1</ns0:QuantityOrdered>
            <ns0:QuantityShipped>0</ns0:QuantityShipped>
            <ns0:PromotionIds />
          </ns0:OrderItem>
        </ns0:OrderItems>
      </ns0:ListOrderItemsResult>
      <ns0:ResponseMetadata>
        <ns0:RequestId>8cc6b5dc-f79e-4da4-b914-9f14388c0bbf</ns0:RequestId>
      </ns0:ResponseMetadata>
    </ns0:ListOrderItemsResponse>"
    }
    {
            "_id" : ObjectId("590b35ab1d41c832e2b6048c"),
            "asin" : "B01M123ABC",
            "sellersku" : "ABCEHM054AWUS-USAS2",
            "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
            "amazonorderid" : "123-1239963-8862642",
            "quantityshipped" : 0,
            "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
            "quantityordered" : 1,
            "orderitemid" : NumberLong("12325810562154")
    }
    {
            "_id" : ObjectId("590b35cd1d41c832ec3d2c03"),
            "asin" : "B01M123ABC",
            "sellersku" : "ABCEHM054AWUS-USAS2",
            "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
            "amazonorderid" : "123-1239963-8862642",
            "quantityshipped" : 0,
            "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
            "quantityordered" : 1,
            "orderitemid" : NumberLong("12325810562154")
    }
    > db.col.find().pretty()
    { "_id" : ObjectId("590b35877511f2683d345653"), "w" : 123 }
    {
            "_id" : ObjectId("590b35ab1d41c832e2b6048b"),
            "exception" : "<ns0:ListOrderItemsResponse xmlns:ns0="https://mws.amazonservices.com/Orders/2013-09-01">
      <ns0:ListOrderItemsResult>
        <ns0:AmazonOrderId>123-1239963-8862642</ns0:AmazonOrderId>
        <ns0:OrderItems>
          <ns0:OrderItem>
            <ns0:ASIN>B01M123ABC</ns0:ASIN>
            <ns0:SellerSKU>ABCEHM054AWUS-USAS2</ns0:SellerSKU>
            <ns0:OrderItemId>12325810562154</ns0:OrderItemId>
            <ns0:Title>wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags</ns0:Title>
            <ns0:QuantityOrdered>1</ns0:QuantityOrdered>
            <ns0:QuantityShipped>0</ns0:QuantityShipped>
            <ns0:PromotionIds />
          </ns0:OrderItem>
        </ns0:OrderItems>
      </ns0:ListOrderItemsResult>
      <ns0:ResponseMetadata>
        <ns0:RequestId>8cc6b5dc-f79e-4da4-b914-9f14388c0bbf</ns0:RequestId>
      </ns0:ResponseMetadata>
    </ns0:ListOrderItemsResponse>"
    }
    {
            "_id" : ObjectId("590b35ab1d41c832e2b6048c"),
            "asin" : "B01M123ABC",
            "sellersku" : "ABCEHM054AWUS-USAS2",
            "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
            "amazonorderid" : "123-1239963-8862642",
            "quantityshipped" : 0,
            "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
            "quantityordered" : 1,
            "orderitemid" : NumberLong("12325810562154")
    }
    {
            "_id" : ObjectId("590b35cd1d41c832ec3d2c03"),
            "asin" : "B01M123ABC",
            "sellersku" : "ABCEHM054AWUS-USAS2",
            "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
            "amazonorderid" : "123-1239963-8862642",
            "quantityshipped" : 0,
            "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
            "quantityordered" : 1,
            "orderitemid" : NumberLong("12325810562154")
    }
    {
            "_id" : ObjectId("590b39841d41c833325a4dcd"),
            "asin" : "B01M123ABC",
            "sellersku" : "ABCEHM054AWUS-USAS2",
            "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
            "amazonorderid" : "123-1239963-8862642",
            "quantityshipped" : 0,
            "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
            "quantityordered" : 1,
            "orderitemid" : NumberLong("12325810562154")
    }
    {
            "_id" : ObjectId("590b3cc51d41c83347fbfcb1"),
            "asin" : "B01M123ABC",
            "sellersku" : "ABCEHM054AWUS-USAS2",
            "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
            "amazonorderid" : "123-1239963-8862642",
            "quantityshipped" : 0,
            "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
            "quantityordered" : 1,
            "orderitemid" : NumberLong("12325810562154")
    }
    > db.col.find().count()
    9
    > db.col.find().count()
    10
    >
     问题:nodeName,nodeValue未知 xml动态数据入库,   解决方案:  
     
    入库结果
     
    test-data
     
     

    ---->更高效的

    nodeName,nodeValue未知 xml 入库方案

    xml--->?--->database
    json只是过程,不是目的;


    想到的一种算法是:先将 xml 转为 string,再借助正则去处理该 string。目测可行,但似乎不够高效。


    也许xpath的原理就是上述算法??

    SAX解析多层嵌套XML - donglindonglin的博客 - 博客频道 - CSDN.NET
    http://blog.csdn.net/donglindonglin/article/details/51996926

    wuser@ubuntu:~/apiamzpy$ python wl.py
    <listiterator object at 0x7f6c99c20ed0>
    [0, 23, 'w1']
    Traceback (most recent call last):
      File "wl.py", line 5, in <module>
        if  t0.next():
    StopIteration
    wuser@ubuntu:~/apiamzpy$ vim wl.py
    
    l0 = [0,23,'w1']
    t0 = l0.__iter__()  # explicit iterator over the list
    print t0
    print list(t0)  # list() drains the iterator, so t0 is now exhausted
    if  t0.next():  # raises StopIteration: nothing is left to yield
            t0.next()
    ~                        

    问题:

    xml取出所有的nodeName、nodeValue对

    0-不高效的方案:0-0-php / python 将xml处理为string;0-1-利用正则处理字符串。

    w

    http://effbot.org/zone/element-iterparse.htm

  • 相关阅读:
    uboot主Makefile之1——HOSTARCH&HOSTOS
    uboot主Makefile之3——BUILD_DIR(Line 78-93)
    Makefile的ifeq逻辑或,逻辑与的变通实现
    uboot主Makefile解析第二篇
    uboot主Makefile解析第一篇
    mkdir -p X 中的“-p”是的意思
    uboot主Makefile中的origin函数
    原生javascript实现call、apply和bind的方法
    js如何判断数组是Array类型
    三栏布局的5种解决方案及优缺点
  • 原文地址:https://www.cnblogs.com/rsapaper/p/6801561.html
Copyright © 2011-2022 走看看