# 转自:(原文出处未注明)
# 这道解析 XML 的练习让我束手无策。
# 看了一下评论区大神的代码,觉得非常值得学习,就转载一下。
# 主要的收获是:只要定义三个处理事件的函数(start_element、end_element、data)就可以解析目录、XML,而且这个处理类可以直接从 dict 继承。
# 但是大神的代码主要集中在 start_element 函数中,也就是只处理了节点中的属性值。
# 我的疑问是:如果要获取和保存节点中的文本内容,该怎么做?
import enum
import json  # no longer used below; kept because removing a file-level import is out of scope
import re
from xml.parsers.expat import ParserCreate


# Enum used to turn the feed's weekday abbreviations into comparable numbers.
@enum.unique
class WeekDay(enum.Enum):
    """Three-letter weekday abbreviations numbered 0 (Sun) through 6 (Sat)."""

    Sun = 0
    Mon = 1
    Tue = 2
    Wed = 3
    Thu = 4
    Fri = 5
    Sat = 6


class WeatherParserHandler(dict):
    """Expat callback holder that stores the extracted weather data in itself.

    After parsing, the dict may contain:
      * ``city`` / ``country``  - from ``<yweather:location>``
      * ``today`` / ``tomorrow`` - ``{"low": int, "high": int, "text": str}``
        from the ``<yweather:forecast>`` elements whose ``day`` matches the
        weekday of ``<yweather:condition>`` and the weekday after it.
    """

    # Examples of the elements this handler reacts to:
    # <yweather:condition text="Haze" code="21" temp="28" date="Wed, 27 May 2015 11:00 am CST" />
    # <yweather:forecast day="Wed" date="27 May 2015" low="20" high="33" text="Partly Cloudy" code="30" />
    # <yweather:location city="Beijing" region="" country="China"/>

    # Captures the local part of a "yweather:" element name. The original
    # pattern read "(w*)" (backslash lost), which always captured "".
    _TAG_RE = re.compile(r"yweather:(\w*)")

    def __init__(self, *args, **kwargs):
        """Create the handler; arguments are forwarded unchanged to ``dict``."""
        super().__init__(*args, **kwargs)
        # Weekday indexes learned from <yweather:condition>. They stay None
        # until that element is seen, so earlier forecasts match nothing.
        self._today = None
        self._tomorrow = None

    def end_element(self, name):
        """Handle an end tag; nothing needs to be recorded here."""

    def data(self, text):
        """Handle character data; element text is not used by this handler."""

    def start_element(self, name, attrs):
        """Record the attributes of the ``yweather:*`` elements of interest.

        Args:
            name: Element name as reported by expat (prefix included).
            attrs: Mapping of attribute names to values for the element.

        Raises:
            KeyError: If a required attribute is missing or a weekday
                abbreviation is not a ``WeekDay`` member.
            ValueError: If a forecast ``low``/``high`` is not an integer.
        """
        tag_match = self._TAG_RE.match(name)
        if tag_match is None:
            return

        tag = tag_match.group(1)
        if tag == "location":
            self["city"] = attrs["city"]
            self["country"] = attrs["country"]
        elif tag == "condition":
            # The date looks like "Wed, 27 May 2015 11:00 am CST"; the part
            # before the first comma is the weekday abbreviation.
            day_name = str(attrs["date"]).split(",")[0].strip()
            self._today = WeekDay[day_name].value
            # Wrap around so that the day after Sat (6) is Sun (0).
            self._tomorrow = (self._today + 1) % len(WeekDay)
        elif tag == "forecast":
            day_index = WeekDay[attrs["day"]].value
            if day_index == self._today:
                self["today"] = self._forecast_entry(attrs)
            elif day_index == self._tomorrow:
                self["tomorrow"] = self._forecast_entry(attrs)

    @staticmethod
    def _forecast_entry(attrs):
        """Build the nested dict stored for one forecast element."""
        return {
            "low": int(attrs["low"]),
            "high": int(attrs["high"]),
            "text": attrs["text"],
        }


def parse_weather(xml):
    """Parse a Yahoo weather RSS document and return the extracted data.

    Args:
        xml: The XML document, as accepted by the expat parser's ``Parse``.

    Returns:
        The ``WeatherParserHandler`` (a ``dict`` subclass) filled during
        parsing.
    """
    weather_handler = WeatherParserHandler()
    weather_parser = ParserCreate()
    weather_parser.StartElementHandler = weather_handler.start_element
    weather_parser.EndElementHandler = weather_handler.end_element
    weather_parser.CharacterDataHandler = weather_handler.data
    weather_parser.Parse(xml)
    return weather_handler


# Self-test:
data = r'''<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<rss version="2.0" xmlns:yweather="http://xml.weather.yahoo.com/ns/rss/1.0" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#">
    <channel>
        <title>Yahoo! Weather - Beijing, CN</title>
        <lastBuildDate>Wed, 27 May 2015 11:00 am CST</lastBuildDate>
        <yweather:location city="Beijing" region="" country="China"/>
        <yweather:units temperature="C" distance="km" pressure="mb" speed="km/h"/>
        <yweather:wind chill="28" direction="180" speed="14.48" />
        <yweather:atmosphere humidity="53" visibility="2.61" pressure="1006.1" rising="0" />
        <yweather:astronomy sunrise="4:51 am" sunset="7:32 pm"/>
        <item>
            <geo:lat>39.91</geo:lat>
            <geo:long>116.39</geo:long>
            <pubDate>Wed, 27 May 2015 11:00 am CST</pubDate>
            <yweather:condition text="Haze" code="21" temp="28" date="Wed, 27 May 2015 11:00 am CST" />
            <yweather:forecast day="Wed" date="27 May 2015" low="20" high="33" text="Partly Cloudy" code="30" />
            <yweather:forecast day="Thu" date="28 May 2015" low="21" high="34" text="Sunny" code="32" />
            <yweather:forecast day="Fri" date="29 May 2015" low="18" high="25" text="AM Showers" code="39" />
            <yweather:forecast day="Sat" date="30 May 2015" low="18" high="32" text="Sunny" code="32" />
            <yweather:forecast day="Sun" date="31 May 2015" low="20" high="37" text="Sunny" code="32" />
        </item>
    </channel>
</rss>
'''
weather = parse_weather(data)
assert weather['city'] == 'Beijing', weather['city']
assert weather['country'] == 'China', weather['country']
assert weather['today']['text'] == 'Partly Cloudy', weather['today']['text']
assert weather['today']['low'] == 20, weather['today']['low']
assert weather['today']['high'] == 33, weather['today']['high']
assert weather['tomorrow']['text'] == 'Sunny', weather['tomorrow']['text']
assert weather['tomorrow']['low'] == 21, weather['tomorrow']['low']
assert weather['tomorrow']['high'] == 34, weather['tomorrow']['high']
print('Weather:', str(weather))