几经面试和简历更新,发现自己做了这么久的开发,却很少做总结,一个个项目过去了,但是知识的积累和沉淀却很少……
借着这次机会,把以前的skill整理一下,浓缩的才是精华。为自己也为其他初学的朋友做个参考。
RSS(全称Really Simple Syndication) 目前广泛用于网上新闻频道,blog和wiki,主要的版本有0.91, 1.0, 2.0。
另外还有由IETF标准化(RFC 4287)、并得到Google等厂商广泛支持的ATOM格式,以及作为Feed集合的OPML文件。
最常见的Feed格式是Rss1.0,2.0和ATOM,解析时通过不同的命名空间来处理不同的版本,下面是解析的主要代码:
代码
/// <summary>
/// Parses feed XML into a <see cref="Feed"/>, detecting the format (OPML, Atom,
/// RSS 1.0 or RSS 2.0) from the name of the document's root element.
/// </summary>
/// <param name="url">Source address of the feed; stored on the result and passed to the XML loader.</param>
/// <param name="xmlContent">Feed XML content.</param>
/// <returns>
/// The parsed feed. Exceptions raised while inspecting the document are logged and
/// swallowed, so the returned object may be only partially populated.
/// </returns>
public static Feed AnalyseFeedContent(string url, string xmlContent)
{
    Feed feed = new Feed();
    feed.Url = url;
    feed.ChannelInfo = new FeedChannel();

    XmlDocument doc = new XmlDocument();
    doc = ReadGlobals.LoadXml(doc, xmlContent, url);

    // Register the namespaces used by the XPath queries below.
    // Note: the "rdf" prefix is bound to the RSS 1.0 core namespace, which is what
    // the "rdf:item" / "rdf:title" style queries rely on.
    XmlNamespaceManager mgr = new XmlNamespaceManager(doc.NameTable);
    mgr.AddNamespace("rdf", "http://purl.org/rss/1.0/");
    mgr.AddNamespace("content", "http://purl.org/rss/1.0/modules/content/");
    mgr.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");

    XmlNode nodeRoot = doc.DocumentElement;
    try
    {
        if (nodeRoot != null)
        {
            // Enclosure namespace: prefer the URI declared on the root element.
            XmlAttribute encDeclaration = nodeRoot.Attributes["xmlns:enc"];
            mgr.AddNamespace("enc", encDeclaration != null
                ? encDeclaration.Value
                : "http://crocodile.org/ns/rss/2.0/enclosures");

            // Trackback namespace: prefer the URI declared on the root element.
            XmlAttribute trackbackDeclaration = nodeRoot.Attributes["xmlns:trackback"];
            mgr.AddNamespace("trackback", trackbackDeclaration != null
                ? trackbackDeclaration.Value
                : "http://madskills.com/public/xml/rss/module/trackback/");

            // Ordinal comparison keeps format detection independent of the current culture.
            string rootName = nodeRoot.Name;
            if (string.Equals(rootName, "opml", StringComparison.OrdinalIgnoreCase))
            {
                // OPML file: only the type is recorded here.
                feed.Type = FeedType.OPML;
            }
            else if (string.Equals(rootName, "feed", StringComparison.OrdinalIgnoreCase))
            {
                // Atom feed.
                feed.Type = FeedType.ATOM_0_3;

                // The default namespace declared on the root element wins over the built-in default.
                string atomNamespace = "http://www.w3.org/2005/Atom";
                XmlAttribute defaultNamespace = nodeRoot.Attributes["xmlns"];
                if (defaultNamespace != null)
                {
                    atomNamespace = defaultNamespace.Value;
                }
                mgr.AddNamespace("atom", atomNamespace);

                feed.ChannelInfo = GetChannel(doc, mgr, "atom");
                feed.Items = GetItems(doc.SelectNodes("//atom:entry", mgr), feed.Type, mgr);
            }
            else if (string.Equals(rootName, "rdf:rdf", StringComparison.OrdinalIgnoreCase))
            {
                // RSS 1.0 feed.
                feed.Type = FeedType.RSS_1_0;
                feed.ChannelInfo = GetChannel(doc, mgr, "rdf");
                feed.Items = GetItems(doc.SelectNodes("//rdf:item", mgr), feed.Type, mgr);
            }
            else if (string.Equals(rootName, "rss", StringComparison.OrdinalIgnoreCase))
            {
                // RSS 2.0 feed.
                feed.Type = FeedType.RSS_2_0;
                XmlNode nodeChannel = doc.SelectSingleNode("rss/channel");
                if (nodeChannel != null)
                {
                    feed.ChannelInfo = GetChannelForRss20(nodeChannel);
                    feed.Items = GetItems(nodeChannel.SelectNodes("item", mgr), feed.Type, mgr);
                }
                else
                {
                    // Report the malformed feed explicitly instead of failing with a
                    // NullReferenceException on the missing channel node.
                    Log.Write(new XmlException("RSS 2.0 feed has no <channel> element: " + url));
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: a malformed feed yields a partially filled result rather than an exception.
        Log.Write(ex);
    }
    return feed;
}
Feed主要分为两部分,Channel和ItemList部分,分别用如下方法解析:
FeedChannel解析
/// <summary>
/// Reads channel-level data (logo, title, link, description, language) from a feed
/// document whose elements are addressed through the given namespace prefix.
/// </summary>
/// <param name="xdtDoc">Parsed feed document; when null an empty channel is returned.</param>
/// <param name="mgr">Namespace manager in which <paramref name="prefix"/> is registered.</param>
/// <param name="prefix">Namespace prefix used to build the XPath queries (for example "atom" or "rdf").</param>
/// <returns>A new <see cref="FeedChannel"/> holding whatever values were found.</returns>
private static FeedChannel GetChannel(XmlDocument xdtDoc, XmlNamespaceManager mgr, string prefix)
{
    FeedChannel channel = new FeedChannel();
    if (xdtDoc == null)
    {
        return channel;
    }

    // Every query searches the whole document for the first matching element.
    string qualifier = "//" + prefix + ":";

    XmlNode logoNode = xdtDoc.SelectSingleNode(qualifier + "logo", mgr);
    if (logoNode != null)
    {
        channel.Logo.Src = logoNode.InnerText;
    }

    XmlNode titleNode = xdtDoc.SelectSingleNode(qualifier + "title", mgr);
    if (titleNode != null)
    {
        channel.Title = titleNode.InnerText;
    }

    // Prefer the href of a rel="alternate" link; otherwise fall back to the text of the first link element.
    XmlNode linkNode = xdtDoc.SelectSingleNode(qualifier + "link[@rel='alternate']/@href", mgr);
    if (linkNode == null)
    {
        linkNode = xdtDoc.SelectSingleNode(qualifier + "link", mgr);
    }
    channel.Link = linkNode != null ? linkNode.InnerText : string.Empty;

    XmlNode descriptionNode = xdtDoc.SelectSingleNode(qualifier + "tagline", mgr);
    if (descriptionNode != null)
    {
        channel.Description = descriptionNode.InnerText;
    }

    XmlNode languageNode = xdtDoc.SelectSingleNode(qualifier + "feed/@xml:lang", mgr);
    if (languageNode != null)
    {
        try
        {
            // Resolve the LCID from the feed's own xml:lang value. The culture is only
            // looked up; the calling thread's UI culture is deliberately left untouched.
            channel.Language = CultureInfo.CreateSpecificCulture(languageNode.InnerText).LCID;
        }
        catch (ArgumentException)
        {
            // Unknown or malformed culture name.
            channel.Language = 0;
        }
    }

    return channel;
}
FeedItem解析
/// <summary>
/// Converts a list of item/entry nodes into <see cref="FeedItem"/> objects.
/// </summary>
/// <param name="xnlItems">Item nodes (RSS) or entry nodes (Atom); may be null.</param>
/// <param name="type">Feed format, which selects the element names that are read.</param>
/// <param name="mgr">Namespace manager with the "rdf", "atom", "dc", "content" and "trackback" prefixes registered as needed.</param>
/// <returns>
/// The parsed items; an empty list when <paramref name="xnlItems"/> is null; null when
/// <paramref name="type"/> is not RSS 1.0, RSS 2.0 or Atom (kept for backward compatibility).
/// </returns>
public static List<FeedItem> GetItems(XmlNodeList xnlItems, FeedType type, XmlNamespaceManager mgr)
{
    List<FeedItem> lstItems = new List<FeedItem>();
    if (xnlItems == null)
    {
        return lstItems;
    }

    switch (type)
    {
        case FeedType.RSS_1_0:
            foreach (XmlNode itemNode in xnlItems)
            {
                // RSS 1.0 core elements are addressed through the "rdf" prefix.
                lstItems.Add(ParseRssItem(itemNode, mgr, "rdf:", false));
            }
            break;
        case FeedType.RSS_2_0:
            foreach (XmlNode itemNode in xnlItems)
            {
                // RSS 2.0 core elements are unqualified; guid and category are read as well.
                lstItems.Add(ParseRssItem(itemNode, mgr, string.Empty, true));
            }
            break;
        case FeedType.ATOM_0_3:
            foreach (XmlNode entryNode in xnlItems)
            {
                lstItems.Add(ParseAtomEntry(entryNode, mgr));
            }
            break;
        default:
            return null;
    }
    return lstItems;
}

// Builds a FeedItem from one RSS 1.0 / RSS 2.0 item node; corePrefix qualifies the core element names.
private static FeedItem ParseRssItem(XmlNode itemNode, XmlNamespaceManager mgr, string corePrefix, bool readGuidAndCategories)
{
    FeedItem item = new FeedItem();
    item.Title = ReadNodeText(itemNode, corePrefix + "title", mgr);
    item.Link = ReadNodeText(itemNode, corePrefix + "link", mgr);

    // Full content (content:encoded) is preferred over the short description.
    string content = ReadNodeText(itemNode, "content:encoded", mgr);
    string description = ReadNodeText(itemNode, corePrefix + "description", mgr);
    item.Description = content.Length == 0 ? description : content;
    item.Description = item.Description.Replace("<![CDATA[", string.Empty).Replace("]]>", string.Empty);

    // Author: the item's own author element first, dc:creator only as a fallback when it is missing.
    string authorName = ReadNodeText(itemNode, corePrefix + "author", mgr);
    if (authorName.Length == 0)
    {
        authorName = ReadNodeText(itemNode, "dc:creator", mgr);
    }
    if (authorName.Length != 0)
    {
        item.Author = new FeedPerson();
        item.Author.Name = authorName;
    }

    item.PubDate = ReadFirstDate(itemNode, mgr, item.Link, corePrefix + "pubDate", "dc:date");

    if (readGuidAndCategories)
    {
        item.Guid = ReadNodeText(itemNode, "guid", mgr);
    }

    // Subjects are accumulated as a comma-terminated list.
    XmlNodeList subjectNodes = itemNode.SelectNodes("dc:subject", mgr);
    if (subjectNodes != null)
    {
        foreach (XmlNode subjectNode in subjectNodes)
        {
            item.Subject += subjectNode.InnerText + ",";
        }
    }

    if (readGuidAndCategories)
    {
        XmlNodeList categoryNodes = itemNode.SelectNodes("category", mgr);
        if (categoryNodes != null)
        {
            foreach (XmlNode categoryNode in categoryNodes)
            {
                item.Category += categoryNode != null ? categoryNode.InnerText : string.Empty;
                item.Category += ",";
            }
        }
    }

    // Only the first enclosure element is read for RSS items.
    XmlNode enclosureNode = itemNode.SelectSingleNode("enclosure", mgr);
    if (enclosureNode != null)
    {
        item.Enclosures = new List<FeedEnclosure>();
        item.Enclosures.Add(ReadEnclosure(enclosureNode, "url"));
    }
    item.Description += GetHtmlByByEnclosure(item.Enclosures);

    item.TrackbackPing = ReadNodeText(itemNode, "trackback:ping", mgr);
    return item;
}

// Builds a FeedItem from one Atom entry node.
private static FeedItem ParseAtomEntry(XmlNode entryNode, XmlNamespaceManager mgr)
{
    FeedItem item = new FeedItem();
    item.Title = ReadNodeText(entryNode, "atom:title", mgr);

    // A rel="alternate" link without an href yields an empty link instead of a NullReferenceException.
    XmlNode alternateLink = entryNode.SelectSingleNode("atom:link[@rel='alternate']", mgr);
    item.Link = alternateLink != null ? ReadAttribute(alternateLink, "href") : string.Empty;

    item.Summary = ReadNodeText(entryNode, "atom:summary", mgr);
    item.Description = ReadNodeText(entryNode, "atom:content", mgr);
    item.Description = item.Description.Replace("<![CDATA[", string.Empty).Replace("]]>", string.Empty);
    item.Guid = ReadNodeText(entryNode, "atom:id", mgr);
    item.Contributor = ReadNodeText(entryNode, "atom:contributor", mgr);

    XmlNode authorNode = entryNode.SelectSingleNode("atom:author", mgr);
    if (authorNode != null)
    {
        item.Author = new FeedPerson();
        item.Author.Name = ReadNodeText(authorNode, "atom:name", mgr);
        item.Author.Url = ReadNodeText(authorNode, "atom:uri", mgr);
        item.Author.Email = ReadNodeText(authorNode, "atom:email", mgr);
    }

    // Each date falls back through the alternative element names listed after the first.
    item.UpdateDate = ReadFirstDate(entryNode, mgr, item.Link, "atom:updated", "atom:modified");
    item.PubDate = ReadFirstDate(entryNode, mgr, item.Link, "atom:published", "atom:issued", "atom:created");

    XmlNodeList subjectNodes = entryNode.SelectNodes("dc:subject", mgr);
    if (subjectNodes != null)
    {
        foreach (XmlNode subjectNode in subjectNodes)
        {
            item.Subject += subjectNode.InnerText + ",";
        }
    }

    // Atom categories carry their value in the "term" attribute.
    XmlNodeList categoryNodes = entryNode.SelectNodes("atom:category", mgr);
    if (categoryNodes != null)
    {
        foreach (XmlNode categoryNode in categoryNodes)
        {
            item.Category += ReadAttribute(categoryNode, "term");
            item.Category += ",";
        }
    }

    // Atom enclosures are link elements with rel="enclosure"; all of them are collected.
    XmlNodeList enclosureLinks = entryNode.SelectNodes("atom:link[@rel='enclosure']", mgr);
    if (enclosureLinks != null)
    {
        item.Enclosures = new List<FeedEnclosure>();
        foreach (XmlNode enclosureLink in enclosureLinks)
        {
            FeedEnclosure enclosure = ReadEnclosure(enclosureLink, "href");
            enclosure.Title = ReadAttribute(enclosureLink, "title");
            item.Enclosures.Add(enclosure);
        }
    }
    item.Description += GetHtmlByByEnclosure(item.Enclosures);

    item.TrackbackPing = ReadNodeText(entryNode, "trackback:ping", mgr);
    item.Rights = ReadNodeText(entryNode, "atom:rights", mgr);
    return item;
}

// Returns the InnerText of the first node matching xpath relative to parent, or string.Empty when there is none.
private static string ReadNodeText(XmlNode parent, string xpath, XmlNamespaceManager mgr)
{
    XmlNode match = parent.SelectSingleNode(xpath, mgr);
    return match != null ? match.InnerText : string.Empty;
}

// Returns the value of the named attribute, or string.Empty when the attribute is absent.
private static string ReadAttribute(XmlNode node, string attributeName)
{
    XmlAttribute attribute = node.Attributes != null ? node.Attributes[attributeName] : null;
    return attribute != null ? attribute.Value : string.Empty;
}

// Tries each xpath in order and returns the first date that is not DateTime.MinValue; DateTime.MinValue when none qualifies.
private static DateTime ReadFirstDate(XmlNode parent, XmlNamespaceManager mgr, string link, params string[] xpaths)
{
    foreach (string xpath in xpaths)
    {
        XmlNode match = parent.SelectSingleNode(xpath, mgr);
        if (match != null)
        {
            DateTime parsed = GetDateTimeByUrl(link, match.InnerText);
            if (parsed != DateTime.MinValue)
            {
                return parsed;
            }
        }
    }
    return DateTime.MinValue;
}

// Builds a FeedEnclosure from the type/length attributes and the given URL attribute of an enclosure node.
private static FeedEnclosure ReadEnclosure(XmlNode enclosureNode, string urlAttribute)
{
    FeedEnclosure enclosure = new FeedEnclosure();
    enclosure.Type = ReadAttribute(enclosureNode, "type");

    // A missing or non-numeric length leaves Length at its default value.
    int length;
    if (int.TryParse(ReadAttribute(enclosureNode, "length"), out length))
    {
        enclosure.Length = length;
    }

    enclosure.Url = ReadAttribute(enclosureNode, urlAttribute);
    return enclosure;
}