zoukankan      html  css  js  c++  java
  • 【原创】RSS开发心得小结

    几经面试和简历更新,发现自己做了这么久的开发,却很少做总结,一个个项目过去了,但是知识的累积沉淀却很少……

    借着这次机会,把以前的skill整理一下,浓缩的才是精华。为自己也为其他初学的朋友做个参考。

    RSS(全称Really Simple Syndication) 目前广泛用于网上新闻频道,blog和wiki,主要的版本有0.91, 1.0, 2.0。

    另外还有由IETF标准化(RFC 4287)、被Google等广泛采用的Atom格式,以及作为Feed集合的OPML文件。

    最常见的Feed格式是Rss1.0,2.0和ATOM,解析时通过不同的命名空间来处理不同的版本,下面是解析的主要代码:

    代码
    /// <summary>
    /// Parses feed XML into a <c>Feed</c>, choosing the format from the root element name
    /// (<c>opml</c>, <c>feed</c>, <c>rdf:RDF</c> or <c>rss</c>, compared case-insensitively).
    /// </summary>
    /// <param name="url">Feed source address; stored on the result and handed to the XML loader.</param>
    /// <param name="xmlContent">Feed XML content.</param>
    /// <returns>
    /// The parsed feed. Exceptions raised while reading the document are logged and swallowed,
    /// so the returned object may be only partially populated.
    /// </returns>
    public static Feed AnalyseFeedContent(string url, string xmlContent)
    {
        Feed feed = new Feed();
        feed.Url = url;
        feed.ChannelInfo = new FeedChannel();

        XmlDocument doc = new XmlDocument();
        doc = ReadGlobals.LoadXml(doc, xmlContent, url);

        // Register the commonly used namespaces.
        // Note: the "rdf" prefix is bound to the RSS 1.0 element namespace; the rdf:-prefixed
        // queries issued for RSS 1.0 documents rely on that binding.
        XmlNamespaceManager mgr = new XmlNamespaceManager(doc.NameTable);
        mgr.AddNamespace("rdf", "http://purl.org/rss/1.0/");
        mgr.AddNamespace("content", "http://purl.org/rss/1.0/modules/content/");
        mgr.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");

        XmlNode nodeRoot = doc.DocumentElement;

        try
        {
            if (nodeRoot != null)
            {
                // Enclosure namespace: prefer the URI declared on the root element.
                XmlAttribute encDeclaration = nodeRoot.Attributes["xmlns:enc"];
                mgr.AddNamespace(
                    "enc",
                    encDeclaration != null ? encDeclaration.Value : "http://crocodile.org/ns/rss/2.0/enclosures");

                // Trackback namespace: same rule.
                XmlAttribute trackbackDeclaration = nodeRoot.Attributes["xmlns:trackback"];
                mgr.AddNamespace(
                    "trackback",
                    trackbackDeclaration != null ? trackbackDeclaration.Value : "http://madskills.com/public/xml/rss/module/trackback/");

                // The root names are mutually exclusive, so lower-case once (culture-invariant)
                // and dispatch.
                switch (nodeRoot.Name.ToLowerInvariant())
                {
                    case "opml":
                    {
                        // OPML file: only the type is recorded here.
                        feed.Type = FeedType.OPML;
                        break;
                    }
                    case "feed":
                    {
                        // Atom file.
                        feed.Type = FeedType.ATOM_0_3;

                        // The default namespace declared on the root element wins over the built-in one.
                        string atomNamespace = "http://www.w3.org/2005/Atom";
                        XmlAttribute defaultDeclaration = nodeRoot.Attributes["xmlns"];
                        if (defaultDeclaration != null)
                        {
                            atomNamespace = defaultDeclaration.Value;
                        }
                        mgr.AddNamespace("atom", atomNamespace);

                        feed.ChannelInfo = GetChannel(doc, mgr, "atom");
                        feed.Items = GetItems(doc.SelectNodes("//atom:entry", mgr), feed.Type, mgr);
                        break;
                    }
                    case "rdf:rdf":
                    {
                        // RSS 1.0 file.
                        feed.Type = FeedType.RSS_1_0;
                        feed.ChannelInfo = GetChannel(doc, mgr, "rdf");
                        feed.Items = GetItems(doc.SelectNodes("//rdf:item", mgr), feed.Type, mgr);
                        break;
                    }
                    case "rss":
                    {
                        // RSS 2.0 file.
                        feed.Type = FeedType.RSS_2_0;

                        XmlNode nodeChannel = doc.SelectSingleNode("rss/channel");
                        feed.ChannelInfo = GetChannelForRss20(nodeChannel);

                        // A document without <channel> has no items to read; skip the query
                        // instead of dereferencing null.
                        if (nodeChannel != null)
                        {
                            feed.Items = GetItems(nodeChannel.SelectNodes("item", mgr), feed.Type, mgr);
                        }
                        break;
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Best effort: record the failure and return whatever was parsed so far.
            Log.Write(ex);
        }

        return feed;
    }

     Feed主要分为两部分,Channel和ItemList部分,分别用如下方法解析:

    FeedChannel解析
    /// <summary>
    /// Reads channel-level data (logo, title, link, description, language) from a feed document
    /// using document-wide XPath queries qualified with <paramref name="prefix"/>.
    /// </summary>
    /// <param name="xdtDoc">Parsed feed document; when null an empty channel is returned.</param>
    /// <param name="mgr">Namespace manager in which <paramref name="prefix"/> is registered.</param>
    /// <param name="prefix">Namespace prefix placed in front of every element name queried.</param>
    /// <returns>A new <c>FeedChannel</c> carrying whichever of the values were found.</returns>
    private static FeedChannel GetChannel(XmlDocument xdtDoc, XmlNamespaceManager mgr, string prefix)
    {
        FeedChannel channel = new FeedChannel();
        if (xdtDoc == null)
        {
            return channel;
        }

        // Every query has the form "//<prefix>:<element>", i.e. first match anywhere in the document.
        string anywhere = "//" + prefix + ":";

        XmlNode logoNode = xdtDoc.SelectSingleNode(anywhere + "logo", mgr);
        if (logoNode != null)
        {
            channel.Logo.Src = logoNode.InnerText;
        }

        XmlNode titleNode = xdtDoc.SelectSingleNode(anywhere + "title", mgr);
        if (titleNode != null)
        {
            channel.Title = titleNode.InnerText;
        }

        // Prefer the href of a rel="alternate" link; otherwise use the text of the first link element.
        XmlNode linkNode = xdtDoc.SelectSingleNode(anywhere + "link[@rel='alternate']/@href", mgr);
        if (linkNode == null)
        {
            linkNode = xdtDoc.SelectSingleNode(anywhere + "link", mgr);
        }
        channel.Link = linkNode != null ? linkNode.InnerText : string.Empty;

        // The description is taken from the "tagline" element.
        XmlNode descriptionNode = xdtDoc.SelectSingleNode(anywhere + "tagline", mgr);
        if (descriptionNode != null)
        {
            channel.Description = descriptionNode.InnerText;
        }

        XmlNode languageNode = xdtDoc.SelectSingleNode(anywhere + "feed/@xml:lang", mgr);
        if (languageNode != null)
        {
            string languageTag = languageNode.InnerText;
            try
            {
                // NOTE(review): this switches the calling thread's UI culture as a side effect
                // of parsing; kept for compatibility — confirm that callers actually want it.
                Thread.CurrentThread.CurrentUICulture = new CultureInfo(languageTag);

                // Resolve the LCID from the feed's own xml:lang value.
                channel.Language = CultureInfo.CreateSpecificCulture(languageTag).LCID;
            }
            catch
            {
                // Unknown or unusable language tag: fall back to 0.
                channel.Language = 0;
            }
        }

        return channel;
    }
    FeedItem解析
    /// <summary>
    /// Converts the item (RSS) or entry (Atom) nodes of a feed into <c>FeedItem</c> objects.
    /// </summary>
    /// <param name="xnlItems">Item/entry nodes to convert; null yields an empty list.</param>
    /// <param name="type">Feed format, which selects the element names that are read.</param>
    /// <param name="mgr">Namespace manager resolving the rdf, atom, dc, content and trackback prefixes used in the queries.</param>
    /// <returns>
    /// One <c>FeedItem</c> per node, in node-list order. Returns null when
    /// <paramref name="xnlItems"/> is non-null and <paramref name="type"/> is not
    /// RSS 1.0, RSS 2.0 or Atom.
    /// </returns>
    public static List<FeedItem> GetItems(XmlNodeList xnlItems, FeedType type, XmlNamespaceManager mgr)
    {
        List<FeedItem> lstItems = new List<FeedItem>();
        if (xnlItems == null)
        {
            return lstItems;
        }

        switch (type)
        {
            case FeedType.RSS_1_0:
                foreach (XmlNode xne in xnlItems)
                {
                    lstItems.Add(ParseRssItem(xne, mgr, "rdf:"));
                }
                break;

            case FeedType.RSS_2_0:
                foreach (XmlNode xne in xnlItems)
                {
                    FeedItem fim = ParseRssItem(xne, mgr, string.Empty);

                    // guid and category are only read for RSS 2.0.
                    fim.Guid = ReadChildText(xne, "guid", mgr);
                    XmlNodeList categoryNodes = xne.SelectNodes("category", mgr);
                    if (categoryNodes != null)
                    {
                        foreach (XmlNode categoryNode in categoryNodes)
                        {
                            fim.Category += categoryNode.InnerText + ",";
                        }
                    }

                    lstItems.Add(fim);
                }
                break;

            case FeedType.ATOM_0_3:
                foreach (XmlNode xne in xnlItems)
                {
                    lstItems.Add(ParseAtomItem(xne, mgr));
                }
                break;

            default:
                // Unsupported format: callers receive null rather than an empty list.
                return null;
        }

        return lstItems;
    }

    // Builds a FeedItem from an RSS item node; elementPrefix is "rdf:" for RSS 1.0 and empty for RSS 2.0.
    private static FeedItem ParseRssItem(XmlNode xne, XmlNamespaceManager mgr, string elementPrefix)
    {
        FeedItem fim = new FeedItem();
        fim.Title = ReadChildText(xne, elementPrefix + "title", mgr);
        fim.Link = ReadChildText(xne, elementPrefix + "link", mgr);

        // Prefer the content:encoded body and fall back to the description element.
        string content = ReadChildText(xne, "content:encoded", mgr);
        string description = ReadChildText(xne, elementPrefix + "description", mgr);
        fim.Description = RemoveCDataMarkers(content == string.Empty ? description : content);

        // Author: the item's own author element first, dc:creator only when that is missing/empty.
        // The query is relative to the item so each item gets its own author.
        string authorName = ReadChildText(xne, elementPrefix + "author", mgr);
        if (authorName == string.Empty)
        {
            authorName = ReadChildText(xne, "dc:creator", mgr);
        }
        if (authorName != string.Empty)
        {
            fim.Author = new FeedPerson();
            fim.Author.Name = authorName;
        }

        fim.PubDate = ReadFirstDate(xne, mgr, fim.Link, elementPrefix + "pubDate", "dc:date");

        AppendSubjects(fim, xne, mgr);

        // At most one enclosure is read; Enclosures is left untouched when there is none.
        XmlNode enclosureNode = xne.SelectSingleNode("enclosure", mgr);
        if (enclosureNode != null)
        {
            fim.Enclosures = new List<FeedEnclosure>();
            fim.Enclosures.Add(BuildEnclosure(enclosureNode, "url"));
        }
        fim.Description += GetHtmlByByEnclosure(fim.Enclosures);

        fim.TrackbackPing = ReadChildText(xne, "trackback:ping", mgr);
        return fim;
    }

    // Builds a FeedItem from an Atom entry node.
    private static FeedItem ParseAtomItem(XmlNode xne, XmlNamespaceManager mgr)
    {
        FeedItem fim = new FeedItem();
        fim.Title = ReadChildText(xne, "atom:title", mgr);

        // A rel="alternate" link without an href yields an empty link instead of aborting the parse.
        XmlNode alternateLink = xne.SelectSingleNode("atom:link[@rel='alternate']", mgr);
        fim.Link = alternateLink != null ? ReadAttribute(alternateLink, "href") : string.Empty;

        fim.Summary = ReadChildText(xne, "atom:summary", mgr);
        fim.Description = RemoveCDataMarkers(ReadChildText(xne, "atom:content", mgr));
        fim.Guid = ReadChildText(xne, "atom:id", mgr);
        fim.Contributor = ReadChildText(xne, "atom:contributor", mgr);

        XmlNode authorNode = xne.SelectSingleNode("atom:author", mgr);
        if (authorNode != null)
        {
            fim.Author = new FeedPerson();
            fim.Author.Name = ReadChildText(authorNode, "atom:name", mgr);
            fim.Author.Url = ReadChildText(authorNode, "atom:uri", mgr);
            fim.Author.Email = ReadChildText(authorNode, "atom:email", mgr);
        }

        // Each date tries its element names in order and keeps the first usable value.
        fim.UpdateDate = ReadFirstDate(xne, mgr, fim.Link, "atom:updated", "atom:modified");
        fim.PubDate = ReadFirstDate(xne, mgr, fim.Link, "atom:published", "atom:issued", "atom:created");

        AppendSubjects(fim, xne, mgr);

        XmlNodeList categoryNodes = xne.SelectNodes("atom:category", mgr);
        if (categoryNodes != null)
        {
            foreach (XmlNode categoryNode in categoryNodes)
            {
                fim.Category += ReadAttribute(categoryNode, "term") + ",";
            }
        }

        // Every rel="enclosure" link becomes an enclosure.
        XmlNodeList enclosureNodes = xne.SelectNodes("atom:link[@rel='enclosure']", mgr);
        if (enclosureNodes != null)
        {
            fim.Enclosures = new List<FeedEnclosure>();
            foreach (XmlNode enclosureNode in enclosureNodes)
            {
                FeedEnclosure enc = BuildEnclosure(enclosureNode, "href");
                enc.Title = ReadAttribute(enclosureNode, "title");
                fim.Enclosures.Add(enc);
            }
        }
        fim.Description += GetHtmlByByEnclosure(fim.Enclosures);

        fim.TrackbackPing = ReadChildText(xne, "trackback:ping", mgr);
        fim.Rights = ReadChildText(xne, "atom:rights", mgr);
        return fim;
    }

    // Returns the InnerText of the first node matching xpath under parent, or string.Empty when there is none.
    private static string ReadChildText(XmlNode parent, string xpath, XmlNamespaceManager mgr)
    {
        XmlNode node = parent.SelectSingleNode(xpath, mgr);
        return node != null ? node.InnerText : string.Empty;
    }

    // Returns the value of the named attribute, or string.Empty when the attribute is absent.
    private static string ReadAttribute(XmlNode node, string attributeName)
    {
        XmlAttribute attribute = node.Attributes != null ? node.Attributes[attributeName] : null;
        return attribute != null ? attribute.Value : string.Empty;
    }

    // Tries each xpath in order, converting the text with GetDateTimeByUrl, and returns the first
    // result that is not DateTime.MinValue; returns DateTime.MinValue when none qualifies.
    private static DateTime ReadFirstDate(XmlNode parent, XmlNamespaceManager mgr, string link, params string[] xpaths)
    {
        foreach (string xpath in xpaths)
        {
            XmlNode node = parent.SelectSingleNode(xpath, mgr);
            if (node == null)
            {
                continue;
            }

            DateTime parsed = GetDateTimeByUrl(link, node.InnerText);
            if (parsed != DateTime.MinValue)
            {
                return parsed;
            }
        }
        return DateTime.MinValue;
    }

    // Appends "<text>," to fim.Subject for every dc:subject child of the item.
    private static void AppendSubjects(FeedItem fim, XmlNode xne, XmlNamespaceManager mgr)
    {
        XmlNodeList subjectNodes = xne.SelectNodes("dc:subject", mgr);
        if (subjectNodes == null)
        {
            return;
        }

        foreach (XmlNode subjectNode in subjectNodes)
        {
            fim.Subject += subjectNode.InnerText + ",";
        }
    }

    // Creates an enclosure from the node's type, length and URL attributes; urlAttribute names the URL attribute.
    private static FeedEnclosure BuildEnclosure(XmlNode enclosureNode, string urlAttribute)
    {
        FeedEnclosure enc = new FeedEnclosure();
        enc.Type = ReadAttribute(enclosureNode, "type");

        // A missing or non-numeric length leaves Length at its default value.
        int length;
        if (int.TryParse(ReadAttribute(enclosureNode, "length"), out length))
        {
            enc.Length = length;
        }

        enc.Url = ReadAttribute(enclosureNode, urlAttribute);
        return enc;
    }

    // Removes literal CDATA open/close markers that remain in the text.
    private static string RemoveCDataMarkers(string text)
    {
        return text.Replace("<![CDATA[", string.Empty).Replace("]]>", string.Empty);
    }
  • 相关阅读:
    python-django-自定义分页
    self.user = serializer_field.context['request'].user KeyError: 'request
    无法ssh远程的解决办法
    关于put和patch区别的一篇文章
    一遍关于django rest framework serializer比较详细的笔记
    vim 简单配置(根据编程语言不同,自动缩进)
    django rest framework 的api返回html
    django rest frmaework jwt认证
    django 信号
    刘江的博客
  • 原文地址:https://www.cnblogs.com/litsword/p/1851827.html
Copyright © 2011-2022 走看看