zoukankan      html  css  js  c++  java
  • 一个SAX RSS解析器的核心部分

     

    message对象,用来存储每个rss中的一条新闻

    1: package com.leipei.rss;

       2:  
       3: import java.net.MalformedURLException;
       4: import java.net.URL;
       5: import java.text.ParseException;
       6: import java.text.SimpleDateFormat;
       7: import java.util.Date;
       8:  
       9: /*
      10:  * 每条rss消息的结构体
      11:  */
      /**
       * One news entry (a single {@code <item>}) of an RSS feed.
       *
       * <p>Every entry carries at least a title, a link, a description and a
       * publication date. Any of them may still be {@code null} when the feed did
       * not supply a usable value.
       *
       * <p>The natural ordering sorts messages by publication date, newest first;
       * messages without a date sort after all dated ones.
       */
      public class Message implements Comparable<Message> {
          // NOTE(review): SimpleDateFormat instances are not synchronized (see the JDK
          // documentation); this shared formatter must not be used from several
          // threads at once without external locking.
          static SimpleDateFormat FORMATTER = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

          private String title = null;
          private URL link = null;
          private String description = null;
          private Date pubDate = null;

          /** @return the title, or {@code null} if none has been set */
          public String getTitle() {
              return title;
          }

          /** @param title the new title; may be {@code null} */
          public void setTitle(String title) {
              this.title = title;
          }

          /** @return the link, or {@code null} if none has been set */
          public URL getLink() {
              return link;
          }

          /**
           * Parses {@code link} as a URL and stores it.
           *
           * @param link textual form of the URL
           * @throws RuntimeException wrapping the {@link MalformedURLException} when
           *         {@code link} is not a valid URL
           */
          public void setLink(String link) {
              try {
                  this.link = new URL(link);
              } catch (MalformedURLException e) {
                  throw new RuntimeException(e);
              }
          }

          /** @return the description, or {@code null} if none has been set */
          public String getDescription() {
              return description;
          }

          /**
           * Sets the description, or appends to it when one is already present.
           *
           * <p>Appending is deliberate: for some feeds (e.g. NetEase news) the text of
           * one description tag is delivered in two parts, and both must be kept.
           *
           * @param description the (next part of the) description text
           */
          public void setDescription(String description) {
              if (this.description == null) {
                  this.description = description;
              } else {
                  this.description = this.description + description;
              }
          }

          /**
           * Returns the publication date formatted as {@code yyyy-MM-dd HH:mm:ss}.
           *
           * @return the formatted date, or {@code null} when no date is set
           */
          public String getDate() {
              // Formatting a null Date throws NullPointerException, which previously
              // also broke toString() for every message without a parsable date.
              if (this.pubDate == null) {
                  return null;
              }
              return FORMATTER.format(this.pubDate);
          }

          /**
           * Parses {@code date} with the {@code yyyy-MM-dd HH:mm:ss} pattern and stores
           * the result. A {@code null} or empty string clears the date. When the text
           * cannot be parsed, the stack trace is printed and the previously stored
           * date is left unchanged.
           *
           * @param date textual date, {@code null} or empty
           */
          public void setDate(String date) {
              try {
                  if (date == null || date.equals("")) {
                      this.pubDate = null;
                  } else {
                      this.pubDate = FORMATTER.parse(date);
                  }
              } catch (ParseException e) {
                  e.printStackTrace();
              }
              System.out.println("date string is:\t" + date);
          }

          /**
           * Renders title, date, link and description on one line each; unset
           * fields are shown as {@code null}.
           */
          @Override
          public String toString() {
              StringBuilder sb = new StringBuilder();
              sb.append("Title: ").append(title).append('\n');
              sb.append("Date: ").append(this.getDate()).append('\n');
              sb.append("Link: ").append(link).append('\n');
              sb.append("Description: ").append(description);
              return sb.toString();
          }

          // NOTE(review): java.net.URL#hashCode and #equals may resolve host names
          // (a blocking operation, per the JDK documentation) - keep that in mind
          // before using messages as keys of large hash-based collections.
          @Override
          public int hashCode() {
              final int prime = 31;
              int result = 1;
              result = prime * result + hashOrZero(this.pubDate);
              result = prime * result + hashOrZero(description);
              result = prime * result + hashOrZero(link);
              result = prime * result + hashOrZero(title);
              return result;
          }

          /** Two messages are equal when all four fields are equal (or both unset). */
          @Override
          public boolean equals(Object obj) {
              if (this == obj) {
                  return true;
              }
              if (obj == null || getClass() != obj.getClass()) {
                  return false;
              }
              Message other = (Message) obj;
              return sameValue(this.pubDate, other.pubDate)
                      && sameValue(description, other.description)
                      && sameValue(link, other.link)
                      && sameValue(title, other.title);
          }

          /**
           * Orders messages by publication date, newest first.
           *
           * @param another the message to compare with
           * @return a negative value when this message is newer, a positive value
           *         when it is older, when it has no date while {@code another} has
           *         one, or when {@code another} is {@code null}; {@code 0} when
           *         both dates are equal or both are unset
           */
          public int compareTo(Message another) {
              if (another == null) {
                  return 1;
              }
              // Messages without a date sort last instead of throwing
              // NullPointerException.
              if (this.pubDate == null) {
                  return (another.pubDate == null) ? 0 : 1;
              }
              if (another.pubDate == null) {
                  return -1;
              }
              // Operands are swapped on purpose: this yields descending order.
              return another.pubDate.compareTo(this.pubDate);
          }

          /** Null-safe hash: {@code 0} for {@code null}, otherwise the value's hash. */
          private static int hashOrZero(Object value) {
              return (value == null) ? 0 : value.hashCode();
          }

          /** Null-safe equality: true when both are {@code null} or {@code a.equals(b)}. */
          private static boolean sameValue(Object a, Object b) {
              return (a == null) ? (b == null) : a.equals(b);
          }
      }

    RSS解析器实现部分,采用SAX方式解析

    1: package com.leipei.rss;

       2:  
       3: import java.util.ArrayList;
       4: import java.util.List;
       5: import java.util.Stack;
       6:  
       7: import org.xml.sax.Attributes;
       8: import org.xml.sax.SAXException;
       9: import org.xml.sax.helpers.DefaultHandler;
      10:  
      11: public class RssHandler extends DefaultHandler {
      12:  
      13:     /*
      14:      * 通常的RSS源实际上是个xml文件.里面包含多个item节点,每个item节点是一条新闻.
      15:      * 每个item节点至少包含title,link,description,pubDate这四个子节点,其实很多rss源还有补充的节点
      16:      */
      17:     static final String ITEM = "item";
      18:     static final String TITLE = "title";
      19:     static final String LINK = "link";
      20:     static final String DESCRIPTION = "description";
      21:     static final String PUB_DATE = "pubDate";
      22:  
      23:     private Stack<String> currentElement = new Stack<String>();
      24:     private Message currentMessage;
      25:     private List<Message> messgelist = new ArrayList<Message>();;
      26:  
      27:     public void startElement(String uri, String localName, String qName,
      28:             Attributes attrs) throws SAXException {
      29:         currentElement.push(qName);
      30:         System.out.println("start parse '" + currentElement.peek() + "'");
      31:         if (currentElement.peek().equalsIgnoreCase(ITEM)) {
      32:             System.out.println("Create message ");
      33:             this.currentMessage = new Message();
      34:         }
      35:  
      36:     }
      37:  
      38:     public void endElement(String namespaceURI, String localName, String qName)
      39:             throws SAXException {
      40:         System.out.println("end parse element " + currentElement.peek());
      41:  
      42:         if (currentElement.peek().equalsIgnoreCase(ITEM)
      43:                 && this.currentMessage != null) {
      44:             this.messgelist.add(this.currentMessage);
      45:             this.currentMessage = null;
      46:         }
      47:         currentElement.pop();
      48:  
      49:     }
      50:  
      51:     public void characters(char[] ch, int start, int length)
      52:             throws SAXException {
      53:         String cdata = new String(ch, start, length);
      54:         String tagName = currentElement.peek();
      55:  
      56:         System.out.println("Element '" + tagName + "' contains text: " + cdata);
      57:         if (this.currentMessage != null) {
      58:             if (this.currentElement.peek().equalsIgnoreCase(TITLE)) {
      59:                 currentMessage.setTitle(cdata);
      60:             } else if (this.currentElement.peek().equalsIgnoreCase(LINK)) {
      61:                 currentMessage.setLink(cdata);
      62:             } else if (this.currentElement.peek().equalsIgnoreCase(DESCRIPTION)) {
      63:                 currentMessage.setDescription(cdata);
      64:             } else if (this.currentElement.peek().equalsIgnoreCase(PUB_DATE)) {
      65:                 System.out.println("setting pubdate:\t" + cdata);
      66:                 currentMessage.setDate(cdata);
      67:             }
      68:         } else {
      69:             System.out.println("null message");
      70:         }
      71:     }
      72:  
      73:     public List<Message> getMessaList() {
      74:         return this.messgelist;
      75:     }
      76:  
      77:     /*
      78:      * 打印解析的消息结果
      79:      */
      80:     public void printMessageList() {
      81:         if (this.messgelist != null) {
      82:             for (Message message : this.messgelist) {
      83:                 if (message != null) {
      84:                     System.out.println(message.toString());
      85:                 } else {
      86:                     System.out.println("null list");
      87:                 }
      88:             }
      89:         }
      90:     }
      91:  
      92: }
  • 相关阅读:
    如何查看linux系统是32位还是64位
    netstat 的10个基本用法
    linux入门教程(十) 文档的压缩与打包
    linux入门教程(九) 文本编辑工具vim
    linux入门教程(八) Linux磁盘管理
    linux入门教程(七) linux系统用户以及用户组管理
    CentOS5下配置JDK1.6+TOMCAT6
    【Nodejs】外研社一年级起各年级英语音频下载(缺456年级上)
    【Nodejs】外研社一年级起三年级下MP3下载爬虫1.00
    【Python】torrentParser1.03
  • 原文地址:https://www.cnblogs.com/leipei2352/p/2159042.html
Copyright © 2011-2022 走看看