zoukankan      html  css  js  c++  java
  • 用java实现新浪爬虫,代码完整剖析(仅针对当前SinaSignOn有效)

    先来看我们的web.xml文件,如下

     1 <!DOCTYPE web-app PUBLIC
     2  "-//Sun Microsystems, Inc.//DTD Web Application 2.3//EN"
     3  "http://java.sun.com/dtd/web-app_2_3.dtd" >
     4 
     5 <web-app>
       <!-- Registers StartSpiderLisenter as a servlet context listener for this web application. -->
     6   <display-name>MySinaSpider</display-name>
     7     <listener>
     8         <listener-class>main.java.sina.spider.StartSpiderLisenter</listener-class>
     9       </listener>
    10 </web-app>

    这样的配置当启动tomcat的时候,就会运行爬虫,然后再看我们的StartSpiderLisenter类,如下

     1 package main.java.sina.spider;
     2 
     3 import javax.servlet.ServletContextEvent;
     4 import javax.servlet.ServletContextListener;
     5 import main.java.sina.bean.info.LoginInfo;
     6 import main.java.sina.utils.Constant;
     7 
     8 public class StartSpiderLisenter implements ServletContextListener{
     9 
    10     public void contextDestroyed(ServletContextEvent arg0) {
    11         
    12     }
    13 
    14     public void contextInitialized(ServletContextEvent arg0) {
    15         Constant.personalHomePage = "http://weibo.com/zhaoyao2012/home"; //填写你自己的新浪微博个人主页
    16         LoginInfo.username = "***"; //填写你的新浪微博用户名
    18         LoginInfo.password = "***"; //填写你的新浪微博密码
    19         Constant.enableProxy = false; //是否使用代理
    20         Spider.start();
    21     }
    22 
    23 }

    很明显我们看到StartSpiderLisenter类实现了ServletContextListener这个接口,因此必须实现它的两个方法:contextInitialized和contextDestroyed。它们分别在Web应用初始化和销毁的时候被容器调用。我们看到在contextInitialized初始化上下文的方法中调用了Spider.start()方法。那么我们来看看Spider这个类,如下:

      1 package main.java.sina.spider;
      2 
      3 import java.io.IOException;
      4 import java.util.regex.Matcher;
      5 import java.util.regex.Pattern;
      7 import org.quartz.JobBuilder;
      8 import org.quartz.JobDetail;
      9 import org.quartz.Scheduler;
     10 import org.quartz.SchedulerException;
     11 import org.quartz.SchedulerFactory;
     12 import org.quartz.SimpleScheduleBuilder;
     13 import org.quartz.SimpleTrigger;
     14 import org.quartz.TriggerBuilder;
     15 import org.quartz.impl.StdSchedulerFactory;
     17 import main.java.sina.bean.info.LoginInfo;
     18 import main.java.sina.httpclient.LoginSina;
     19 import main.java.sina.httpclient.SpiderSina;
     20 import main.java.sina.job.KeywordSearchJob;
     21 import main.java.sina.utils.Constant;
     22 import main.java.sina.utils.HttpHelper;
     23 import main.java.test.SpiderTest;
     24 
     25 public class Spider {
     26 
     27     public static void main(String[] args) {
     28 
     29         Constant.personalHomePage = "****";    
     30         LoginInfo.username = "****";
     31         LoginInfo.password = "****";
     32         Constant.enableProxy = false;
     33         Constant.hourbefore = 0;  //这个参数用于设置时差
     34         start();
     35         
     36     }
     37     public static void start() {
     38         
     39         final SchedulerFactory factory = new StdSchedulerFactory();
     40         try {
     41             Scheduler scheduler = factory.getScheduler();
     42             JobDetail jobDetail = JobBuilder.newJob(KeywordSearchJob.class)
     43                     .withIdentity("keywordSearch", "weibo").build();
     44             SimpleTrigger trigger = TriggerBuilder.newTrigger()
     45                     .withIdentity("keywordSearch", "weibo")
     46                     .withSchedule(SimpleScheduleBuilder.repeatHourlyForever())
     47                     .build();
     48             scheduler.scheduleJob(jobDetail, trigger);
     49             scheduler.start();
     50         } catch (SchedulerException e) {
     51             e.printStackTrace();
     52         }
     53     }
     54 
     55     public static SpiderSina createSpider() {
     56         LoginSina ls = new LoginSina(LoginInfo.username, LoginInfo.password);
     57         ls.dologinSina();
     58         ls.redirect();
     59         SpiderSina spider = new SpiderSina(ls);
     60 
     61         return spider;
     62     }
     63 
     64     public static void sendMidsofDays(SpiderSina spider,String keyword, String fromdate,
     65             String todate) {
     66         
     67         try {
     68             String midsString = "";
     69             for (int i = 1; i <= 50; i++) {
     70                 String htmlContent = spider
     71                         .search(keyword, i, fromdate, todate);
     72                 if (htmlContent.contains("noresult_support")) {
     73                     break;
     74                 }
     75                 System.out.println(i);
     76                 Pattern pattern = Pattern.compile("<div mid="([0-9]*)"");
     77 
     78                 String start = ""pid":"pl_weibo_direct"";
     79                 try {
     80                     htmlContent = htmlContent.substring(htmlContent
     81                             .indexOf(start));
     82                 } catch (Exception e) {
     83                     htmlContent = htmlContent.substring(1);
     84                 }
     85                 htmlContent = htmlContent.replace("\"", """);
     86                 htmlContent = htmlContent.replace("\/", "/");
     87                 Matcher matcher = pattern.matcher(htmlContent);
     88                 while (matcher.find()) {
     89                     System.out.println(matcher.group(1));
     90                     midsString += matcher.group(1) + ",";
     91                 }
     92                 if (i == 37) {
     93                     try {
     94                         Thread.sleep(1000 * 60 * 30);
     95                     } catch (InterruptedException e) {
     96                         e.printStackTrace();
     97                     }
     98                 }
     99             }
    100             System.out.println(midsString);
    101             HttpHelper.getLiveData(midsString, Constant.CommentUrl);
    102         } catch (IOException e) {
    103             e.printStackTrace();
    104         }
    105 
    106     }
    107 }

    我们在Spider.start()方法中,看到了作业KeywordSearchJob.class,那么我们来看看这个KeywordSearchJob类的实现,如下:

     1 package main.java.sina.job;
     2 
     3 import org.quartz.Job;
     4 import org.quartz.JobExecutionContext;
     5 import org.quartz.JobExecutionException;
     6 import main.java.sina.httpclient.SpiderSina;
     7 import main.java.sina.spider.Spider;
     8 import main.java.sina.utils.Constant;
     9 import main.java.sina.utils.Utils;
    10 
    11 public class KeywordSearchJob implements Job {
    12 
    13     public void execute(JobExecutionContext arg0) throws JobExecutionException {
    14 
    15         Constant.enableProxy = false; //我的爬虫中没有使用代理,故值设为false.
    16         String keyword = "%25E5%25AE%2581%25E6%25B3%25A2%25E5%25A4%25A7%25E5%25AD%25A6";//被编码后的关键字
    17         String datehour = Utils.getDateOfSpecifiedPreHour(Constant.hourbefore);//这个工具类实现了时差格式的转换
    18         SpiderSina spider = Spider.createSpider();
    19         spider.forwardToWeiboPage();
    20         Spider.sendMidsofDays(spider,keyword,datehour,datehour);
    21     }
    22 
    23 }

    接下来,我们看几个工具类的实现:首先来看下Utils.java这个类,如下:它实现了日期的格式的一些转换

      1 package main.java.sina.utils;
      2 
      3 import java.io.BufferedReader;
      4 import java.io.BufferedWriter;
      5 import java.io.File;
      6 import java.io.FileInputStream;
      7 import java.io.FileNotFoundException;
      8 import java.io.FileOutputStream;
      9 import java.io.FileWriter;
     10 import java.io.IOException;
     11 import java.io.InputStream;
     12 import java.io.InputStreamReader;
     13 import java.io.StringReader;
     14 import java.io.UnsupportedEncodingException;
     15 import java.text.ParseException;
     16 import java.text.SimpleDateFormat;
     17 import java.util.Calendar;
     18 import java.util.Date;
     19 import java.util.Properties;
     20 
     21 import org.htmlparser.Parser;
     22 import org.htmlparser.lexer.Lexer;
     23 import org.htmlparser.lexer.Page;
     24 import org.htmlparser.util.DefaultParserFeedback;
     25 //  I/O操作类
     26 public class Utils {
     27     
     28     public static Date getDateFromString(String dtext,Date fileCreateDate) {
     29         Date date=null;
     30         int y,mm,se;  
     31         Calendar c = Calendar.getInstance();  
     32         c.setTime(fileCreateDate);
     33         y = c.get(Calendar.YEAR); // 34         //d = c.get(Calendar.DAY_OF_MONTH); //
     35         mm = c.get(Calendar.MINUTE); //
     36         se = c.get(Calendar.SECOND);//
     37         if(dtext.contains("秒前")){
     38             int end=0;
     39             for(int i=0;i<dtext.length();i++){
     40                 if(dtext.charAt(i)>='0' && dtext.charAt(i)<='9'){
     41                     end++;
     42                 }else{
     43                     break;
     44                 }
     45             }
     46             dtext=dtext.substring(0,end);
     47             int second=Integer.parseInt(dtext);
     48             c.set(Calendar.SECOND, se-second);
     49             date=c.getTime();
     50         }
     51         else if(dtext.contains("分钟前")){
     52             int end=0;
     53             for(int i=0;i<dtext.length();i++){
     54                 if(dtext.charAt(i)>='0' && dtext.charAt(i)<='9'){
     55                     end++;
     56                 }else{
     57                     break;
     58                 }
     59             }
     60             dtext=dtext.substring(0,end);
     61             int minute=Integer.parseInt(dtext);
     62             c.set(Calendar.MINUTE, mm-minute);
     63             date=c.getTime();
     64         }else if(dtext.contains("今天")){
     65              dtext=dtext.replace("今天 ", "").trim();
     66              String ss[]=dtext.split(":");
     67              if(ss!=null && ss.length==2){
     68                  c.set(Calendar.HOUR_OF_DAY, Integer.parseInt(ss[0]));
     69                  c.set(Calendar.MINUTE, Integer.parseInt(ss[1]));
     70                  date=c.getTime();
     71              }
     72         }else if(dtext.contains("月")){
     73             dtext=y+"年".concat(dtext);
     74             SimpleDateFormat sf=new SimpleDateFormat("yyyy年MM月dd日 HH:mm");
     75             try {
     76                 date=sf.parse(dtext);
     77             } catch (ParseException e) {
     78                 e.printStackTrace();
     79             }
     80         }else if(dtext.contains("-")){
     81             SimpleDateFormat sf=new SimpleDateFormat("yyyy-MM-dd HH:mm");
     82             try {
     83                 date=sf.parse(dtext);
     84             } catch (ParseException e) {
     85                 e.printStackTrace();
     86             }
     87         }
     88         return date;
     89     }
     90     public static void writeFileFromStream(String filename,InputStream in){
     91         if(filename==null || filename.trim().length()==0)
     92             return;
     93         File file=new File(filename);
     94         if(!file.exists()){
     95             try {
     96                 file.createNewFile();
     97             } catch (IOException e) {
     98                 e.printStackTrace();
     99             }
    100         }
    101         FileOutputStream fou=null;
    102         try {
    103             fou = new FileOutputStream(file);
    104             byte []buffer=new byte[1024*4];
    105             int len=-1;
    106             while((len=in.read(buffer))!=-1){
    107                 fou.write(buffer,0,len);
    108             }
    109         } catch (FileNotFoundException e) {
    110             e.printStackTrace();
    111         } catch (IOException e) {
    112             e.printStackTrace();
    113         }finally{
    114             if(in!=null)
    115                 try {
    116                     in.close();
    117                 } catch (IOException e) {
    118                     e.printStackTrace();
    119                 }
    120             if(fou!=null)
    121                 try {
    122                     fou.close();
    123                 } catch (IOException e) {
    124                     e.printStackTrace();
    125                 }
    126         }
    127     }    
    128     public static void writeFileFromString(String filename,String str){
    129         if(filename==null || filename.trim().length()==0)
    130             filename="tmp.txt";
    131         File file=new File(filename);
    132         if(!file.exists()){
    133             try {
    134                 file.createNewFile();
    135             } catch (IOException e) {
    136                 e.printStackTrace();
    137             }
    138         }
    139         BufferedWriter writer=null;
    140         BufferedReader reader=null;
    141         try {
    142             writer=new BufferedWriter(new FileWriter(file));
    143             reader=new BufferedReader(new StringReader(str));
    144             String tmp=null;
    145             StringBuffer buffer=new StringBuffer();
    146             while((tmp=reader.readLine())!=null)
    147                 buffer.append(tmp+"
    ");
    148             writer.write(buffer.toString());
    149             
    150         } catch (IOException e) {
    151             e.printStackTrace();
    152         }finally{
    153             try {
    154                 reader.close();
    155                 writer.close();
    156             } catch (IOException e) {
    157                 e.printStackTrace();
    158             }
    159         }
    160         
    161     }
    162     
    163     
    164     
    165     public static String getStringFromStream(InputStream in) {
    166         BufferedReader reader=null;
    167         reader = new BufferedReader(new InputStreamReader(in));
    168         StringBuffer buffer=new StringBuffer();
    169         String str=null;
    170         try{
    171             while((str=reader.readLine())!=null){
    172                 buffer.append(str+"
    ");
    173             }    
    174             reader.close();
    175         }catch(Exception ex){
    176             ex.printStackTrace();
    177         }            
    178         try {
    179             return new String(buffer.toString().getBytes(),"utf-8");
    180         } catch (UnsupportedEncodingException e) {
    181             e.printStackTrace();
    182             return "error:"+e.getMessage();
    183         }
    184     }
    185   //得到数据库的配置信息
    186     public static Properties getDBconfig(){
    187         Properties properties=new Properties();
    188         InputStream in = null;
    189         try {
    190             in = new FileInputStream(new File("config/dbconfig.ini"));
    191             properties.load(in);
    192         } catch (FileNotFoundException e) {
    193             e.printStackTrace();
    194         } catch (IOException e) {
    195             e.printStackTrace();
    196         }finally{
    197             if(in!=null)
    198                 try {
    199                     in.close();
    200                 } catch (IOException e) {
    201                     e.printStackTrace();
    202                 }
    203         }
    204         return properties;
    205     }
    206     
    207     public static Parser createParser(String inputHTML) {
    208         Lexer mLexer = new Lexer(new Page(inputHTML));
    209         Parser parser = new Parser(mLexer, new DefaultParserFeedback(
    210                 DefaultParserFeedback.QUIET));
    211         return parser;
    212     }
    213     
    214     public static String getDateOfSpecifiedPreHour(int hourNum){
    215         SimpleDateFormat sdFormat = new SimpleDateFormat("yyyy-MM-dd-HH");
    216         Date date = new Date();
    217         System.out.println("date -" +date + " " + hourNum);
    218         Calendar calendar = Calendar.getInstance();
    219         calendar.setTime(date);
    220         calendar.add(Calendar.HOUR_OF_DAY, -1 * hourNum);
    221         System.out.println("date2 -" +sdFormat.format(calendar.getTime()));
    222         return sdFormat.format(calendar.getTime());
    223     }    
    224 }

    再来看一下ThreadPool.java这个类,如下:这是一个线程工具类,定义了线程的一些动作

     1 package main.java.sina.utils;
     2 
     3 import java.util.List;
     4 import java.util.concurrent.ExecutorService;
     5 import java.util.concurrent.Executors;
     6 
     /**
      * Thread-pool helper: runs a prepared list of threads as tasks on a
      * fixed-size executor.
      */
     public class ThreadPool {
         private ExecutorService service;
         private List<Thread> threadList;

         /**
          * @param limite     number of worker threads in the pool
          * @param threadList tasks to run; may be {@code null} or empty
          */
         public ThreadPool(int limite, List<Thread> threadList) {
             this.threadList = threadList;
             this.service = Executors.newFixedThreadPool(limite);
         }

         /** Submits every task of the list to the pool, in list order. */
         public void execute() {
             if (threadList != null && !threadList.isEmpty()) {
                 for (Thread task : threadList) {
                     service.execute(task);
                 }
             }
         }

         /** @return whether all tasks have completed following shutdown */
         public boolean isTerminated(){
             return service.isTerminated();
         }

         /** Stops accepting new tasks; already submitted tasks still run. */
         public void shutDown() {
             service.shutdown();
         }
     }

    然后再看一下Constant.java这个常量类,如下:常量类把系统中用到的一些常量集中写在这里,以后项目维护需要更改的时候会比较方便。

    package main.java.sina.utils;
    
    /**
     * @ClassName: Constant 
     * 
     */
    public class Constant {
        // Whether to use a proxy.
        public static boolean enableProxy = false;
        // NOTE(review): not referenced by the code shown alongside this class; presumably the endpoint for live data - confirm.
        public static String liveCommentUrl = "http://localhost:8080/social-hub-connector/loadingLiveData";
        // URL that Spider.sendMidsofDays passes to HttpHelper.getLiveData together with the collected mids.
        public static String CommentUrl = "http://localhost:8080/social-hub-connector/loadingData";
        // The user's Sina Weibo personal home page; assigned at startup.
        public static String personalHomePage = "******";
        // NOTE(review): the startup code shown sets LoginInfo.username/password instead; these two look unused - confirm.
        public static String weiboUsername = "*********";
        public static String weiboPassword = "*********";
        // Time offset in hours: passed to Utils.getDateOfSpecifiedPreHour to pick the searched hour.
        public static int hourbefore = 0;
    }

    再来看一下Base64Encoder.java类,它是对字节数据进行Base64编码的工具类,如下:

     1 package main.java.sina.utils;
     2 
     /**
      * Base64 encoder using the standard alphabet and '=' padding, without
      * line breaks.
      */
     public class Base64Encoder {
         /** The 64 output characters, indexed by 6-bit value. */
         private static final char[] ALPHABET =
                 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

         public Base64Encoder() {
         }

         /**
          * Encodes the given bytes as Base64 text.
          *
          * @param from bytes to encode
          * @return the encoded text, padded with '=' to a multiple of four characters
          */
         public static  String encode(byte[] from) {
             StringBuffer to = new StringBuffer((int) (from.length * 1.34) + 3);
             // Each group of up to three input bytes yields four output characters.
             for (int offset = 0; offset < from.length; offset += 3) {
                 int available = from.length - offset;
                 int first = from[offset] & 0xFF;
                 int second = available > 1 ? from[offset + 1] & 0xFF : 0;
                 int third = available > 2 ? from[offset + 2] & 0xFF : 0;

                 to.append(ALPHABET[first >>> 2]);
                 to.append(ALPHABET[((first & 0x03) << 4) | (second >>> 4)]);
                 if (available > 1) {
                     to.append(ALPHABET[((second & 0x0F) << 2) | (third >>> 6)]);
                 } else {
                     to.append('=');
                 }
                 if (available > 2) {
                     to.append(ALPHABET[third & 0x3F]);
                 } else {
                     to.append('=');
                 }
             }
             return to.toString();
         }
     }

    下面这个类(EncodeSuAndSp.java)中,针对新浪的一些特殊的加密规则写了相应的方法,这在拼接最终的URL的时候会用到,例如根据servertime和nonce两个参数对密码进行加密,生成一串字符串:

     1 package main.java.sina.utils;
     2 import java.io.File;
     3 import java.io.FileReader;
     4 
     5 import javax.script.Invocable;
     6 import javax.script.ScriptEngine;
     7 import javax.script.ScriptEngineManager;
     8 
     /**
      * Computes the encoded user name and the encrypted password for login by
      * calling functions defined in the script file {@code js/encrypt.js}.
      */
     public class EncodeSuAndSp {
         static ScriptEngineManager mgr = new ScriptEngineManager();
         static ScriptEngine engine = mgr.getEngineByExtension("js");
         static Invocable inv = (Invocable) engine;

         /**
          * Applies the script's {@code hex_sha1} twice to the password and once
          * more to that result concatenated with {@code servertime} and {@code nonce}.
          *
          * @param password   the plain password
          * @param servertime server time value appended before the last hash
          * @param nonce      nonce value appended before the last hash
          * @return the final hash, or the last value computed before a failure
          *         (the empty string when the script could not be loaded)
          */
         public static String getEncryptedP(String password,String servertime,String nonce){
             String value1="";
             try {
                 loadEncryptScript();
                 value1 = String.valueOf(inv.invokeFunction("hex_sha1",password));
                 value1 = String.valueOf(inv.invokeFunction("hex_sha1",value1));
                 value1 = String.valueOf(inv.invokeFunction("hex_sha1",value1+servertime+nonce));
             } catch (Exception e) {
                 e.printStackTrace();
             }
             return value1;
         }


         /**
          * Encodes the user name with the script's {@code encode} function and
          * prints the result.
          *
          * @param username the plain user name
          * @return the encoded user name, or the empty string on failure
          */
         public static String getEncodedUsername(String username){
             String value1="";
             try {
                 loadEncryptScript();
                 value1 = String.valueOf(inv.invokeFunction("encode",username));
                 System.out.println(value1);
             } catch (Exception e) {
                 e.printStackTrace();
             }
             return value1;
         }

         /** Evaluates {@code js/encrypt.js} in the shared engine and always closes the file. */
         private static void loadEncryptScript() throws Exception {
             FileReader script = new FileReader(new File("js/encrypt.js"));
             try {
                 engine.eval(script);
             } finally {
                 try {
                     script.close();
                 } catch (java.io.IOException closeFailure) {
                     // A failed close must not hide the outcome of the evaluation.
                     closeFailure.printStackTrace();
                 }
             }
         }
     }
    package main.java.sina.utils;
    import java.io.UnsupportedEncodingException;
    import java.net.URLDecoder;
    import java.net.URLEncoder;
    /**
     * URL encoding helpers and decoding of backslash-u escape sequences
     * (a backslash, the letter u and four hexadecimal digits).
     */
    public class EncodeUtils {

        /**
         * URL-encodes {@code str} in the given charset.
         *
         * @throws RuntimeException wrapping the cause when the charset is unsupported
         */
        public static final String encodeURL(String str,String enc) {
            try {
                return URLEncoder.encode(str, enc);
            } catch (UnsupportedEncodingException e) {
                throw new RuntimeException(e);
            }
        }

        /**
         * URL-decodes {@code str} in the given charset.
         *
         * @throws RuntimeException wrapping the cause when the charset is unsupported
         */
        public static final String decodeURL(String str,String enc) {
            try {
                return URLDecoder.decode(str, enc);
            } catch (UnsupportedEncodingException e) {
                throw new RuntimeException(e);
            }
        }

        /**
         * Decodes escape sequences, then removes carriage returns, line feeds,
         * tabs, {@code &nbsp;} and {@code &gt}, and turns square brackets into
         * double quotes. An escape whose four characters are not valid
         * hexadecimal becomes a NUL character; an escape cut off by the end of
         * the input is kept as literal text.
         * NOTE(review): the stripped control characters may originally have been
         * the two-character backslash sequences - confirm against the original project.
         *
         * @param str text to decode; {@code null} yields the empty string
         * @return the decoded and cleaned text, never {@code null}
         */
        public static String unicdoeToGB2312(String str) {
            if (str == null) {
                return "";
            }
            return decodeEscapes(str, true)
                    .replace("\r", "")
                    .replace("\n", "")
                    .replace("\t", "")
                    .replace("&nbsp;", "")
                    .replace("&gt", "")
                    .replace("[", "\"")
                    .replace("]", "\"");
        }

        /**
         * Decodes escape sequences, then removes carriage returns, tabs,
         * {@code &nbsp;}, {@code &gt} and line feeds. Malformed or cut-off
         * escapes are kept as literal text.
         *
         * @param str text to decode; {@code null} yields the empty string
         * @return the decoded and cleaned text, never {@code null}
         */
        public static String unicodeTogb2312(String str) {
            if (str == null) {
                return "";
            }
            return decodeEscapes(str, false)
                    .replace("\r", "")
                    .replace("\t", "")
                    .replace("&nbsp;", "")
                    .replace("&gt", "")
                    .replace("\n", "");
        }

        /** Replaces each six-character escape with its character; {@code nulOnBadHex} selects NUL vs. literal for invalid digits. */
        private static String decodeEscapes(String str, boolean nulOnBadHex) {
            final int length = str.length();
            StringBuilder sb = new StringBuilder(length);
            int i = 0;
            while (i < length) {
                // Only treat the prefix as an escape when all six characters are present.
                if (str.startsWith("\\u", i) && i + 6 <= length) {
                    try {
                        sb.append((char) Integer.parseInt(str.substring(i + 2, i + 6), 16));
                        i += 6;
                        continue;
                    } catch (NumberFormatException ex) {
                        if (nulOnBadHex) {
                            sb.append((char) 0);
                            i += 6;
                            continue;
                        }
                        // Otherwise fall through and copy the backslash literally.
                    }
                }
                sb.append(str.charAt(i));
                i++;
            }
            return sb.toString();
        }
    }

    HttpUtils.java这个类很关键,它封装了doGet()和doPost()等HTTP请求方法,如下:

    package main.java.sina.utils;
    
    import java.io.ByteArrayInputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import java.util.Random;
    import java.util.Set;
    import org.apache.http.Header;
    import org.apache.http.HttpEntity;
    import org.apache.http.HttpHost;
    import org.apache.http.HttpResponse;
    import org.apache.http.HttpVersion;
    import org.apache.http.NameValuePair;
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.HttpClient;
    import org.apache.http.client.entity.UrlEncodedFormEntity;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.client.methods.HttpPost;
    import org.apache.http.client.methods.HttpUriRequest;
    import org.apache.http.conn.params.ConnRoutePNames;
    import org.apache.http.conn.params.ConnRouteParams;
    import org.apache.http.cookie.Cookie;
    import org.apache.http.entity.InputStreamEntity;
    import org.apache.http.impl.client.DefaultHttpClient;
    import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
    import org.apache.http.impl.cookie.BasicClientCookie;
    import org.apache.http.message.BasicNameValuePair;
    import org.apache.http.params.BasicHttpParams;
    import org.apache.http.params.CoreProtocolPNames;
    import org.apache.http.params.HttpParams;
    import org.apache.http.params.HttpProtocolParams;
    import org.apache.http.protocol.BasicHttpContext;
    import org.apache.http.protocol.ExecutionContext;
    import org.apache.http.protocol.HTTP;
    import org.apache.http.protocol.HttpContext;
    
    /**
     * http操作相关的类
     */
    public class HttpUtils {
        /**
         * Sends an HTTP GET request and prints the URI of the request that was
         * finally executed, followed by the response. Failures are printed,
         * not thrown.
         *
         * @param url     address to request
         * @param headers request headers to add; may be {@code null}
         * @return the response, or {@code null} when the request failed
         */
        public static HttpResponse doGet(String url,Map<String,String> headers){
            HttpClient client=createHttpClient();
            HttpGet getMethod=new HttpGet(url);
            HttpResponse response=null;

            HttpContext httpContext = new BasicHttpContext();
            try {
                if (headers != null) {
                    for (Map.Entry<String, String> header : headers.entrySet()) {
                        getMethod.addHeader(header.getKey(), header.getValue());
                    }
                }
                // The context must be handed to execute(), otherwise it is never populated.
                response=client.execute(getMethod, httpContext);
                Object realRequest = httpContext.getAttribute(ExecutionContext.HTTP_REQUEST);
                if (realRequest instanceof HttpUriRequest) {
                    System.out.println(((HttpUriRequest) realRequest).getURI());
                }
            } catch (ClientProtocolException e) {
                e.printStackTrace();
            } catch (IOException e) {
                // getMessage() may be null; String.valueOf keeps the printed text unchanged in that case.
                String msg=String.valueOf(e.getMessage());
                if(msg.contains("Truncated chunk")){
                    System.out.println(msg +" 数据获取不完整,需要重新获取。");
                }else{
                    System.out.println(msg +" 连接被拒绝,需要降低爬取频率。");
                }
            } catch(Exception e){
                // Still best-effort, but no longer silent.
                e.printStackTrace();
            }
            System.out.println(response);
            return response;
        }
        
        /**
         * Sends an HTTP POST request whose body is the given parameters,
         * form-encoded as UTF-8. Failures are printed, not thrown.
         *
         * @param url     address to request
         * @param headers request headers to add; may be {@code null}
         * @param params  form fields to post; no body is set when {@code null} or empty
         * @return the response, or {@code null} when the request failed
         */
        public static HttpResponse doPost(String url,Map<String,String> headers,Map<String,String> params){
            HttpClient httpClient = createHttpClient();
            HttpPost request = new HttpPost(url);
            HttpResponse result = null;
            try {
                if (headers != null) {
                    for (Map.Entry<String, String> header : headers.entrySet()) {
                        request.addHeader(header.getKey(), header.getValue());
                    }
                }
                if (params != null && !params.isEmpty()) {
                    List<NameValuePair> form = new ArrayList<NameValuePair>();
                    for (Map.Entry<String, String> field : params.entrySet()) {
                        form.add(new BasicNameValuePair(field.getKey(), field.getValue()));
                    }
                    request.setEntity(new UrlEncodedFormEntity(form, HTTP.UTF_8));
                }
                result = httpClient.execute(request);
            } catch (ClientProtocolException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
            return result;
        }
        
        //上传一个文件
        public static HttpResponse doPost(String url,Map<String,String> headers,String fileName){
            HttpClient client=createHttpClient();
            HttpPost postMethod=new HttpPost(url);
            String boundary = "";
            HttpResponse response=null;
            try {
                if(headers!=null && headers.keySet().size()>0){
                    for(String key:headers.keySet()){
                        postMethod.addHeader(key, headers.get(key));
                        if(key.equals("Content-Type")){
                            String tmp=headers.get(key);
                            boundary=tmp.substring(tmp.indexOf("=")+1);
                        }
                    }
                }    
                File file=new File(fileName);
                InputStream in=new FileInputStream(file);
                
                StringBuffer buffer=new StringBuffer();
                buffer.append(boundary).append("
    ")
                      .append("Content-Disposition: form-data; name="pic1"; filename=""+file.getName()).append(""
    ")
                      .append("Content-Type: image/pjpeg").append("
    ")
                      .append("
    ");
                
                System.out.println(buffer.toString());
                
                String tmpstr=Utils.getStringFromStream(in);
                tmpstr=Base64Encoder.encode(tmpstr.getBytes());
                buffer.append(tmpstr).append("
    ");
                buffer.append(boundary+"--").append("
    ");
                
                System.out.println(buffer.toString());
                
                in=new ByteArrayInputStream(buffer.toString().getBytes());
                
                InputStreamEntity ise=new InputStreamEntity(in,buffer.toString().getBytes().length);  
                
                postMethod.setEntity(ise);  
                
                response=client.execute(postMethod);
            } catch (ClientProtocolException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } 
            return response;            
        }
        /*
         * params :
         * httpresponse
         * return : 响应的头部信息
         */
        
        /**
         * Copies all headers of a response into a new list.
         *
         * @param response the HTTP response to read
         * @return the headers in response order, or null when the response has none
         */
        public static List<Header> getReponseHeaders(HttpResponse response){
            Header[] all=response.getAllHeaders();
            if(all==null || all.length==0){
                return null;
            }
            List<Header> copied=new ArrayList<Header>();
            for(Header header:all){
                copied.add(header);
            }
            return copied;
        }
        
        /*
          * params :
          * headers:头部信息 
          * request:请求
         */
        /**
         * Adds every entry of {@code headers} to {@code request} as an HTTP header.
         *
         * @param headers header names mapped to values; null or empty means nothing is added
         * @param request the request to decorate
         */
        public static void setHeaders(Map<String,String> headers,HttpUriRequest request){
            if(headers==null || headers.isEmpty()){
                return;
            }
            for(Map.Entry<String,String> entry:headers.entrySet()){
                request.addHeader(entry.getKey(), entry.getValue());
            }
        }
        
        /*
         * params :
         * httpresponse
         * return : 响应的cookies值
         */
        
        /**
         * Extracts the name/value pair of every {@code Set-Cookie} header in a response.
         * Only the first ';'-separated segment of each header is used; attributes such as
         * path or expiry are ignored.
         *
         * @param response the HTTP response, may be null
         * @return the cookies found, or null when the response is null or sets no usable cookie
         */
        public static List<Cookie> getResponseCookies(HttpResponse response){
            if(response==null){
                return null;
            }
            Header[] hds=response.getAllHeaders();
            if(hds==null){
                return null;
            }
            List<Cookie> cookies=null;
            for(Header header:hds){
                if(!"Set-Cookie".equalsIgnoreCase(header.getName())){
                    continue;
                }
                String raw=header.getValue();
                if(raw==null){
                    continue;
                }
                // Keep only the leading "name=value" segment.
                int semicolon=raw.indexOf(';');
                String pair=semicolon<0 ? raw : raw.substring(0,semicolon);
                int equals=pair.indexOf('=');
                if(equals<=0){
                    // Malformed cookie (no name or no '='): skip it instead of failing.
                    continue;
                }
                if(cookies==null){
                    cookies=new ArrayList<Cookie>();
                }
                cookies.add(new BasicClientCookie(pair.substring(0,equals),pair.substring(equals+1)));
            }
            return cookies;
        }
        /*
         * params :
         * cookies数组
         * return : cookies数组组成的字符串
         */
        /**
         * Joins cookies into a single {@code name=value; name=value} string.
         *
         * @param cookies the cookies to serialise
         * @return the joined string, or null when {@code cookies} is null or empty
         */
        public static String setCookie2String(List<Cookie> cookies){
            if(cookies==null || cookies.isEmpty()){
                return null;
            }
            StringBuilder joined=new StringBuilder();
            String separator="";
            for(Cookie cookie:cookies){
                joined.append(separator)
                      .append(cookie.getName()).append("=").append(cookie.getValue());
                separator="; ";
            }
            return joined.toString();
        }
        
        /*
         * 从响应中得到输入流
         */
        /**
         * Returns the body stream of a response.
         *
         * @param response the HTTP response, may be null
         * @return the content stream, or null when the response is null, has no entity
         *         (for example a bodiless reply), or the stream cannot be obtained
         */
        public static InputStream getInputStreamFromResponse(HttpResponse response){
            if(response==null){
                return null;
            }
            HttpEntity entity=response.getEntity();
            if(entity==null){
                // A response without a body has no entity.
                return null;
            }
            InputStream in=null;
            try {
                in = entity.getContent();
            } catch (IllegalStateException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
            return  in;
        }
        
        /*
         * 从响应中得到字符串
         */
        /**
         * Reads the body of a response as text via {@code Utils.getStringFromStream}.
         *
         * @param response the HTTP response, may be null
         * @return null for a null response, "" when no body stream is available,
         *         otherwise the body text
         */
        public static String getStringFromResponse(HttpResponse response){
            if(response==null){
                return null;
            }
            InputStream body=getInputStreamFromResponse(response);
            if(body==null){
                return "";
            }
            return Utils.getStringFromStream(body);
        }
        
        /**
         * 创建支持多线程并发连接的HTTPCLIENT
         */
        private final static HttpClient createHttpClient() {
            // Builds a fresh client on every call, backed by its own thread-safe
            // connection manager limited to 20 connections in total.
            HttpParams settings = new BasicHttpParams();
            if(Constant.enableProxy){
                // The fixed proxy is only applied when proxy use is switched on.
                HttpHost proxy = new HttpHost("web-proxy-sha.chn.hp.com",8080);
                settings.setParameter(ConnRouteParams.DEFAULT_PROXY, proxy);
            }
            HttpProtocolParams.setVersion(settings, HttpVersion.HTTP_1_1);
            HttpProtocolParams.setContentCharset(settings, "UTF-8");

            ThreadSafeClientConnManager pool = new ThreadSafeClientConnManager();
            pool.setMaxTotal(20);
            HttpClient client = new DefaultHttpClient(pool, settings);

            // Redirect policy: circular redirects allowed, at most 50 redirects,
            // and redirects are NOT followed automatically.
            client.getParams()
                  .setParameter("http.protocol.allow-circular-redirects", true)
                  .setParameter("http.protocol.max-redirects", 50)
                  .setParameter("http.protocol.handle-redirects", false);
            return client;
        }
        
        /**
         *加入代理的功能
         * @return HttpClient 对象
         */
        public static HttpClient getDefaultHttpClientByProxy() {
            // Creates a client and routes it through one proxy picked at random from
            // proxy.properties (each entry maps a host/IP to its port).
            HttpClient client =createHttpClient();
            Map<String, String> proxies = ReadIni.getDbini("proxy.properties");
            if (proxies.size() == 0) {
                throw new RuntimeException("无可用代理");
            }
            Set<String> hosts = proxies.keySet();
            String[] candidates = hosts.toArray(new String[hosts.size()]);
            String ip = candidates[new Random().nextInt(candidates.length)];
            HttpHost proxy = new HttpHost(ip, Integer.parseInt(proxies.get(ip)));

            client.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY,proxy);
            client.getParams().setParameter(CoreProtocolPNames.PROTOCOL_VERSION, HttpVersion.HTTP_1_1);
            return client;
        }
    }

    接下来看一个HttpHelper辅助类,如下:

    /**
     * 
     */
    package main.java.sina.utils;
    
    import java.io.IOException;
    import org.apache.commons.httpclient.HttpClient;
    import org.apache.commons.httpclient.HttpException;
    import org.apache.commons.httpclient.methods.PostMethod;
    
    /**
     * @ClassName: HttpHelper
     * 
     */
    public class HttpHelper {
        /**
         * POSTs {@code requestData} as form parameter {@code mids} to {@code url}.
         *
         * @param requestData value of the {@code mids} parameter
         * @param url         target URL
         * @return the response body (also printed to standard output)
         * @throws HttpException on an HTTP protocol error
         * @throws IOException   on a transport error
         */
        public static String getLiveData(String requestData,String url)
                throws HttpException, IOException {
            PostMethod postMethod = new PostMethod(url);
            postMethod.setParameter("mids", requestData);
            return executeAndRelease(postMethod);
        }
        
        /**
         * POSTs a user id and hobby string to the fixed hobby-loading endpoint.
         *
         * @param userid value of the {@code userid} parameter
         * @param hobbys value of the {@code hobbys} parameter
         * @return the response body (also printed to standard output)
         * @throws HttpException on an HTTP protocol error
         * @throws IOException   on a transport error
         */
        public static String getHobbyData(String userid, String hobbys)
                throws HttpException, IOException {
            PostMethod postMethod = new PostMethod("http://c0048925.itcs.hp.com:8080/connector/loadingHobby");
            postMethod.setParameter("userid", userid);
            postMethod.setParameter("hobbys", hobbys);
            return executeAndRelease(postMethod);
        }
    
        /**
         * Executes the request, reads and prints the body, and always releases the
         * connection, including when execution or reading throws.
         */
        private static String executeAndRelease(PostMethod postMethod)
                throws HttpException, IOException {
            try {
                HttpClient httpClient = new HttpClient();
                httpClient.executeMethod(postMethod);
                String response = postMethod.getResponseBodyAsString();
                System.out.println(response);
                return response;
            } finally {
                postMethod.releaseConnection();
            }
        }
    
    }

    ReadIni.java类,用于读取“key=value”格式的文本文件(例如上面用到的代理配置 proxy.properties),如下:

    package main.java.sina.utils;
     
    import java.io.BufferedReader;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.InputStreamReader;
    import java.util.HashMap;
    import java.util.Map;
    
    /**
     * Minimal reader for text files made of {@code key=value} lines.
     */
    public class ReadIni {
    
        /**
         * Loads every {@code key=value} line of a file into a map.
         * Blank lines and lines that do not split into at least a key and a value
         * are skipped. Keys and values are stored exactly as written (not trimmed).
         *
         * @param file path of the file to read
         * @return the parsed entries; empty when the file is missing or unreadable
         *         (the exception is printed, never thrown)
         */
        public static Map<String, String> getDbini(String file) {
            Map<String, String> map = new HashMap<String, String>();
            BufferedReader br = null;
            try {
                br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
                String s;
                // Advance on every iteration so a blank line can never stall the loop.
                while ((s = br.readLine()) != null) {
                    if (s.trim().length() == 0) {
                        continue;
                    }
                    String[] s1 = getIni(s);
                    if (s1.length < 2) {
                        // No '=' (or nothing after it): not a usable entry.
                        continue;
                    }
                    map.put(s1[0], s1[1]);
                }
            } catch (FileNotFoundException e1) {
                e1.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // Closing the reader also closes the underlying file stream.
                if (br != null) {
                    try {
                        br.close();
                    } catch (Exception ignored) {
                        // nothing useful can be done if close fails
                    }
                }
            }
            return map;
        }
    
        /**
         * Splits one line on every '=' character.
         *
         * @param str the line to split
         * @return the pieces; index 0 is the key and index 1 (if present) the value
         */
        public static String[] getIni(String str) {
            String[] temp = str.split("=");
            return temp;
        }
    
    }

    然后,我们跳转到登录sina的部分,来看一下LoginSina这个类的实现:

    package main.java.sina.httpclient;
    
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.UnsupportedEncodingException;
    import java.math.BigInteger;
    import java.security.InvalidKeyException;
    import java.security.KeyFactory;
    import java.security.NoSuchAlgorithmException;
    import java.security.interfaces.RSAPublicKey;
    import java.security.spec.InvalidKeySpecException;
    import java.security.spec.RSAPublicKeySpec;
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.Scanner;
    
    import javax.crypto.BadPaddingException;
    import javax.crypto.Cipher;
    import javax.crypto.IllegalBlockSizeException;
    import javax.crypto.NoSuchPaddingException;
    
    import org.apache.commons.codec.binary.Hex;
    import org.apache.commons.httpclient.params.HttpParams;
    import org.apache.http.HttpResponse;
    import org.apache.http.client.HttpClient;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.cookie.Cookie;
    import org.springframework.core.io.ClassPathResource;
    
    import main.java.sina.json.msg.PreLoginResponseMessage;
    import main.java.sina.utils.Base64Encoder;
    import main.java.sina.utils.EncodeUtils;
    import main.java.sina.utils.HttpUtils;
    import main.java.sina.utils.JsonUtils;
    import main.java.sina.utils.Utils;
    
    public class LoginSina {
        private String username;
        private String password;
        private String rsakv;
        private String pubkey;
        
        //servertime和nonce都是在登录时需要使用的,用于post信息的加密
        private String servertime;//服务器的时间
        private String nonce;//一次性字符串
        private String userid;//用户微博ID
        private String pcid;//若需要输入验证码时用到
        private String userdomainname;//用于域名
        private String door;//验证码
        
        private Map<String,String> headers=null;
        
        private List<Cookie> cookies=null;
        
        
        public LoginSina(String username,String password){
            this.username=username;
            this.password=password;
            init();
        }
        
        public Map<String,String> getHeaders(){
            Map<String,String> hds=null;
            if(headers!=null && headers.keySet().size()>0){
                hds=new HashMap<String,String>();
                for(String key:headers.keySet()){
                    hds.put(key,headers.get(key));
                }
            }
            return hds;
        }
        
        public List<Cookie> getCookies(){
            List<Cookie> cc=null;
            if(cookies!=null && cookies.size()>0){
                cc=new ArrayList<Cookie>();
                for(int i=0;i<cookies.size();i++){
                    cc.add(cookies.get(i));
                }
            }
            return cc;
        }
        //登录微博
        public String dologinSina(){
            System.out.println("---do login, please hold on...---");
            String url="http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.5)";//v1.3.17
            Map<String,String> headers=new HashMap<String,String>();
            Map<String,String> params=new HashMap<String,String>();
            
            /*HTTP协议中的headers:http://www.cnblogs.com/yuzhongwusan/archive/2011/10/20/2218954.html
             * */
            headers.put("Accept", "text/html, application/xhtml+xml, */*");
            headers.put("Referer", "http://login.sina.com.cn/member/my.php?entry=sso");
            headers.put("Accept-Language", "zh-cn");
            headers.put("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; BOIE9;ZHCN");
            headers.put("Host", "login.sina.com.cn");
            headers.put("Connection", "Keep-Alive");
            headers.put("Content-Type", "application/x-www-form-urlencoded");
            headers.put("Cache-Control", "no-cache");
            params.put("encoding", "UTF-8");
            params.put("entry", "weibo");
            params.put("from", "");
            params.put("prelt", "112");
            params.put("gateway", "1");
            params.put("nonce", nonce);
            params.put("pwencode", "rsa2");//wsse
            params.put("returntype", "META");
            params.put("pagerefer", "");
            params.put("savestate", "7");    
            params.put("servertime", servertime);
            params.put("rsakv", rsakv);
            params.put("service", "miniblog");
            params.put("sp", getEncryptedP());
            params.put("ssosimplelogin", "1");
            params.put("su", getEncodedU());
            params.put("url", "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack");
            params.put("useticket", "1");
            params.put("vsnf", "1");
            HttpResponse response=HttpUtils.doPost(url, headers, params);
            this.cookies=HttpUtils.getResponseCookies(response);
            this.headers=headers;
            String responseText=HttpUtils.getStringFromResponse(response);
            try {
                responseText=new String(responseText.getBytes(),"GBK");
                if(!responseText.contains("retcode=0")){
                    downloadCheckImage();
                    this.nonce=getnonce();
                    Scanner s=new Scanner(System.in);
                    if(responseText.contains("retcode=4049"))
                        System.out.println("请输入验证码:");
                    else if(responseText.contains("retcode=2070")){
                        System.out.println("验证码不正确,请再次输入验证码:");
                    }
                    this.door=s.next();
                    dologinSina();
                }
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
            }
            System.out.println("Congratulations, you have login success!");
            return responseText;
        }
        //登录后重定向
        public String redirect(){
            String cookieValue=HttpUtils.setCookie2String(this.cookies);
            this.headers.clear();
            this.headers.put("Accept", "image/gif, image/jpeg, image/pjpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
            this.headers.put("Accept-Language", "zh-cn");
            this.headers.put("Connection", "Keep-Alive");
            this.headers.put("Host", "sina.com.cn");
            this.headers.put("Referer", "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.15)");
            this.headers.put("User", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; QQDownload 691)");
            this.headers.put("Cookie", cookieValue);
            String ssosavestate=""; //SSO即Sina Sign-on,
            String ticket = "";
            for(Cookie c:this.cookies){
                if(c.getName().equals("ALF")){
                    ssosavestate=c.getValue();
                }else if(c.getName().equals("tgc")){
                    ticket=c.getValue();
                }
            }
            String url="http://weibo.com/ajaxlogin.php?" +
                    "framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack&" +
                    "sudaref=weibo.com";
            HttpResponse response=HttpUtils.doGet(url, this.headers);
            response=HttpUtils.doGet(url, this.headers);    
            String responseText=HttpUtils.getStringFromResponse(response);
            return responseText;
        }
        //生成一次性的字符串 6位 用于加密
        private String getnonce() {
            String x = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
            String str = "";
            for (int i = 0; i < 6; i++) {
                str += x.charAt((int)Math.ceil(Math.random() * 1000000) % x.length());
            }
            return str;
        }
        //初始化:得到服务区的时间servertime和一次性字符串nonce
        private void init(){
            String url=compositeUrl();
            Map<String,String> headers=new HashMap<String,String>();
            headers.put("Accept", "*/*");
            headers.put("Referer", "http://weibo.com/");
            headers.put("Accept-Language", "zh-cn");
            headers.put("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; QQDownload 691)");
            headers.put("Host", "login.sina.com.cn");
            headers.put("Connection", "Keep-Alive");        
            HttpResponse response=HttpUtils.doGet(url, headers);
            String responseText=HttpUtils.getStringFromResponse(response);
            int begin=responseText.indexOf("{");
            int end=responseText.lastIndexOf("}");
            responseText=responseText.substring(begin,end+1);
            PreLoginResponseMessage plrmsg =JsonUtils.jsontoPreLoginResponseMessage(responseText);
            this.nonce=plrmsg.getNonce();
            this.servertime=plrmsg.getServertime()+"";
            this.pubkey=plrmsg.getPubkey();
            this.rsakv=plrmsg.getRsakv();
            this.pcid=plrmsg.getPcid();
        }
        //下载验证码
        private void downloadCheckImage() {
            if(pcid==null) return;
            this.headers.remove("Content-Type");
            try {
                if(this.cookies != null){
                    this.cookies.clear();
                }
                
            } catch (Exception e) {
                e.printStackTrace();
            }
            String cookieValue=HttpUtils.setCookie2String(this.cookies);
            this.headers.put("Cookie", cookieValue);
            String url="http://login.sina.com.cn/cgi/pin.php?r="+(long)(Math.random()*100000000)+"&s=0&p="+this.pcid;
            HttpResponse response=HttpUtils.doGet(url, headers);
            InputStream in=HttpUtils.getInputStreamFromResponse(response);
            try {
                //System.out.println(new ClassPathResource("checkImage.jpeg").getFile().getPath());
                Utils.writeFileFromStream(new ClassPathResource("checkImage.jpeg").getFile().getPath(), in);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        //组合预登陆时的URL
        private String compositeUrl(){
            StringBuilder builder=new StringBuilder();
            builder.append("http://login.sina.com.cn/sso/prelogin.php?")
               .append("entry=weibo&callback=sinaSSOController.preloginCallBack&")
               .append("su="+getEncodedU())
               .append("&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.5)&_="+System.currentTimeMillis());
            return builder.toString();
        }
        //对用户名进行编码
        private String getEncodedU() {
            if(username!=null && username.length()>0){
                return Base64Encoder.encode(EncodeUtils.encodeURL(username,"utf-8").getBytes());
            }
            return "";
        }
        //对密码进行编码
        private String getEncryptedP(){
    //        return EncodeSuAndSp.getEncryptedP(password, servertime, nonce);
            String data=servertime+"	"+nonce+"
    "+password;
            String spT=rsaCrypt(pubkey, "10001", data);
            return spT;
        }
        
        public static String rsaCrypt(String pubkey, String exponentHex, String pwd,String servertime,String nonce) {
              String data=servertime+"	"+nonce+"
    "+pwd;
              return rsaCrypt(pubkey,exponentHex,data);
        }
    
        public static String rsaCrypt(String pubkey, String exponentHex, String messageg) {
                KeyFactory factory=null;
                try {
                    factory = KeyFactory.getInstance("RSA");
                } catch (NoSuchAlgorithmException e1) {
                    return "";
                }
                BigInteger publicExponent = new BigInteger(pubkey, 16); /* public exponent */
                BigInteger modulus = new BigInteger(exponentHex, 16); /* modulus */
                RSAPublicKeySpec spec = new RSAPublicKeySpec(publicExponent, modulus);
                RSAPublicKey pub=null;
                try {
                    pub = (RSAPublicKey) factory.generatePublic(spec);
                } catch (InvalidKeySpecException e1) {
                    return "";
                }
                Cipher enc=null;
                byte[] encryptedContentKey =null;
                try {
                    enc = Cipher.getInstance("RSA");
                    enc.init(Cipher.ENCRYPT_MODE, pub);
                    encryptedContentKey = enc.doFinal(messageg.getBytes());
                } catch (NoSuchAlgorithmException e1) {
                    System.out.println(e1.getMessage());
                    return "";
                } catch (NoSuchPaddingException e1) {
                    System.out.println(e1.getMessage());
                    return "";
                } catch (InvalidKeyException e1) {
                    System.out.println(e1.getMessage());
                    return "";
                } catch (IllegalBlockSizeException e1) {
                    System.out.println(e1.getMessage());
                    return "";
                } catch (BadPaddingException e1) {
                    System.out.println(e1.getMessage());
                    return "";
                } 
                return new String(Hex.encodeHex(encryptedContentKey));
        }
        public void setUserid(String userid) {
            this.userid = userid;
        }
    
        public String getUserid() {
            return userid;
        }
    
        public void setUserdomainname(String userdomainname) {
            this.userdomainname = userdomainname;
        }
    
        public String getUserdomainname() {
            return userdomainname;
        }
    
    }

    SpiderSina类如下:

      1 package main.java.sina.httpclient;
      2 import java.util.HashMap;
      3 import java.util.List;
      4 import java.util.Map;
      5 
      6 import org.apache.http.HttpResponse;
      7 import org.apache.http.cookie.Cookie;
      8 
      9 import main.java.sina.utils.Constant;
     10 import main.java.sina.utils.EncodeUtils;
     11 import main.java.sina.utils.HttpUtils;
     12 import main.java.sina.utils.Utils;
     13 
     14 public class SpiderSina {
     15     private LoginSina ls;
     16     private Map<String,String> headers;
     17     private final int  ADDFOLLOWING =1;
     18     private final int  CANCELFOLLOWING =2;
     19     public SpiderSina(LoginSina ls){
     20         this.ls=ls;
     21         this.headers=new HashMap<String,String>();
     22         headers.put("Accept", "text/html, application/xhtml+xml, */*");
     23         headers.put("Accept-Language", "zh-cn");
     24         headers.put("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; BOIE9;ZHCN");
     25         headers.put("Connection", "Keep-Alive");
     26         headers.put("Cache-Control", "no-cache");
     27         String cookieValue=HttpUtils.setCookie2String(ls.getCookies());
     28         headers.put("Cookie", cookieValue);
     29     }
     36     public String getGroupCategory(){
     37         String url="http://q.weibo.com/";
     38         this.headers.put("Host", "q.weibo.com");
     39         HttpResponse response=HttpUtils.doGet(url, headers);
     40         String responseText=HttpUtils.getStringFromResponse(response);
     41         responseText=EncodeUtils.unicdoeToGB2312(responseText);
     42         return responseText;
     43     }
     44     public String search(String keyword, int pageNo){
     47         String url="http://s.weibo.com/weibo/%25E5%25AE%2581%25E6%25B3%25A2%25E5%25A4%25A7%25E5%25AD%25A6&page="+pageNo;
     48         String cookieValue = "SINAGLOBAL=8556698272004.724.1417744632425; un=shy_annan@126.com; myuid=5439352084; wvr=6; un=sm2014121904@126.com; _s_tentry=developer.51cto.com; SWB=usrmdinst_14; SUS=SID-5438576807-1419173757-GZ-lrze7-d8e1e3f082b428c12412c8ba30f0a6de; SUE=es%3D4cdfdd5d5f0f75141c092b32f89525a2%26ev%3Dv1%26es2%3D469e50c869315e57efeec3012c3bb6a8%26rs0%3DoWdG36CQ33LUEtKTvGn907Zy1mwFETvSVJsxeHEiaMPcKDB7pFxg596a2pLhFLJfQmswf4AvXYAkzTfemrYgWrz%252BQPustEA2wLNYufYpAZqFsGWanhTBq6elzB2yoZp41xcpy1WwXn1CuvzIzzEYpuILjHahkmJDQDQy6KaxlbA%253D%26rv%3D0; SUP=cv%3D1%26bt%3D1419173757%26et%3D1419260157%26d%3Dc909%26i%3Da6de%26us%3D1%26vf%3D0%26vt%3D0%26ac%3D27%26st%3D0%26uid%3D5438576807%26name%3Dsm2014121904%2540126.com%26nick%3DSocialMedia%25E5%259B%259B%25E5%25A8%2583%26fmp%3D%26lcp%3D; SUB=_2A255kq8tDeTxGeNK6FoU9yjEyzuIHXVa6DVlrDV8PUNbvtBeLW3TkW-bMoi0G_bBfpbS3TMqcXg6zDWFLA..; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhGThsH46uNrx1VY0ApV0SR5JpX5KMt; ALF=1450709756; SSOLoginState=1419173757; WBStore=bc5ad8450c3f8a48|undefined; Apache=1027467835228.8901.1419173761694; ULV=1419173761704:6:6:1:1027467835228.8901.1419173761694:1418797827169; UOR=www.ilehao.com,widget.weibo.com,login.sina.com.cn; ULOGIN_IMG=14192385783486";
     49         headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
     50         //headers.put("Accept-Encoding", "gzip, deflate, sdch");
     51         headers.put("Accept-Language", "zh-CN");
     52         headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
     53         headers.put("Connection", "Keep-Alive");
     54         headers.put("Cache-Control", "max-age=0");
     55         headers.put("Referer", "http://login.sina.com.cn/sso/login.php?url=http%3A%2F%2Fs.weibo.com%2Fweibo%2F%2525E6%252583%2525A0%2525E6%252599%2525AE%26page%3D2&_rand=1419173756.6387&gateway=1&service=weibo&entry=miniblog&useticket=1&returntype=META");
     56         headers.put("Cookie", cookieValue);
     57         this.headers.put("Host", "s.weibo.com");
     58         HttpResponse response=HttpUtils.doGet(url, headers);
     59         String responseText=HttpUtils.getStringFromResponse(response);
     60         responseText=EncodeUtils.unicdoeToGB2312(responseText);
     61         
     62         
     63         return responseText;
     64     }
     65     
     66     public String searchCommentsByUid(String uid){
     67         
     68         String url="http://www.weibo.com/u/"+uid;
     69         String cookieValue = "SINAGLOBAL=8556698272004.724.1417744632425; myuid=2035860051; wvr=6; YF-Ugrow-G0=ad06784f6deda07eea88e095402e4243; SSOLoginState=1423150079; YF-V5-G0=32eb5467e9bfc8b60c2d771056535ac5; _s_tentry=www.weibo.com; Apache=6264929557219.147.1423150103832; ULV=1423150103842:18:2:2:6264929557219.147.1423150103832:1422769721265; ULOGIN_IMG=1423233797946; YF-Page-G0=82cdcdfb16327a659fbb60cc9368fb19; SUS=SID-2035860051-1423286223-GZ-jdkh4-c8ea11de0a42151313986e52f9aa6017; SUE=es%3D8701ff5aca59244ff1ff263cf985bee6%26ev%3Dv1%26es2%3D7995c9eb7455697c09fac4f7486e14eb%26rs0%3DTyXXIRjcEw%252BeS5PaVSM%252FhQjc2JGhKBOe3uFTgShiIUAbPFI2eKtrgxM2wIi9A1xndiTFFM72zY%252FDKYFXONrgkao5cRo%252FHkydV%252FnaQjNmXoeESu5gi6Iq0aX883NhGR0utBVNZb5XaIG3X6HMMfBJC%252B7pnVHogEo8eD6cx8nzN5c%253D%26rv%3D0; SUP=cv%3D1%26bt%3D1423286223%26et%3D1423372623%26d%3Dc909%26i%3D6017%26us%3D1%26vf%3D0%26vt%3D0%26ac%3D0%26st%3D0%26uid%3D2035860051%26name%3Dshy_annan%2540126.com%26nick%3D%25E7%2594%25A8%25E6%2588%25B72035860051%26fmp%3D%26lcp%3D2013-08-18%252021%253A48%253A10; SUB=_2A2550e-fDeTxGeRO6FcZ9i7Mzj2IHXVap0ZXrDV8PUNbvtBuLWnTkW-gBGVORTA7J_lSZzAqzW6E50JjBQ..; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9Wh7oKNCGYcNnhlC6eqqQbbl5JpX5KMt; SUHB=0M20OGRPiOKzyc; ALF=1454822222; UOR=www.ilehao.com,widget.weibo.com,login.sina.com.cn";
     70         headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
     71         headers.put("Accept-Language", "zh-CN");
     72         headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
     73         headers.put("Connection", "Keep-Alive");
     74         headers.put("Cache-Control", "max-age=0");
     75         headers.put("Cookie", cookieValue);
     76         this.headers.put("Host", "www.weibo.com");
     77         HttpResponse response=HttpUtils.doGet(url, headers);
     78         String responseText=HttpUtils.getStringFromResponse(response);
     79         responseText=EncodeUtils.unicdoeToGB2312(responseText);
     82         return responseText;
     83     }    
     85 //爬虫根据关键字,查询时间断,和查询页数  来得到htmlContent
     86 public String search(String keyword, int pageNo, String fromdate,String todate){
     87     StringBuffer stringBuffer = new StringBuffer(200);   
     93     stringBuffer.append("http://s.weibo.com/weibo/"+ keyword +"&page=");
     94     stringBuffer.append(pageNo);
     95     stringBuffer.append("&typeall=1&suball=1&timescope=custom:");
     96     stringBuffer.append(fromdate);
     97     stringBuffer.append(":");
     98     stringBuffer.append(todate);
     99     stringBuffer.append("&Refer=g");
    104     String url = stringBuffer.toString();
    105     String cookieValue = headers.get("Cookie");
    106     headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    107     //headers.put("Accept-Encoding", "gzip, deflate, sdch");
    108     headers.put("Accept-Language", "zh-CN");
    109     headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    110     headers.put("Connection", "Keep-Alive");
    111     headers.put("Cache-Control", "max-age=0");
    112     headers.put("Referer", "http://s.weibo.com/weibo/%25E5%25AE%2581%25E6%25B3%25A2%25E5%25A4%25A7%25E5%25AD%25A6&typeall=1&suball=1&timescope=custom:"+fromdate+":"+todate+"&Refer=g");
    113     headers.put("Cookie", cookieValue);
    114     this.headers.put("Host", "s.weibo.com");
    115     HttpResponse response=HttpUtils.doGet(url, headers);
    116     String responseText=HttpUtils.getStringFromResponse(response);
    117     responseText=EncodeUtils.unicdoeToGB2312(responseText);
    118     
    119     System.out.println("************htmlContent start***********");
    120     System.out.println(responseText);
    121     System.out.println("************htmlContent end***********");
    125     return responseText;   
    127 } 
    129 public void forwardToWeiboPage(){
    130     String url = Constant.personalHomePage;
    131     headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    133     headers.put("Accept-Language", "zh-CN");
    134     headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    135     headers.put("Connection", "Keep-Alive");
    137     this.headers.put("Host", "s.weibo.com");
    138     HttpResponse response=HttpUtils.doGet(url, headers);
    139     String responseText=HttpUtils.getStringFromResponse(response);
    140     responseText=EncodeUtils.unicdoeToGB2312(responseText);
    141     List<Cookie> cookies = HttpUtils.getResponseCookies(response);
    142     String cookie = HttpUtils.setCookie2String(cookies);
    144     headers.put("Cookie", cookie);   
    146 }
    150     public String getGroupCategory(int id){
    151         String url="http://q.weibo.com/class/category/?id="+id;
    152         this.headers.put("Host", "q.weibo.com");
    154         HttpResponse response=HttpUtils.doGet(url, headers);
    155         String responseText=HttpUtils.getStringFromResponse(response);
    156         responseText=EncodeUtils.unicdoeToGB2312(responseText);
    157         return responseText;
    158     }
    169     //得到微群管理员ID信息,其实用户成员的第一页 HTML页面
    170     public String getGroupAdministrator(String groupid) {
    171         String url="http://q.weibo.com/"+groupid+"/members/all";
    172         this.headers.remove("Referer");
    173         this.headers.put("Host", "q.weibo.com");
    174         this.headers.remove("Content-Type");
    175         this.headers.remove("x-requested-with");
    176         HttpResponse response=HttpUtils.doGet(url, headers);
    177         String responseText=HttpUtils.getStringFromResponse(response);
    178         return responseText;
    179     }
    180     //根据微群号和页号得到群成员ID信息 -----JSON格式数据
    181     public String getGroupMembers(String groupid,int pagenumber){
    182         this.headers.put("Referer", "http://q.weibo.com/"+groupid+"/members/all");
    183         this.headers.put("Host", "q.weibo.com");
    184         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    185         this.headers.put("x-requested-with", "XMLHttpRequest"); 
    187         Map<String,String> params=new HashMap<String,String>();
    188         params.put("_t", "0");
    189         params.put("page", pagenumber+"");
    190         params.put("gid", groupid);
    191         params.put("query","");
    192         params.put("tab", "all");
    193         params.put("vip", "1");
    194         String url="http://q.weibo.com/ajax/members/page";
    195         HttpResponse response=HttpUtils.doPost(url, headers, params);
    196         return HttpUtils.getStringFromResponse(response);
    197     }
    198     /*
    199      *  得到微群中微博信息 经过多次尝试成功
    200      *  每次获得50个微博记录,page是页号, count值50 可以在1-75之间,但是,每次开始的时候还是从50的倍数开始的
    201      */
    202     public String getGroupTopic(int page,int count,String gid){
    203         this.headers.put("Referer", "http://q.weibo.com/"+gid);
    204         this.headers.put("Host", "q.weibo.com");
    205         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    206         this.headers.put("x-requested-with", "XMLHttpRequest");
    207         Integer pre_page=1;
    208         if(page==1){
    209             pre_page=2;
    210         }else{
    211             pre_page=page-1;
    212         }
    213         Map<String,String> params=new HashMap<String,String>();
    214         params.put("_k", System.currentTimeMillis()+"");
    215         params.put("_t", "0");
    216         params.put("count", count+"");
    217         //params.put("end_id", end_id);
    218         params.put("gid", gid);
    219         params.put("is_search","");
    220         params.put("key_word", "");
    221         params.put("me", "0");
    222         params.put("mids", "");
    223         params.put("new", "0");
    224         params.put("page", page+"");
    225         params.put("pagebar", "0");  
    226         params.put("pre_page", pre_page+"");
    227         params.put("since_id", "0");
    228         params.put("uid", "0");
    229         
    230         String url="http://q.weibo.com/ajax/mblog/groupfeed";
    231         HttpResponse response=HttpUtils.doPost(url, headers, params);
    232         return HttpUtils.getStringFromResponse(response);
    233     }
    234     /*
    235      *  得到微群中微博信息数目
    236      *  这个信息中其实还包含了微群的所有的基本信息~~~~~~~~~~****** json格式的数据信息
    237      */
    238     public String getGroupMessageNumber(String gid){
    239         this.headers.put("Referer", "http://q.weibo.com/"+gid);
    240         this.headers.put("Host", "q.weibo.com");
    241         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    242         this.headers.put("x-requested-with", "XMLHttpRequest");
    243         String url="http://q.weibo.com/ajax/rightnav/groupprofile?gid="+gid+"&_t=0&__rnd="+System.currentTimeMillis();
    244         HttpResponse response=HttpUtils.doGet(url, headers);
    245         return HttpUtils.getStringFromResponse(response);
    246     }
    247     //得到微群的主页信息  HTML页码   主要是为了得到第一条微博记录的MID值
    248     public String getgroupMainPage(String groupid) {
    249         String url="http://q.weibo.com/"+groupid+"?topnav=1";
    250         this.headers.remove("Referer");
    251         this.headers.put("Host", "q.weibo.com");
    252         this.headers.remove("Content-Type");
    253         this.headers.remove("x-requested-with");
    254         
    255         HttpResponse response=HttpUtils.doGet(url, headers);
    256         String responseText=HttpUtils.getStringFromResponse(response);
    257         return responseText;
    258     }
    259     /*
    260      * 根据分类得到微群信息
    261      * categroyID :分类ID号
    262      * pagenumber:页号
    263      * sort:分类方式 1 按成员人数 2按 微群博数 3按创建时间分类
    264      * count:每页的记录数目
    265      */
    266     public String getGroupByCategroy(int categroyID,int pagenumber,int sort,int count){
    267         this.headers.put("Referer", "http://q.weibo.com/class/category/?id="+categroyID);
    268         this.headers.put("Host", "q.weibo.com");
    269         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    270         this.headers.put("x-requested-with", "XMLHttpRequest");
    271         Map<String,String> params=new HashMap<String,String>();
    272         params.put("_t", "0");
    273         params.put("page", pagenumber+"");
    274         params.put("id", categroyID+"");
    275         params.put("sort",sort+"");
    276         params.put("count", count+"");
    277         
    278         String url="http://q.weibo.com/ajax/class/category";
    279         HttpResponse response=HttpUtils.doPost(url, headers,params);
    280         String responseText=HttpUtils.getStringFromResponse(response);
    281         responseText=EncodeUtils.unicdoeToGB2312(responseText);
    282         return responseText;
    283     }
    284     //得到表情列表信息
    285     public String getFaceList(){
    286         String url="http://weibo.com/aj/mblog/face?type=face&_t=0&__rnd="+System.currentTimeMillis();
    287         this.headers.put("Referer", "http://weibo.com/");
    288         this.headers.put("Host", "weibo.com");
    289         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    290         this.headers.put("x-requested-with", "XMLHttpRequest");
    291         
    292         HttpResponse response=HttpUtils.doGet(url, headers);
    293         String responseText=HttpUtils.getStringFromResponse(response);
    294         System.out.println(responseText);
    295         Utils.writeFileFromString("tmpFile/faceList.txt", responseText);
    296         return responseText;
    297     }
    307     //用户基本信息          主要是将要解析用户主页下方经过编码后的内容
    308     public String getMemberInfo(String memberID){
    309         String url="http://weibo.com/"+memberID+"/info";
    310         this.headers.put("Host", "weibo.com");
    311         this.headers.put("Referer", "http://weibo.com/u/"+memberID);
    312         HttpResponse response=HttpUtils.doGet(url, headers);
    313         String responseText=HttpUtils.getStringFromResponse(response);
    314         return responseText;
    315     }
    316     //用户粉丝用户信息    html页面,每次20个
    317     public String getMemberFans(String memberID,int page){
    318         String url="http://weibo.com/"+memberID+"/fans?&uid=1689219395&tag=&page="+page;
    319         this.headers.put("Host", "weibo.com");
    320         this.headers.put("Referer", "http://weibo.com/"+memberID+"/fans");
    321         HttpResponse response=HttpUtils.doGet(url, headers);
    322         String responseText=HttpUtils.getStringFromResponse(response);
    323         return responseText;
    324     }
    325     //用户关注的用户信息     html页面
    326     public String getMemberFollowing(String memberID,int page){
    327         String url="http://weibo.com/"+memberID+"/follow?page="+page;
    328         this.headers.put("Host", "weibo.com");
    329         this.headers.put("Referer", "http://weibo.com/"+memberID+"/follow");
    330         HttpResponse response=HttpUtils.doGet(url, headers);
    331         String responseText=HttpUtils.getStringFromResponse(response);
    332         return responseText;
    333     }
    334     
    335     /*
    336      *  @params 
    337      *   memberID:是用户ID
    338      *   max_id:每次AJAX获得数据时上面一次的最后一个ID值
    339      *   end_id:用户最新的一条微博的ID值
    340      *   k:一个随机数
    341      *   page:页号
    342      *   pre_page:前一页
    343      *   count:每次返回的数值  当max_id为null是 count=50 否则为15
    344      *      pagebar:ajax时,第一次为0,第二次为1
    345      *   注意:
    346      *   1  用此请求,每次获得的数据格式都一样,用同样的解析方法来进行解析。
    347      *   2 每次一页可以获得总共45条记录,需要三次请求。每次请求可获得15条记录。
    348      *   3 max_id可以不用到,直接等于 end_id就可以了.
    349      *   4 第一次请求时可以将end_id设置为NUll,即为第一次时翻页时的请求后边的滚动时必须有end_id参数,end_id为第一页的第一条ID即可。
    350      */
    351     //获得用户发布的微博信息   json格式的数据    
    352     public String getMemberReleaseTopic(String memberID,String end_id,Integer page,Integer pagebar){
    353         String url="";
    354         Integer pre_page=1;
    355         Integer count=0;
    356         String k=System.currentTimeMillis()+""+(int)(Math.random()*100000)%100;
    357         if(end_id==null){
    358             count=50;
    359             if(page==1){
    360                 pre_page=2;
    361             }else{
    362                 pre_page=page-1;
    363             }
    364             url="http://weibo.com/aj/mblog/mbloglist?" +
    365             "page="+page+"&count="+count+"&pre_page="+pre_page+"&" +
    366             "_k="+ k+"&uid="+memberID+
    367             "&_t=0&__rnd="+System.currentTimeMillis();
    368         }else{
    369             count=15;
    370             pre_page=page;
    371             url="http://weibo.com/aj/mblog/mbloglist?" +
    372             "page="+page+"&count="+count+"&max_id="+end_id+"&" +
    373             "pre_page="+pre_page+"&end_id="+end_id+"&" +
    374             "pagebar="+pagebar+"&_k="+k+"&" +
    375             "uid="+memberID+"&_t=0&__rnd="+System.currentTimeMillis();
    376         }
    377         String cookieValue = "SINAGLOBAL=8556698272004.724.1417744632425; un=sm2014121903@126.com; myuid=5439352084; YF-Ugrow-G0=4703aa1c27ac0c4bab8fc0fc5968141e; SSOLoginState=1421374583; wvr=6; YF-V5-G0=8c4aa275e8793f05bfb8641c780e617b; _s_tentry=login.sina.com.cn; Apache=2461283528245.9854.1421374588453; ULV=1421374588550:13:5:3:2461283528245.9854.1421374588453:1421210767499; UOR=www.ilehao.com,widget.weibo.com,login.sina.com.cn; SUS=SID-2035860051-1421462085-GZ-7jcgb-1539d643bae5195fb7f792b2ae77befb; SUE=es%3Df15e11ed09b6a0108a28adfa58609b78%26ev%3Dv1%26es2%3Da0f706efac5c89495062648a4de3e337%26rs0%3DZBxlOUv0mhmxyHfOVmZ3tH7tNvAp08BjPeLUJPdu9WzG38Dsm40px%252Bd9w21ycDpZQwBK3q0prFfNs%252F8ZuZSasa1eps%252FOGNxJ3CIHN8JN%252Fik6gVpIPgVeeRdalNWTIbth6hLa34uOp%252BXii%252Bxeib%252BvINsr%252FdOvQx6kjp6fsC44QXc%253D%26rv%3D0; SUP=cv%3D1%26bt%3D1421462085%26et%3D1421548485%26d%3Dc909%26i%3Dbefb%26us%3D1%26vf%3D0%26vt%3D0%26ac%3D2%26st%3D0%26uid%3D2035860051%26name%3Dshy_annan%2540126.com%26nick%3D%25E7%2594%25A8%25E6%2588%25B72035860051%26fmp%3D%26lcp%3D2013-08-18%252021%253A48%253A10; SUB=_2A255vboVDeTxGeRO6FcZ9i7Mzj2IHXVazdpdrDV8PUNbvtBuLVj-kW91jmbQSGo7Rn30RVvGP5KOgBgNgQ..; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9Wh7oKNCGYcNnhlC6eqqQbbl5JpX5KMt; ALF=1452998078; ULOGIN_IMG=14214638933178; YF-Page-G0=0acee381afd48776ab7a56bd67c2e7ac";
    378         headers.put("Cookie", cookieValue);
    379         this.headers.put("Referer", "http://weibo.com/u/"+memberID);
    380         this.headers.put("Host", "www.weibo.com");
    381         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    382         this.headers.put("x-requested-with", "XMLHttpRequest");
    383         url = "http://weibo.com/u/"+memberID;
    384         HttpResponse response=HttpUtils.doGet(url, headers);
    385         if(response==null){
    386             return "";
    387         }
    388         return HttpUtils.getStringFromResponse(response);
    389     }
    390     /*
    391      * ~~~~~~~~~~~~~~~~~~~~~获取用户的一些信息~~~end~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    392      */
    393     
    394     
    395     //**********************************************************************************
    396 
    397     /*
    398      *  名人堂与达人信息
    399      */
    400     public String getVerified(String url){
    401         this.headers.put("Host", "verified.weibo.com");
    402         this.headers.put("Referer", "http://plaza.weibo.com/?topnav=1&wvr=4");
    403         HttpResponse response=HttpUtils.doGet(url, headers);
    404         String responseText=HttpUtils.getStringFromResponse(response);
    405         return responseText;
    406     }
    407 
    408     public String getVerifiedMember(String path,Integer g_index){
    409         String url="http://verified.weibo.com/aj/getgrouplist?g_index="+g_index+
    410         "&path="+path+"&_t=0&__rnd="+System.currentTimeMillis();
    411         this.headers.put("Host", "verified.weibo.com");
    412         this.headers.put("Referer", path);
    413         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    414         this.headers.put("x-requested-with", "XMLHttpRequest");
    415         HttpResponse response=HttpUtils.doGet(url, headers);
    416         String responseText=HttpUtils.getStringFromResponse(response);
    417     
    418         return responseText;
    419     }
    420     
    421     public String setArea(Integer provinceID){
    422         this.headers.put("Referer", "http://club.weibo.com/list");
    423         this.headers.put("Host", "club.weibo.com");
    424         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    425         this.headers.put("x-requested-with", "XMLHttpRequest");
    426 
    427         Map<String,String> params=new HashMap<String,String>();
    428         
    429         params.put("_t", "0");
    430         params.put("city", "1000");
    431         params.put("prov", provinceID+"");
    432         
    433         String url="http://club.weibo.com/ajax_setArea.php";
    434         HttpResponse response=HttpUtils.doPost(url, headers, params);
    435         
    436         List<Cookie> cks=HttpUtils.getResponseCookies(response);
    437         List<Cookie> cookies=ls.getCookies();
    438         cookies.addAll(cks);
    439         String cookieValue=HttpUtils.setCookie2String(cookies);
    440         this.headers.put("Cookie", cookieValue);
    441         
    442         return HttpUtils.getStringFromResponse(response);
    443     }
    444     
    445     public String getDaRen(Integer page){
    446         String op="ltime"; 
    447         String url="http://club.weibo.com/list?sex=3&op="+op+"&page="+page+"&";
    448         Integer pre_page=(page<=1? 2:page-1);
    449         this.headers.put("Host", "club.weibo.com");
    450         this.headers.put("Referer", "http://club.weibo.com/list?sex=3&op=ltime&page="+pre_page+"&");
    451         this.headers.remove("Content-Type");
    452         this.headers.remove("x-requested-with");
    453         
    454         HttpResponse response=HttpUtils.doGet(url, headers);
    455         if(response!= null){
    456             return HttpUtils.getStringFromResponse(response);
    457         }
    458         return "";
    459         
    460     }
    470     //发布一条文字微博
    471     public String releaseTopic(String content){
    472         this.headers.put("Referer", "http://weibo.com/");
    473         this.headers.put("Host", "weibo.com");
    474         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    475         this.headers.put("x-requested-with", "XMLHttpRequest");
    476         Map<String,String> params=new HashMap<String,String>();
    477         params.put("_t", "0");
    478         params.put("location", "home");
    479         params.put("module", "stissue");
    480         params.put("pic_id", "");
    481         params.put("text", content);
    482         String url="http://weibo.com/aj/mblog/add?__rnd="+System.currentTimeMillis();
    483         HttpResponse response=HttpUtils.doPost(url, headers, params);
    484         return HttpUtils.getStringFromResponse(response);
    485     }519     //得到自己关注的成员
    520     public String getSelfFollowIngs(){
    521         return "";
    522     }
    523     //得到自己的粉丝
    524     public String getSelfFollowers(){
    525         return "";
    526     }
    527     //得到自己加入的微群
    528     public String getSelfJoinedGroups(){
    529         return "";
    530     }
    531     //得到自己的标签
    532     public String getSelfTags(){
    533         return "";
    534     }
    535     //得到自己发布的微博
    536     public String getSelfReleaseTopics(){
    537         return "";
    538     }
    539     //得到自己主页的微博
    540     public String getSelfPageTopics(){
    541         return "";
    542     }
    543     //关注一个人
    544     public String addFollowing(String memberid){
    545         return addorcancleFollowing(memberid,this.ADDFOLLOWING);
    546     }
    547     //取消关注一个人
    548     public String cancelFollowing(String memberid){
    549         return addorcancleFollowing(memberid,this.CANCELFOLLOWING);
    550     }
    551     private String addorcancleFollowing(String memberid,int option){
    552         String url="";
    553         switch(option){
    554             case ADDFOLLOWING:
    555                 url="http://weibo.com/aj/f/followed?__rnd="+System.currentTimeMillis();
    556                 break;
    557             case CANCELFOLLOWING:
    558                 url="http://weibo.com/aj/f/unfollow?__rnd="+System.currentTimeMillis();
    559                 break;
    560         }
    561         
    562         Map<String,String> params=new HashMap<String,String>();
    563 
    564         this.headers.put("Referer", "http://weibo.com/");
    565         this.headers.put("Host", "weibo.com");
    566         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    567         this.headers.put("Referer", "http://weibo.com/");
    568         this.headers.put("x-requested-with", "XMLHttpRequest");
    569         
    570         params.put("_t", "0");
    571         params.put("f", "1");
    572         params.put("location", "profile");
    573         params.put("refer_flag", "");
    574         params.put("refer_sort", "profile");
    575         params.put("uid", memberid);
    576         
    577         HttpResponse response=HttpUtils.doPost(url, headers, params);
    578         return HttpUtils.getStringFromResponse(response);
    579     }
    584     /**
    585      * 得到的标签信息  调用一次10个 
    586      * @return
    587      */
    588     public String getTags(){
    589         String url="http://account.weibo.com/set/aj/tagsuggest?__rnd="+System.currentTimeMillis();    
    590         this.headers.put("Referer", "http://account.weibo.com/set/tag#");
    591         this.headers.put("Host", "account.weibo.com");
    592         HttpResponse response=HttpUtils.doGet(url, headers);
    593         return HttpUtils.getStringFromResponse(response);
    594     }
    595     
    596     /**
    597      * 得到微博热词信息
    598      * @param k :热词的门类
    599      */
    600     public String getHotWords(String k){
    601         String url="http://data.weibo.com/top/keyword?k="+k;    
    602         try{
    603             Integer.parseInt(k);
    604         }catch(Exception ex){
    605             url="http://data.weibo.com/top/keyword?t="+k;    
    606         }
    607         this.headers.put("Referer", "http://data.weibo.com/top/keyword");
    608         this.headers.put("Host", "data.weibo.com");
    609         HttpResponse response=HttpUtils.doGet(url, headers);
    610         return HttpUtils.getStringFromResponse(response);
    611     }
    612     
    613     /**
    614      * 得到微博热帖子
    615      * @param cat  表示热帖门类
    616      * @param page 表示页号
    617      */
    618     public String getHotWeibo(String cat,int page){
    619         String url="http://data.weibo.com/hot/ajax/catfeed?page="+page+"&cat="+cat+"&_t=0&__rnd="+System.currentTimeMillis();    
    620         this.headers.put("Referer", "http://data.weibo.com/hot/minibloghot");
    621         this.headers.put("Host", "data.weibo.com");
    622         HttpResponse response=HttpUtils.doGet(url, headers);
    623         return HttpUtils.getStringFromResponse(response);
    624     }
    625     
    626     /**
    627      * 按照分类获取 微博吧名字  第一步
    628      */
    629     public String getWeiBar(String ctgid,int p){
    630         String sort="post";
    631         String url="http://weiba.weibo.com/aj_f/CategoryList?sort="+sort+"&p="+p+"&ctgid="+ctgid+"&_t=0&__rnd="+System.currentTimeMillis();    
    632         this.headers.put("Referer", "http://weiba.weibo.com/ct/"+ctgid);
    633         this.headers.put("Host", "weiba.weibo.com");
    634         this.headers.put("Accept", "*/*");
    635         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    636         this.headers.put("X-Requested-With", "XMLHttpRequest");
    637         HttpResponse response=HttpUtils.doGet(url, headers);
    638         return HttpUtils.getStringFromResponse(response);
    639     }
    640     /**
    641      * 根据微博吧 名称 ,得到该吧内的所有帖子标题 第二步
    642      */
    643     public String getWeiBarByWeibarName(String bid,int p){
    644         String url="http://weiba.weibo.com/aj_t/postlist?bid="+bid+"&p="+p+"&_t=all&__rnd="+System.currentTimeMillis();    
    645         this.headers.put("Referer", "http://weiba.weibo.com/");
    646         this.headers.put("Host", "weiba.weibo.com");
    647         this.headers.put("Accept", "*/*");
    648         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    649         this.headers.put("X-Requested-With", "XMLHttpRequest");
    650         HttpResponse response=HttpUtils.doGet(url, headers);
    651         return HttpUtils.getStringFromResponse(response);
    652     }
    653     
    654     /**
    655      * 新浪微公益名单
    656      * type ="donate"
    657      * type="discuss"
    658      */
    659     public String getWeiGongYiMember(int page,int projectID,String type){
    660         String url="http://gongyi.weibo.com/aj_personal_helpdata?page="+page+"&type="+type+"&project_id="+projectID+"&_t=0&__rnd="+System.currentTimeMillis();    
    661         this.headers.put("Referer", "http://gongyi.weibo.com/"+projectID);
    662         this.headers.put("Host", "gongyi.weibo.com");
    663         this.headers.put("Accept", "*/*");
    664         this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    665         this.headers.put("X-Requested-With", "XMLHttpRequest");
    666         HttpResponse response=HttpUtils.doGet(url, headers);
    667         return HttpUtils.getStringFromResponse(response);
    668     }
    669 }
  • 相关阅读:
    查看网站上保存的密码
    前端图片预览
    Amaze UI的一点总结
    简单实现图片验证码
    获取网页数据的例子
    谈谈网页中的ajax
    网页小技巧-360doc个人图书馆复制文字
    Js中的4个事件
    网页页面蒙版实现
    Spring+SprinMVC配置学习总结
  • 原文地址:https://www.cnblogs.com/RunForLove/p/4511920.html
Copyright © 2011-2022 走看看