zoukankan      html  css  js  c++  java
  • crawler_基础之_httpclient 访问网络资源

    先粘贴一个 简单版的,后期再修改

    pom文件

      <dependency>
          <groupId>org.apache.httpcomponents</groupId>
          <artifactId>httpasyncclient</artifactId>
          <version>4.0-alpha3</version>
          <scope>compile</scope>
      </dependency>
     1 package com.cph.utils;
     2 
     3 import java.io.IOException;
     4 
     5 import org.apache.http.HttpEntity;
     6 import org.apache.http.HttpResponse;
     7 import org.apache.http.HttpStatus;
     8 import org.apache.http.client.ClientProtocolException;
     9 import org.apache.http.client.methods.HttpGet;
    10 import org.apache.http.client.methods.HttpUriRequest;
    11 import org.apache.http.impl.client.DefaultHttpClient;
    12 import org.apache.http.util.EntityUtils;
    13 
    14 /**
    15  * httpclient 帮助类<br>
    16  * 4.1.2测试版
    17  * 
    18  * @author cphmvp
    19  * 
    20  */
    21 public class HttpClientUtil {
    22     private static String encoding = "UTF-8";
    23 
    24     /**
    25      * get方式下载
    26      * 
    27      * @param url
    28      */
    29     public static String downloadGet(String url) {
    30         String htmls = null;
    31         DefaultHttpClient client = new DefaultHttpClient();
    32         // 新建get请求
    33         HttpUriRequest request = new HttpGet(url);
    34         // 封装请求头
    35         pageRequest(request);
    36         // 声明响应
    37         HttpResponse response = null;
    38         // 响应实体
    39         HttpEntity entity = null;
    40         try {
    41             response = client.execute(request);
    42             System.out.println("响应码: "
    43                     + response.getStatusLine().getStatusCode());
    44             if (HttpStatus.SC_OK == response.getStatusLine().getStatusCode()) {
    45                 entity = response.getEntity();
    46                 byte[] content = EntityUtils.toByteArray(entity);
    47                 htmls = new String(content, encoding);
    48             }
    49         } catch (ClientProtocolException e) {
    50             e.printStackTrace();
    51         } catch (IOException e) {
    52             e.printStackTrace();
    53         } finally {
    54             // 关闭客户端
    55             client.getConnectionManager().shutdown();
    56         }
    57         return htmls;
    58     }
    59 
    60     /**
    61      * 封装请求头
    62      * 
    63      * @param request
    64      */
    65     private static void pageRequest(HttpUriRequest request) {
    66         // 设置浏览器版本
    67         request.setHeader(
    68                 "User-Agent",
    69                 "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; MyIE 2.0 Beta 2; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; InfoPath.3)");
    70         // 设置cookie refer等
    71         request.setHeader(
    72                 "Cookie",
    73                 "RK=hki7lw6qHP; wbilang_821910184=zh_CN; wbilang_10000=zh_CN; dm_login_weixin_rem=; dm_login_weixin_scan=; wb_regf=%3B0%3B%3Bwww.baidu.com%3B0; mb_reg_from=8; ts_last=t.qq.com/; ts_refer=search.t.qq.com/index.php; ts_uid=7492426386; wbilang_384871492=zh_CN; ts_last=1.t.qq.com/wolongxian; ts_refer=www.baidu.com/s; ts_uid=7492426386; pgv_pvid=1942759996; pgv_info=ssid=s5111200112; o_cookie=384871492; ptui_loginuin=821910184; ptisp=cnc; ptcz=9c03596fa66d550bcd5c8cd812f16ad5d6c2074604285851a218c478774eb6bb; luin=o0821910184; lskey=00010000b43bed256a14b910da63ac03a1c1a042994fea6a8a7078dcb2ea566d5dc09188883ddddd1f7feadb; pt2gguin=o0821910184; uin=o0821910184; skey=@xObtCqUUW; p_uin=o0821910184; p_skey=swqZymgXczQrTdTin9Qe44jMT5cTNoTeSzaXrxDjs3k_; pt4_token=OlMTg1UJSdPz-VzgfdEgFQ__; p_luin=o0821910184; p_lskey=000400001663db9b9783c84586b6d929044d17e291916d1cfcfb93c0f520f05e8c85adc89dffc94e52b1325e");
    74     }
    75 
    76     public static void main(String[] args) {
    77         String url = "http://www.baidu.com/";
    78         System.out.println(downloadGet(url));
    79     }
    80 }
  • 相关阅读:
    计算fibonacci数(多种方法)
    数组求和(两种方法)
    C语言二级指针(指向指针的指针)
    唯品会海量实时OLAP分析技术升级之路
    hive 调优(一)coding调优
    supsplk 服务器被植入木马 挖矿 cpu使用 700%
    OPTS参数设置
    Yarn 内存分配管理机制及相关参数配置
    hive on tez 任务失败
    hive 调优(三)tez优化
  • 原文地址:https://www.cnblogs.com/cphmvp/p/3495461.html
Copyright © 2011-2022 走看看