zoukankan      html  css  js  c++  java
  • HttpClient4.5简单使用

    转自:https://www.cnblogs.com/codingexperience/p/5319850.html

    一、HttpClient简介

        HttpClient是一个客户端的HTTP通信实现库,而不是浏览器。关于HTTP协议,可以搜索相关的资料。它的设计目的是发送和接收HTTP报文。它不会执行嵌入在页面中的JavaScript代码,所以当需要抓取通过AJAX技术动态加载实际内容的页面时,需要使用HtmlUnit(WebClient)等其他开源库。HttpClient最新版已经到第5版,但已经稳定的应该是4.5.2版本,官方网址:http://hc.apache.org/

    二、HttpClient简单使用

        HttpClient的主要用途是接收HTTP响应的内容,下面介绍HttpClient的简单使用,抓取博客园的首页。至于HttpClient4.5的常用API可以参考这篇文章:http://liangbizhi.github.io/httpclient-4-3-x-chapter-1/

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    package com.httpclient.demo;
     
    import java.io.IOException;
    import java.nio.charset.Charset;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
     
    import org.apache.http.HttpEntity;
    import org.apache.http.HttpStatus;
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.entity.ContentType;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    import org.apache.http.util.EntityUtils;
     
    public class SimpleHttpClient {
      // 使用HttpClient获取博客园首页
      public static void main(String[] args) throws ClientProtocolException, IOException {
        String targetUrl = "http://www.cnblogs.com/";
         
        // 1.建立HttpClient对象
        CloseableHttpClient client = HttpClients.createDefault();
     
        // 2.建立Get请求
        HttpGet get = new HttpGet(targetUrl);
     
        // 3.发送Get请求
        CloseableHttpResponse res = client.execute(get);
     
        // 4.处理请求结果
        if (res.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
          HttpEntity entity = res.getEntity();
          ContentType contentType = ContentType.getOrDefault(entity);
          Charset charset = contentType.getCharset();
          String mimeType = contentType.getMimeType();
          // 获取字节数组
          byte[] content = EntityUtils.toByteArray(entity);
          if (charset == null) {
            // 默认编码转成字符串
            String temp = new String(content);
            String regEx = "(?=<meta).*?(?<=charset=[\'|\"]?)([[a-z]|[A-Z]|[0-9]|-]*)";
            Pattern p = Pattern.compile(regEx, Pattern.CASE_INSENSITIVE);
            Matcher m = p.matcher(temp);
            if (m.find() && m.groupCount() == 1) {
              charset = Charset.forName(m.group(1));
            } else {
              charset = Charset.forName("ISO-8859-1");
            }
          }
          System.out.println(new String(content, charset));
        }
     
      }
     
    }

      

    三、HttpClient模拟登录

         HTTP协议本来是无状态的,但为了保持会话的状态,通常使用Cookie保存Session信息:当客户端向服务器发送请求时会附带这些会话信息,服务器从而能够区分不同会话的状态。用户登录的过程,简单而言,就是服务器首先验证用户名与密码,然后生成会话信息并通过Cookie保存到客户端本地,最后用户凭借会话信息访问用户信息等需要登录的网页。

        HttpClient4.5通过CookieStore保存用户的会话信息,还提供HttpClientContext保存用户连接的信息。下面是一个使用HttpClient模拟知乎登录的简单案例。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    package com.httpclient.demo;
     
    import java.io.IOException;
    import java.util.LinkedList;
    import java.util.List;
     
    import org.apache.http.Consts;
    import org.apache.http.NameValuePair;
    import org.apache.http.client.CookieStore;
    import org.apache.http.client.config.CookieSpecs;
    import org.apache.http.client.config.RequestConfig;
    import org.apache.http.client.entity.UrlEncodedFormEntity;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.client.methods.HttpPost;
    import org.apache.http.client.protocol.HttpClientContext;
    import org.apache.http.cookie.Cookie;
    import org.apache.http.impl.client.BasicCookieStore;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    import org.apache.http.message.BasicNameValuePair;
    import org.apache.http.util.EntityUtils;
     
    /**
     * 模拟登陆知乎
     */
    public class ZhiHuTest {
     
      public static void main(String[] args) throws java.text.ParseException {
        String name = "username";
        String password = "password"
         
        // 全局请求设置
        RequestConfig globalConfig = RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).build();
        // 创建cookie store的本地实例
        CookieStore cookieStore = new BasicCookieStore();
        // 创建HttpClient上下文
        HttpClientContext context = HttpClientContext.create();
        context.setCookieStore(cookieStore);
     
        // 创建一个HttpClient
        CloseableHttpClient httpClient = HttpClients.custom().setDefaultRequestConfig(globalConfig)
            .setDefaultCookieStore(cookieStore).build();
     
        CloseableHttpResponse res = null;
     
        // 创建本地的HTTP内容
        try {
          try {
            // 创建一个get请求用来获取必要的Cookie,如_xsrf信息
            HttpGet get = new HttpGet("http://www.zhihu.com/");
     
            res = httpClient.execute(get, context);
            // 获取常用Cookie,包括_xsrf信息
            System.out.println("访问知乎首页后的获取的常规Cookie:===============");
            for (Cookie c : cookieStore.getCookies()) {
              System.out.println(c.getName() + ": " + c.getValue());
            }
            res.close();
     
            // 构造post数据
            List<NameValuePair> valuePairs = new LinkedList<NameValuePair>();
            valuePairs.add(new BasicNameValuePair("email", name));
            valuePairs.add(new BasicNameValuePair("password", password));
            valuePairs.add(new BasicNameValuePair("remember_me", "true"));
            UrlEncodedFormEntity entity = new UrlEncodedFormEntity(valuePairs, Consts.UTF_8);
            entity.setContentType("application/x-www-form-urlencoded");
     
            // 创建一个post请求
            HttpPost post = new HttpPost("https://www.zhihu.com/login/email");
            // 注入post数据
            post.setEntity(entity);
            res = httpClient.execute(post, context);
     
            // 打印响应信息,查看是否登陆是否成功
            System.out.println("打印响应信息===========");
            HttpClientUtils.printResponse(res);
            res.close();
     
            System.out.println("登陆成功后,新的Cookie:===============");
            for (Cookie c : context.getCookieStore().getCookies()) {
              System.out.println(c.getName() + ": " + c.getValue());
            }
     
            // 构造一个新的get请求,用来测试登录是否成功
            HttpGet newGet = new HttpGet("http://www.zhihu.com/question/following");
            res = httpClient.execute(newGet, context);
            String content = EntityUtils.toString(res.getEntity());
            System.out.println("登陆成功后访问的页面===============");
            System.out.println(content);
            res.close();
     
          } finally {
            httpClient.close();
          }
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
  • 相关阅读:
    第二十九课 循环链表的实现
    第二十八课 再论智能指针(下)
    第二十七课 再论智能指针(上)
    第二十六课 典型问题分析(Bugfix)
    普通new和placement new的重载
    leetcode 581. Shortest Unsorted Continuous Subarray
    leetcode 605. Can Place Flowers
    leetcode 219. Contains Duplicate II
    leetcode 283. Move Zeroes
    leetcode 217. Contains Duplicate
  • 原文地址:https://www.cnblogs.com/sharpest/p/7832135.html
Copyright © 2011-2022 走看看