zoukankan      html  css  js  c++  java
  • Java爬虫之 HttpClient 的使用

    Get

    • 不带参数的Get请求
      // Create the HttpClient instance that plays the client side of the exchange.
      CloseableHttpClient httpClient = HttpClients.createDefault();
      // Create the GET request for the target address.
      HttpGet httpGet = new HttpGet("http://www.baidu.com");
      CloseableHttpResponse response = null;
      try {
          // Send the request and obtain the response.
          response = httpClient.execute(httpGet);

          // Only read the body on HTTP 200, then print its length in characters.
          if (response.getStatusLine().getStatusCode() == 200) {
              String content = EntityUtils.toString(response.getEntity(), "utf8");
              System.out.println(content.length());
          }
      } catch (IOException e) {
          e.printStackTrace();
      } finally {
          // response is still null when execute() threw; guard the close so a
          // NullPointerException does not mask the original failure or skip
          // closing the client below.
          if (response != null) {
              try {
                  response.close();
              } catch (IOException e) {
                  e.printStackTrace();
              }
          }
          try {
              httpClient.close();
          } catch (IOException e) {
              e.printStackTrace();
          }
      }
      
    • 带参数的Get请求
      // 创建httpClient对象,模拟客户端
      CloseableHttpClient httpClient = HttpClients.createDefault();
      
      // 创建URIBuilder
      URIBuilder uriBuilder = new URIBuilder("http://www.baidu.com");
      // 设置参数
      uriBuilder.setParameter("keys", "Java");
      
      // 创建httpGet对象,设置地址
      HttpGet httpGet = new HttpGet(uriBuilder.build());
      

    Post

    • 不带参数的Post请求
      只需把 HttpGet 改为 HttpPost 即可
      // Create the HttpClient instance that plays the client side of the exchange.
      CloseableHttpClient httpClient = HttpClients.createDefault();
      // Create the POST request for the target address (no request body is set here).
      HttpPost httpPost = new HttpPost("http://www.baidu.com");
      
    • 带参数的Post请求
      // Create the HttpClient instance that plays the client side of the exchange.
      CloseableHttpClient httpClient = HttpClients.createDefault();
      // Create the POST request for the target address.
      HttpPost httpPost = new HttpPost("http://www.baidu.com");

      // Collect the form fields to submit; here a single field keys=Java.
      List<NameValuePair> formFields = new ArrayList<NameValuePair>();
      formFields.add(new BasicNameValuePair("keys", "Java"));

      // Encode the fields as a URL-encoded form body and attach it to the request.
      httpPost.setEntity(new UrlEncodedFormEntity(formFields, "utf8"));
      

    连接池

    • 避免多次创建 HttpClient
      // Obtain an HttpClient backed by the connection manager.
      // NOTE(review): cm is not defined in this snippet; presumably a pooling
      // connection manager created once elsewhere and shared - confirm.
      CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
      HttpGet httpGet = new HttpGet("http://www.baidu.com");
      CloseableHttpResponse response = null;
      try {
          response = httpClient.execute(httpGet);
          // Only read the body on HTTP 200, then print its length in characters.
          if (response.getStatusLine().getStatusCode() == 200) {
              String content = EntityUtils.toString(response.getEntity(), "utf8");
              System.out.println(content.length());
          }
      } catch (IOException e) {
          // Report the failure instead of silently swallowing it, matching the
          // error handling used by the other examples.
          e.printStackTrace();
      } finally {
          // response is null when execute() threw, so only close it when present.
          if (response != null) {
              try {
                  response.close();
              } catch (IOException e) {
                  e.printStackTrace();
              }
          }
          // Do not close httpClient here: it is managed through the connection pool.
      }
      

    请求参数

    • 对爬取设置参数
      // Create the HttpClient instance that plays the client side of the exchange.
      CloseableHttpClient httpClient = HttpClients.createDefault();
      // Create the GET request for the target address.
      HttpGet httpGet = new HttpGet("http://www.baidu.com");

      // Assemble the per-request timeout settings step by step.
      RequestConfig.Builder timeouts = RequestConfig.custom();
      // Longest time allowed for establishing the connection.
      timeouts.setConnectTimeout(1000);
      // Longest time allowed for obtaining a connection.
      timeouts.setConnectionRequestTimeout(500);
      // Longest time allowed for data transfer.
      timeouts.setSocketTimeout(10 * 1000);

      // Apply the settings to this request.
      httpGet.setConfig(timeouts.build());
      

    没有修不好的电脑
  • 相关阅读:
    作业三
    源代码版本管理与项目管理软件的认识与github的注册
    每周更新的学习进度表
    电脑四则运算出题
    软件工程问题
    自我介绍
    2016.2.14-2016.2.21 中大信(北京)工程造价咨询有限公司实习有感
    《软件工程》课程总结
    结对编程项目---四则运算
    作业三:代码规范、代码复审、PSP
  • 原文地址:https://www.cnblogs.com/duniqb/p/12702479.html
Copyright © 2011-2022 走看看