zoukankan      html  css  js  c++  java
  • Java爬虫之 HttpClient 的使用

    Get

    • 不带参数的Get请求
      // Create the HttpClient instance that acts as the client side of the exchange.
      CloseableHttpClient httpClient = HttpClients.createDefault();
      // Build the GET request for the target address.
      HttpGet httpGet = new HttpGet("http://www.baidu.com");
      CloseableHttpResponse response = null;
      try {
          // Send the request through httpClient and capture the response.
          response = httpClient.execute(httpGet);

          // Parse the body only when the status code is 200.
          if (response.getStatusLine().getStatusCode() == 200) {
              String content = EntityUtils.toString(response.getEntity(), "utf8");
              System.out.println(content.length());
          }
      } catch (IOException e) {
          e.printStackTrace();
      } finally {
          // response is still null when execute() threw, so guard the close;
          // otherwise a NullPointerException here would skip httpClient.close().
          if (response != null) {
              try {
                  response.close();
              } catch (IOException e) {
                  e.printStackTrace();
              }
          }
          try {
              httpClient.close();
          } catch (IOException e) {
              e.printStackTrace();
          }
      }
      
    • 带参数的Get请求
      // 创建httpClient对象,模拟客户端
      CloseableHttpClient httpClient = HttpClients.createDefault();
      
      // 创建URIBuilder
      URIBuilder uriBuilder = new URIBuilder("http://www.baidu.com");
      // 设置参数
      uriBuilder.setParameter("keys", "Java");
      
      // 创建httpGet对象,设置地址
      HttpGet httpGet = new HttpGet(uriBuilder.build());
      

    Post

    • 不带参数的Post请求
      只需把 HttpGet 改为 HttpPost 即可
      // POST request for the target address (no body or form parameters).
      HttpPost httpPost = new HttpPost("http://www.baidu.com");
      // Client instance that will send the request.
      CloseableHttpClient httpClient = HttpClients.createDefault();
      
    • 带参数的Post请求
      // Client instance that will send the request.
      CloseableHttpClient httpClient = HttpClients.createDefault();
      // POST request for the target address.
      HttpPost httpPost = new HttpPost("http://www.baidu.com");

      // Collect the form fields to submit; here a single pair keys=Java.
      List<NameValuePair> params = new ArrayList<NameValuePair>();
      params.add(new BasicNameValuePair("keys", "Java"));

      // Wrap the fields in a URL-encoded form entity (charset "utf8") and
      // attach it to the POST request as its body.
      httpPost.setEntity(new UrlEncodedFormEntity(params, "utf8"));
      

    连接池

    • 避免多次创建 HttpClient(下例中的 cm 是事先创建好的连接池管理器,例如 PoolingHttpClientConnectionManager)
      // Build an HttpClient backed by the connection manager cm.
      // NOTE(review): cm is not defined in this snippet — presumably a
      // PoolingHttpClientConnectionManager created once elsewhere; confirm.
      CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
      HttpGet httpGet = new HttpGet("http://www.baidu.com");
      CloseableHttpResponse response = null;
      try {
          // Send the request and capture the response.
          response = httpClient.execute(httpGet);
          // Parse the body only when the status code is 200.
          if (response.getStatusLine().getStatusCode() == 200) {
              String content = EntityUtils.toString(response.getEntity(), "utf8");
              System.out.println(content.length());
          }
      } catch (IOException e) {
          // Report the failure instead of silently swallowing it.
          e.printStackTrace();
      } finally {
          // response is null when execute() threw, so guard the close.
          if (response != null) {
              try {
                  response.close();
              } catch (IOException e) {
                  e.printStackTrace();
              }
          }
          // Do not close httpClient here; it is managed by the connection pool.
      }
      

    请求参数

    • 对爬取设置参数
      // Client instance that will send the request.
      CloseableHttpClient httpClient = HttpClients.createDefault();
      // GET request for the target address.
      HttpGet httpGet = new HttpGet("http://www.baidu.com");

      // Assemble the per-request settings step by step.
      RequestConfig.Builder configBuilder = RequestConfig.custom();
      // Maximum time allowed for establishing the connection.
      configBuilder.setConnectTimeout(1000);
      // Maximum time allowed for obtaining a connection.
      configBuilder.setConnectionRequestTimeout(500);
      // Maximum time allowed for data transfer.
      configBuilder.setSocketTimeout(10 * 1000);
      RequestConfig config = configBuilder.build();

      // Apply the settings to this request.
      httpGet.setConfig(config);
      

    没有修不好的电脑
  • 相关阅读:
    动态SQL的注意
    关于数据库抛出异常:Incorrect string value: 'xE1x...' for column '字段名' at row 1 问题的解决方法
    让.bashrc文件在终端自动生效
    期中考试题
    RAP、Mock.js、Vue.js、Webpack
    全局变量变为局部变量 & MVC思想
    用 JS + LeanCloud 给网页添加数据库(留言功能)
    闭包的使用
    从发请求到AJAX到同源政策
    从实现HTML页面局部刷新到JSONP
  • 原文地址:https://www.cnblogs.com/duniqb/p/12702479.html
Copyright © 2011-2022 走看看