zoukankan      html  css  js  c++  java
  • Java 爬取京东商品评论信息

    /**
     * Fetches one page of JD.com product comments and prints each comment's text to stdout.
     *
     * <p>Sends a GET request to the hard-coded {@code productPageComments.action} URL with a
     * desktop-browser {@code user-agent} header. When the response status is 200, the body is
     * decoded as UTF-8, parsed as a JSON object, and the {@code content} field of every element
     * of its {@code comments} array is printed, prefixed by the element's index. For any other
     * status the body is discarded.
     *
     * <p>A {@link JSONException} raised while parsing is printed via {@code printStackTrace()}
     * and not propagated.
     *
     * @throws IOException if executing the request, reading the body, or releasing the
     *                     connection fails
     */
    public static void downJDProductComment() throws IOException {
        String url = "https://club.jd.com/comment/productPageComments.action?productId=100011199522&score=0&sortType=5&page=3&pageSize=10";
        HttpGet httpGet = new HttpGet(url);
        httpGet.setHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36");

        // try-with-resources closes the response first, then the client, on every exit path.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpGet)) {
            int statusCode = response.getStatusLine().getStatusCode();
            HttpEntity entity = response.getEntity();
            try {
                // Only a 200 response with a body is parsed.
                if (statusCode == 200 && entity != null) {
                    String body = EntityUtils.toString(entity, Consts.UTF_8);
                    // "comments" is a JSON array; read it as one instead of going through
                    // getString(), which rejects non-string values in current org.json.
                    JSONArray comments = new JSONObject(body).getJSONArray("comments");
                    for (int i = 0; i < comments.length(); i++) {
                        System.out.println(i + comments.getJSONObject(i).getString("content"));
                    }
                }
            } catch (JSONException e) {
                // Best-effort: a malformed payload is reported but does not abort the caller.
                e.printStackTrace();
            } finally {
                // Drain whatever is left of the body; a null entity is a no-op.
                EntityUtils.consume(entity);
            }
        }
    }

     

  • 相关阅读:
    mybatis批量更新策略
    tk.mybatis扩展通用接口
    IDEA入门——jdbc连接和工具类的使用
    tensorflow——3
    再战tensorflow
    tensorflow初学
    Anaconda和TensorFlow安装遇到的坑记录
    《企业应用架构模式》——阅读笔记3
    机器学习十讲——第十讲
    机器学习十讲——第九讲
  • 原文地址:https://www.cnblogs.com/sakura--/p/13446638.html
Copyright © 2011-2022 走看看