zoukankan      html  css  js  c++  java
  • Java 爬取京东商品评论信息

    /**
     * Fetches one page of JD product comments and prints each comment's text to standard output.
     *
     * <p>The request URL is hard-coded (productId=100011199522, page=3, pageSize=10). On an
     * HTTP 200 response the body is parsed as JSON and the {@code content} field of every element
     * of the top-level {@code comments} array is printed, prefixed by its index in the array.
     * Any other status code produces no output.
     *
     * <p>A malformed or unexpected JSON payload is reported via a stack trace on standard error
     * and is not propagated to the caller.
     *
     * @throws IOException if the request fails or the response body cannot be read
     */
    public static void downJDProductComment() throws IOException {
        String url = "https://club.jd.com/comment/productPageComments.action?productId=100011199522&score=0&sortType=5&page=3&pageSize=10";
        HttpGet httpGet = new HttpGet(url);
        httpGet.setHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36");

        // try-with-resources closes the response first and then the client, even when
        // reading the status line or the body throws.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpGet)) {
            int statusCode = response.getStatusLine().getStatusCode();
            HttpEntity entity = response.getEntity();
            try {
                // Only a 200 response with a body is parsed.
                if (statusCode == 200 && entity != null) {
                    String body = EntityUtils.toString(entity, Consts.UTF_8);
                    // "comments" is a JSON array; read it as one instead of going through
                    // getString(), which rejects non-string values in current org.json releases.
                    JSONArray comments = new JSONObject(body).getJSONArray("comments");
                    for (int i = 0; i < comments.length(); i++) {
                        System.out.println(i + comments.getJSONObject(i).getString("content"));
                    }
                }
            } catch (JSONException e) {
                // Best effort: a bad payload is reported but does not fail the caller.
                e.printStackTrace();
            } finally {
                // Drain the entity on every path so the connection can be released cleanly;
                // consume() is a no-op for a null entity.
                EntityUtils.consume(entity);
            }
        }
    }

     

  • 相关阅读:
    Codeforces 1093D(染色+组合数学)
    Codeforces 1093C (思维+贪心)
    Codeforces 1082D (贪心)
    Codeforces 433A (背包)
    BZOJ 3262(Treap+树状数组)
    BZOJ 1588 (treap)
    Codeforces 1061C (DP+滚动数组)
    Codeforces 1080C 题解(思维+二维前缀和)
    周记 2015.07.12
    周记 2015.07.04
  • 原文地址:https://www.cnblogs.com/sakura--/p/13446638.html
Copyright © 2011-2022 走看看