zoukankan      html  css  js  c++  java
  • java爬取当前疫情数据项目总结

    代码

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    import javax.net.ssl.HttpsURLConnection;
    
    import com.alibaba.fastjson.JSONArray;
    
    import net.sf.json.JSON;
    import net.sf.json.JSONObject;
    
    public class ggg {
        public static void main(String[] args) throws IOException, SQLException {
            getAreaStat();
        }
    
    
        private static String httpRequset(String requesturl) throws IOException {
            StringBuffer buffer = null;
            BufferedReader bufferedReader = null;
            InputStreamReader inputStreamReader = null;
            InputStream inputStream = null;
            HttpsURLConnection httpsURLConnection = null;
            try {
                URL url = new URL(requesturl);
                httpsURLConnection = (HttpsURLConnection) url.openConnection();
                httpsURLConnection.setDoInput(true);
                httpsURLConnection.setRequestMethod("GET");
                inputStream = httpsURLConnection.getInputStream();
                inputStreamReader = new InputStreamReader(inputStream, "utf-8");
                bufferedReader = new BufferedReader(inputStreamReader);
                buffer = new StringBuffer();
                String str = null;
                while ((str = bufferedReader.readLine()) != null) {
                    buffer.append(str);
                }
            } catch (MalformedURLException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
    
            return buffer.toString();
        }
    
    
        public static String getAreaStat() throws SQLException {
            String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia";
            String htmlResult = "";
            try {
                htmlResult = httpRequset(url);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        
    
            
            String reg = "window.getAreaStat = (.*?)\}(?=catch)";
            Pattern totalPattern = Pattern.compile(reg);
            Matcher totalMatcher = totalPattern.matcher(htmlResult);
            System.out.println(htmlResult);
            String result = "";
            if (totalMatcher.find()) {
                result = totalMatcher.group(1);
                System.out.println(result);
        
                JSONArray array = JSONArray.parseArray(result);
                
                
                Connection conn=DBUtil.DBUtil.getConnection();
                String sql=null;
                PreparedStatement pst=null;
                sql = "insert into sheng values(?,?,?,?) ";  
                
                 Date dNow = new Date( );
                  SimpleDateFormat ft = new SimpleDateFormat ("yyyy年MM月dd日");
                  String time=ft.format(dNow);
                
    
                    for (int i = 0; i <= 30; i++) {
    
                        com.alibaba.fastjson.JSONObject jsonObject =array.getJSONObject(i);
                        String provinceName = jsonObject.getString("provinceName");
                        
                            String current = jsonObject.getString("currentConfirmedCount");
                            String confirmed = jsonObject.getString("confirmedCount");
                            String cured = jsonObject.getString("curedCount");
                            String dead = jsonObject.getString("deadCount");
                            String suspect = jsonObject.getString("suspectedCount");
                            System.out.println(provinceName);
                            
                             pst = conn.prepareStatement(sql,Statement.RETURN_GENERATED_KEYS);  
                            
                                
                            
                                pst.setString(1, provinceName);  
                                pst.setString(2,confirmed ); 
                                pst.setString(3, cured); 
                                pst.setString(4, time);
                                pst.executeUpdate();
                            
                            
                            JSONArray array2 = jsonObject.getJSONArray("cities");
                            for (int j = 0; j < array2.size(); j++) {
                                com.alibaba.fastjson.JSONObject jsonObject2 =array2.getJSONObject(j);
                                String cityname = jsonObject2.getString("cityName");
                                String current2 = jsonObject2.getString("currentConfirmedCount");
                                String confirmed2 = jsonObject2.getString("confirmedCount");
                                String cured2 = jsonObject2.getString("curedCount");
                                String dead2 = jsonObject2.getString("deadCount");
                                String suspect2 = jsonObject2.getString("suspectedCount");
                                
                            }
                    }
            }
            return result;
        }
    }

    爬取的数据来自丁香医生。页面脚本中以 window.getAreaStat = [...] 的形式内嵌了一个JSON数组,所以先用正则表达式把这段数组文本提取出来,再转换成JSONArray,就可以直接使用了。

    日志:

    日期 编号 类型 引入阶段 排除阶段 修复时间 修复缺陷
     3.10    编码 编码  3.10  JSONArray无法使用
    描述:JSONArray无法使用,原因是没有把jsoup-1.7.2.jar和fastjson-1.2.66.jar导入构建路径
    日期 编号 类型 引入阶段 排除阶段 修复时间 修复缺陷
     3.10   编码   编码  3.10 找不到数据 
    描述:正则表达式使用不熟练,"()"内为正则表达式截取内容

  • 相关阅读:
    C语言01
    C++面试总结更新
    Python网络爬虫与信息提取02
    Self-Driving Car 01
    Python网络爬虫与信息提取01
    Python-03
    Shell
    Python-05
    Python-04
    Python-02
  • 原文地址:https://www.cnblogs.com/liuleliu/p/12498951.html
Copyright © 2011-2022 走看看