代码
import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.net.ssl.HttpsURLConnection; import com.alibaba.fastjson.JSONArray; import net.sf.json.JSON; import net.sf.json.JSONObject; public class ggg { public static void main(String[] args) throws IOException, SQLException { getAreaStat(); } private static String httpRequset(String requesturl) throws IOException { StringBuffer buffer = null; BufferedReader bufferedReader = null; InputStreamReader inputStreamReader = null; InputStream inputStream = null; HttpsURLConnection httpsURLConnection = null; try { URL url = new URL(requesturl); httpsURLConnection = (HttpsURLConnection) url.openConnection(); httpsURLConnection.setDoInput(true); httpsURLConnection.setRequestMethod("GET"); inputStream = httpsURLConnection.getInputStream(); inputStreamReader = new InputStreamReader(inputStream, "utf-8"); bufferedReader = new BufferedReader(inputStreamReader); buffer = new StringBuffer(); String str = null; while ((str = bufferedReader.readLine()) != null) { buffer.append(str); } } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } return buffer.toString(); } public static String getAreaStat() throws SQLException { String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia"; String htmlResult = ""; try { htmlResult = httpRequset(url); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } String reg = "window.getAreaStat = (.*?)\}(?=catch)"; Pattern totalPattern = Pattern.compile(reg); 
Matcher totalMatcher = totalPattern.matcher(htmlResult); System.out.println(htmlResult); String result = ""; if (totalMatcher.find()) { result = totalMatcher.group(1); System.out.println(result); JSONArray array = JSONArray.parseArray(result); Connection conn=DBUtil.DBUtil.getConnection(); String sql=null; PreparedStatement pst=null; sql = "insert into sheng values(?,?,?,?) "; Date dNow = new Date( ); SimpleDateFormat ft = new SimpleDateFormat ("yyyy年MM月dd日"); String time=ft.format(dNow); for (int i = 0; i <= 30; i++) { com.alibaba.fastjson.JSONObject jsonObject =array.getJSONObject(i); String provinceName = jsonObject.getString("provinceName"); String current = jsonObject.getString("currentConfirmedCount"); String confirmed = jsonObject.getString("confirmedCount"); String cured = jsonObject.getString("curedCount"); String dead = jsonObject.getString("deadCount"); String suspect = jsonObject.getString("suspectedCount"); System.out.println(provinceName); pst = conn.prepareStatement(sql,Statement.RETURN_GENERATED_KEYS); pst.setString(1, provinceName); pst.setString(2,confirmed ); pst.setString(3, cured); pst.setString(4, time); pst.executeUpdate(); JSONArray array2 = jsonObject.getJSONArray("cities"); for (int j = 0; j < array2.size(); j++) { com.alibaba.fastjson.JSONObject jsonObject2 =array2.getJSONObject(j); String cityname = jsonObject2.getString("cityName"); String current2 = jsonObject2.getString("currentConfirmedCount"); String confirmed2 = jsonObject2.getString("confirmedCount"); String cured2 = jsonObject2.getString("curedCount"); String dead2 = jsonObject2.getString("deadCount"); String suspect2 = jsonObject2.getString("suspectedCount"); } } } return result; } }
爬取的数据来自丁香医生。页面里的数据本身就是JSON数组形式的字符串,所以直接用正则表达式把它提取出来,再转换成JSONArray,就可以拿来用了。
日志:
日期 | 编号 | 类型 | 引入阶段 | 排除阶段 | 修复时间 | 修复缺陷 |
3.10 | 1 | 编码 | 编码 | 3.10 | JSONArray无法使用 | |
描述:JSONArray用不了,原因是没有把jsoup-1.7.2.jar和fastjson-1.2.66.jar导入构建路径 |
日期 | 编号 | 类型 | 引入阶段 | 排除阶段 | 修复时间 | 修复缺陷 |
3.10 | 2 | 编码 | 编码 | 3.10 | 找不到数据 | |
描述:正则表达式使用不熟练,只有"()"(捕获组)内匹配到的内容才会被截取出来 |