dao.java
package dao;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import entity.Cvf;
import utils.DBUtil;
/**
 * Data-access object for the {@code cvpr} table.
 *
 * <p>All SQL is executed through the static helpers of {@code DBUtil}. The query methods
 * release the statement and connection that {@code DBUtil.executeQuery} leaves open in
 * {@code DBUtil.pstmt} and {@code DBUtil.connection}.
 */
public class dao {

    /**
     * Inserts one record into the {@code cvpr} table.
     *
     * @param cvf record whose name, link, abstract and keyword are stored
     * @return whatever {@code DBUtil.executeUpdate} reports for the insert
     */
    public boolean add(Cvf cvf) {
        String sql = "insert into cvpr(cname,chref,cabstract,ckeyword) values (?,?,?,?)";
        Object[] params = {cvf.getCname(), cvf.getChref(), cvf.getCabstract(), cvf.getCkeyword()};
        return DBUtil.executeUpdate(sql, params);
    }

    /**
     * Loads every row of the {@code cvpr} table.
     *
     * @return the rows as {@code Cvf} objects; empty (never {@code null}) when the query fails
     */
    public List<Cvf> Query() {
        return select("select * from cvpr ", new Object[] {}, true, null);
    }

    /**
     * Loads the rows of the {@code cvpr} table whose {@code ckeyword} column matches {@code key}.
     *
     * @param key keyword bound to the {@code ckeyword=?} placeholder; it is also the keyword
     *            stored in every returned object
     * @return the matching rows; empty (never {@code null}) when nothing matches or the query fails
     */
    public List<Cvf> Query(String key) {
        return select("select * from cvpr where ckeyword=? ", new Object[] {key}, false, key);
    }

    /**
     * Runs a select statement and maps each row to a {@code Cvf}.
     *
     * @param sql              statement text
     * @param params           values for the statement's placeholders
     * @param keywordFromRow   {@code true} to read the keyword from the {@code ckeyword} column,
     *                         {@code false} to use {@code fixedKeyword} for every row
     * @param fixedKeyword     keyword used when {@code keywordFromRow} is {@code false}
     * @return the mapped rows, possibly empty
     */
    private List<Cvf> select(String sql, Object[] params, boolean keywordFromRow, String fixedKeyword) {
        List<Cvf> cvfs = new ArrayList<Cvf>();
        ResultSet rs = null;
        try {
            rs = DBUtil.executeQuery(sql, params);
            // executeQuery signals failure by returning null (it has already printed the cause).
            if (rs != null) {
                while (rs.next()) {
                    int id = rs.getInt("id");
                    String cname = rs.getString("cname");
                    String chref = rs.getString("chref");
                    String cabstract = rs.getString("cabstract");
                    String ckeyword = keywordFromRow ? rs.getString("ckeyword") : fixedKeyword;
                    cvfs.add(new Cvf(id, cname, chref, cabstract, ckeyword));
                }
            }
        } catch (Exception e) {
            // Best effort: report the problem and return the rows collected so far.
            e.printStackTrace();
        } finally {
            releaseQueryResources(rs);
        }
        return cvfs;
    }

    /**
     * Closes the result set, then the statement and connection held by {@code DBUtil},
     * in reverse order of creation. Each close is attempted independently so that one
     * failure does not leave the remaining resources open.
     */
    private static void releaseQueryResources(ResultSet rs) {
        try {
            if (rs != null) {
                rs.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        try {
            if (DBUtil.pstmt != null) {
                DBUtil.pstmt.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        try {
            if (DBUtil.connection != null) {
                DBUtil.connection.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
Cvf.java
package dao;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import entity.Cvf;
import utils.DBUtil;
/**
 * Data-access object for the {@code cvpr} table.
 *
 * <p>All SQL is executed through the static helpers of {@code DBUtil}. The query methods
 * release the statement and connection that {@code DBUtil.executeQuery} leaves open in
 * {@code DBUtil.pstmt} and {@code DBUtil.connection}.
 */
public class dao {

    /**
     * Inserts one record into the {@code cvpr} table.
     *
     * @param cvf record whose name, link, abstract and keyword are stored
     * @return whatever {@code DBUtil.executeUpdate} reports for the insert
     */
    public boolean add(Cvf cvf) {
        String sql = "insert into cvpr(cname,chref,cabstract,ckeyword) values (?,?,?,?)";
        Object[] params = {cvf.getCname(), cvf.getChref(), cvf.getCabstract(), cvf.getCkeyword()};
        return DBUtil.executeUpdate(sql, params);
    }

    /**
     * Loads every row of the {@code cvpr} table.
     *
     * @return the rows as {@code Cvf} objects; empty (never {@code null}) when the query fails
     */
    public List<Cvf> Query() {
        return select("select * from cvpr ", new Object[] {}, true, null);
    }

    /**
     * Loads the rows of the {@code cvpr} table whose {@code ckeyword} column matches {@code key}.
     *
     * @param key keyword bound to the {@code ckeyword=?} placeholder; it is also the keyword
     *            stored in every returned object
     * @return the matching rows; empty (never {@code null}) when nothing matches or the query fails
     */
    public List<Cvf> Query(String key) {
        return select("select * from cvpr where ckeyword=? ", new Object[] {key}, false, key);
    }

    /**
     * Runs a select statement and maps each row to a {@code Cvf}.
     *
     * @param sql              statement text
     * @param params           values for the statement's placeholders
     * @param keywordFromRow   {@code true} to read the keyword from the {@code ckeyword} column,
     *                         {@code false} to use {@code fixedKeyword} for every row
     * @param fixedKeyword     keyword used when {@code keywordFromRow} is {@code false}
     * @return the mapped rows, possibly empty
     */
    private List<Cvf> select(String sql, Object[] params, boolean keywordFromRow, String fixedKeyword) {
        List<Cvf> cvfs = new ArrayList<Cvf>();
        ResultSet rs = null;
        try {
            rs = DBUtil.executeQuery(sql, params);
            // executeQuery signals failure by returning null (it has already printed the cause).
            if (rs != null) {
                while (rs.next()) {
                    int id = rs.getInt("id");
                    String cname = rs.getString("cname");
                    String chref = rs.getString("chref");
                    String cabstract = rs.getString("cabstract");
                    String ckeyword = keywordFromRow ? rs.getString("ckeyword") : fixedKeyword;
                    cvfs.add(new Cvf(id, cname, chref, cabstract, ckeyword));
                }
            }
        } catch (Exception e) {
            // Best effort: report the problem and return the rows collected so far.
            e.printStackTrace();
        } finally {
            releaseQueryResources(rs);
        }
        return cvfs;
    }

    /**
     * Closes the result set, then the statement and connection held by {@code DBUtil},
     * in reverse order of creation. Each close is attempted independently so that one
     * failure does not leave the remaining resources open.
     */
    private static void releaseQueryResources(ResultSet rs) {
        try {
            if (rs != null) {
                rs.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        try {
            if (DBUtil.pstmt != null) {
                DBUtil.pstmt.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        try {
            if (DBUtil.connection != null) {
                DBUtil.connection.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
ListServlet.java
package servlet;
import java.io.IOException;
import java.util.List;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import dao.dao;
import entity.Cvf;
/**
 * Servlet that looks up papers by the {@code keyword} request parameter and renders them
 * through {@code list.jsp}.
 */
public class ListServlet extends HttpServlet {

    /**
     * Queries the papers for the requested keyword, stores them in the request attribute
     * {@code cvfs} and forwards to {@code list.jsp}.
     */
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // Use UTF-8 for both the incoming parameters and the outgoing page to avoid garbled text.
        request.setCharacterEncoding("utf-8");
        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");

        final String keyword = request.getParameter("keyword");
        final List<Cvf> matches = new dao().Query(keyword);
        System.out.println(matches);

        request.setAttribute("cvfs", matches);
        request.getRequestDispatcher("list.jsp").forward(request, response);
    }

    /** Handles POST exactly like GET. */
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        doGet(request, response);
    }
}
QueryServlet.java
package servlet;
import java.io.IOException;
import java.util.List;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import dao.dao;
import entity.Cvf;
import utils.Jsouputil;
/**
 * Servlet that loads every stored paper and renders the list through {@code show.jsp}.
 */
public class QueryServlet extends HttpServlet {

    /**
     * Reads all papers via {@code dao.Query()}, stores them in the request attribute
     * {@code cvfs} and forwards to {@code show.jsp}.
     */
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // Use UTF-8 for both the incoming parameters and the outgoing page to avoid garbled text.
        request.setCharacterEncoding("utf-8");
        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");

        // The crawl step (Jsouputil.testSelector()) is intentionally not invoked here;
        // this servlet only displays rows that are already in the database.
        final List<Cvf> allPapers = new dao().Query();
        System.out.println(allPapers);

        request.setAttribute("cvfs", allPapers);
        request.getRequestDispatcher("show.jsp").forward(request, response);
    }

    /** Handles POST exactly like GET. */
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        doGet(request, response);
    }
}
DBUtil.java
package utils;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
/**
 * Minimal JDBC helper for the MySQL database used by this project.
 *
 * <p>The connection, statement and result set of the most recent call are kept in public
 * static fields. Every call overwrites those fields, so overlapping calls would interfere
 * with each other.
 */
public class DBUtil {
    // JDBC URL and credentials. A failure to load the driver or to connect usually
    // means this URL needs adjusting.
    public static String URL="jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=GB18030&useSSL=false&serverTimezone=GMT&allowPublicKeyRetrieval=true";
    private static final String UNAME="root";
    private static final String UPWD="1234";

    /** Statement created by the most recent call. */
    public static PreparedStatement pstmt=null;
    /** Result set returned by the most recent successful {@link #executeQuery}. */
    public static ResultSet rs = null;
    /** Connection opened by the most recent call. */
    public static Connection connection=null;

    /**
     * Executes an insert, update or delete statement and closes the statement and
     * connection afterwards.
     *
     * @param sql    statement text with {@code ?} placeholders
     * @param params values bound to the placeholders in order; {@code null} binds nothing
     * @return {@code true} when at least one row was affected, {@code false} otherwise
     *         or when any error occurred (the error is printed)
     */
    public static boolean executeUpdate(String sql,Object [] params) {
        boolean flag = false;
        try {
            prepare(sql, params);
            // The return value is the number of rows inserted, updated or deleted.
            int count = pstmt.executeUpdate();
            if (count > 0) {
                System.out.println("操作成功!!!");
                flag = true;
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeStatementAndConnection();
        }
        return flag;
    }

    /**
     * Executes a select statement.
     *
     * <p>On success the statement and connection stay open (they back the returned result
     * set); the caller closes the result set, {@link #pstmt} and {@link #connection}.
     * On failure everything opened so far is closed here.
     *
     * @param sql    statement text with {@code ?} placeholders
     * @param params values bound to the placeholders in order; {@code null} binds nothing
     * @return the open result set, or {@code null} when any error occurred (the error is printed)
     */
    public static ResultSet executeQuery(String sql,Object [] params) {
        try {
            prepare(sql, params);
            rs = pstmt.executeQuery();
            return rs;
        } catch (Exception e) {
            e.printStackTrace();
            // Nothing usable is returned, so do not leave the connection open.
            closeStatementAndConnection();
            return null;
        }
    }

    /**
     * Loads the driver, opens a connection, prepares {@code sql} and binds {@code params},
     * storing the connection and statement in the static fields.
     */
    private static void prepare(String sql, Object[] params) throws ClassNotFoundException, SQLException {
        Class.forName("com.mysql.cj.jdbc.Driver");
        connection = DriverManager.getConnection(URL, UNAME, UPWD);
        pstmt = connection.prepareStatement(sql);
        if (params != null) {
            for (int i = 0; i < params.length; i++) {
                // JDBC placeholders are numbered from 1.
                pstmt.setObject(i + 1, params[i]);
            }
        }
    }

    /**
     * Closes the statement and then the connection (reverse order of creation). Each close
     * is attempted independently so a failing statement close cannot leak the connection.
     */
    private static void closeStatementAndConnection() {
        try {
            if (pstmt != null) {
                pstmt.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        try {
            if (connection != null) {
                connection.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
HttpClientPool.java
package utils;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import com.alibaba.fastjson.JSONObject;
/**
 * Helpers that download the CVPR 2019 open-access index page with Apache HttpClient,
 * using a caller-supplied pooling connection manager.
 */
public class HttpClientPool {

    /** Page requested by both {@code doGet} and {@code doPost}. */
    private static final String INDEX_URL = "http://openaccess.thecvf.com/CVPR2019.py#";

    // Timeouts in milliseconds. They must fit in an int: the former doGet value
    // 100000*1000000 overflowed silently and produced an unintended timeout.
    /** Maximum time to establish a connection. */
    private static final int CONNECT_TIMEOUT_MS = 10000;
    /** Maximum time to wait for a connection from the pool. */
    private static final int CONNECTION_REQUEST_TIMEOUT_MS = 50000;
    /** Maximum time to wait for data on an established connection. */
    private static final int SOCKET_TIMEOUT_MS = 1000 * 1000;

    /**
     * Creates a pooling connection manager limited to 100 connections in total and
     * 10 per route. The manager is not returned or stored, so this method currently
     * has no lasting effect.
     */
    public static void HttpClientPool() {
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        cm.setMaxTotal(100);
        cm.setDefaultMaxPerRoute(10);
        // Requests are issued by callers via doGet(cm) / doPost(cm).
    }

    /**
     * Sends a form POST (one empty name/value pair) to the index page.
     *
     * @param cm connection manager the HTTP client draws connections from
     * @return the response body decoded as UTF-8, or {@code null} when the status is not 200,
     *         the response has no body, or the request fails (the failure is printed)
     * @throws Exception if the form entity cannot be encoded
     */
    public static String doPost(PoolingHttpClientConnectionManager cm) throws Exception {
        // The client is deliberately left open: it shares the caller's connection manager.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

        HttpPost httpPost = new HttpPost(INDEX_URL);
        System.out.println("发起请求的信息:"+httpPost);

        // Form parameters sent in the request body.
        List<NameValuePair> params = new ArrayList<NameValuePair>();
        params.add(new BasicNameValuePair("",""));
        httpPost.setEntity(new UrlEncodedFormEntity(params,"utf8"));
        httpPost.setConfig(requestConfig());

        String content = null;
        // try-with-resources returns the connection to the pool even when reading fails.
        try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = response.getEntity();
                if (httpEntity != null) {
                    content = EntityUtils.toString(httpEntity, "utf8");
                    System.out.println(content.length());
                }
            } else {
                System.out.println("请求失败"+response);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return content;
    }

    /**
     * Sends a GET request for the index page.
     *
     * @param cm connection manager the HTTP client draws connections from
     * @return the response body decoded as UTF-8, or {@code null} when the status is not 200,
     *         the response has no body, or the request fails (the failure is printed)
     * @throws Exception if the request URI cannot be built
     */
    public static String doGet(PoolingHttpClientConnectionManager cm) throws Exception {
        // The client is deliberately left open: it shares the caller's connection manager.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

        HttpGet httpGet = new HttpGet(new URIBuilder(INDEX_URL).build());
        System.out.println("发起请求的信息:"+httpGet);
        httpGet.setConfig(requestConfig());

        String content = null;
        // try-with-resources returns the connection to the pool even when reading fails.
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = response.getEntity();
                if (httpEntity != null) {
                    content = EntityUtils.toString(httpEntity, "utf8");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return content;
    }

    /** Builds the timeout configuration shared by both request methods. */
    private static RequestConfig requestConfig() {
        return RequestConfig.custom()
                .setConnectTimeout(CONNECT_TIMEOUT_MS)
                .setConnectionRequestTimeout(CONNECTION_REQUEST_TIMEOUT_MS)
                .setSocketTimeout(SOCKET_TIMEOUT_MS)
                .build();
    }
}
Jsouputil.java
package utils;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Set;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import dao.dao;
import entity.Cvf;
/**
* 这是使用Jsoup解析
*/
public class Jsouputil {
/**
* 使用Selector选择器获取元素
*/
public static void testSelector()throws Exception{
//获取Document对象
HttpClientPool httpClientPool =new HttpClientPool();
//创建连接池管理器
PoolingHttpClientConnectionManager cm =new PoolingHttpClientConnectionManager();
//获取网页HTML字符串
String content=httpClientPool.doGet(cm);
//解析字符串
Document doc = Jsoup.parse(content);
// System.out.println(doc.toString());
//[attr=value],利用属性获取
Elements elements = doc.select("div[id=content]").select("dl").select("dt[class=ptitle]");
System.out.println(elements.toString());
Cvf cvf=new Cvf();
dao dao=new dao();
if(elements!=null)
{
for(Element ele:elements)
{
String href="http://openaccess.thecvf.com/";
String cname=ele.select("a").text();
System.out.println(cname);
String href2=ele.select("a").attr("href");
String chref=href.concat(href2);
System.out.println(chref);
String cabstract =null;
String ckeyword =null;
//获取title的内容
CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
//创建URIBuilder
URIBuilder uribuilder= new URIBuilder(chref);
HttpGet httpGet= new HttpGet(uribuilder.build());
RequestConfig config = RequestConfig.custom().setConnectTimeout(10000*10000)//设置创建连接的最长时间,单位为毫秒
.setConnectionRequestTimeout(10000*10000)//设置获取连接的最长时间,单位为毫秒
.setSocketTimeout(100000*1000000)//设置传输数据的最长时间,单位为毫秒
.build();
//给请求设置请求信息
httpGet.setConfig(config);
CloseableHttpResponse response=null;
response = httpClient.execute(httpGet);
//解析响应,获取数据
//判断状态码是否为两百
if(response.getStatusLine().getStatusCode()==200||response.getStatusLine().getStatusCode()==302) {
Document document = Jsoup.parse(new URL(chref), 100000);
cabstract = document.select("div[id=abstract]").text();
System.out.println("已获取摘要");
String[] strs = strTostrArray(cname+cabstract);
ckeyword=keyword(strs);
}
else {
System.out.println(response.getStatusLine().getStatusCode());
cabstract =null;
ckeyword=null;
}
if(response!=null) {
//关闭response
response.close();
}
cvf=new Cvf(cname,chref,cabstract,ckeyword);
dao.add(cvf);
}
}
}
public static String[] strTostrArray(String str) {
/*
* 将非字母字符全部替换为空格字符" " 得到一个全小写的纯字母字符串包含有空格字符
*/
str = str.toLowerCase();// 将字符串中的英文部分的字符全部变为小写
String regex = "[\W]+";// 非字母的正则表达式 --W:表示任意一个非单词字符
str = str.replaceAll(regex, " ");
String[] strs = str.split(" "); // 以空格作为分隔符获得字符串数组
return strs;
}
public static String keyword(String[] strs) {
/*
* 建立字符串(String)出现次数(Integer)的映射
*/
HashMap<String, Integer> strhash = new HashMap<String, Integer>();
Integer in = null;// 用于存放put操作的返回值
for (String s : strs) {// 遍历数组 strs
in = strhash.put(s, 1);
if (in != null) {// 判断如果返回的不是null,则+1再放进去就是出现的次数
strhash.put(s, in + 1);
}
}
Set<java.util.Map.Entry<String, Integer>> entrySet = strhash.entrySet();
String maxStr = null;// 用于存放出现最多的单词
int maxValue = 0;// 用于存放出现最多的次数
for (java.util.Map.Entry<String, Integer> e : entrySet) {
String key = e.getKey();
Integer value = e.getValue();
if(key.equals("a")||key.equals("the")||key.equals("to")||key.equals("and")||key.equals("in")||key.equals("of")||key.equals("our")||key.equals("your")||key.equals("we")||key.equals("is")||key.equals("on")||key.equals("for")||key.equals("that")||key.equals("an")||key.equals("are")) {
value=0;
}
if (value > maxValue) {
maxValue = value;// 这里有自动拆装箱
maxStr = key;
}
}
System.out.println("出现最多的单词是:" + maxStr + "出现了" + maxValue + "次");
return maxStr;
}
}
