zoukankan      html  css  js  c++  java
  • 爬虫效果--详细

    项目简介:
    此项目为了方便同学们进行爬虫编写。涉及到的知识点有 HTML、JavaScript、jQuery、
    ECharts、Servlet 3.0、Jsoup、FastJson、IO 流。
    项目概要设计:
    1、包结构的设计:必须满足 MVC 开发模式,以实现解耦

     1 package com.hyxy.food.entity;
     2 public class Food {
     3     private int id;//区别不同的商店
     4     private String name;//名称
     5     private int num;//评论条数
     6     public int getId() {
     7         return id;
     8     }
     9     public void setId(int id) {
    10         this.id = id;
    11     }
    12     public String getName() {
    13         return name;
    14     }
    15     public void setName(String name) {
    16         this.name = name;
    17     }
    18     public int getNum() {
    19         return num;
    20     }
    21     public void setNum(int num) {
    22         this.num = num;
    23     }
    24     public Food(int id, String name, int num) {
    25         super();
    26         this.id = id;
    27         this.name = name;
    28         this.num = num;
    29     }
    30     public Food() {
    31         super();
    32     }    
    33 }
     1 package com.hyxy.food.util;
     2 
     3 import java.io.IOException;
     4 import java.io.InputStream;
     5 import java.sql.Connection;
     6 import java.sql.DriverManager;
     7 import java.util.Properties;
     /**
      * Opens JDBC connections using the settings stored in the classpath resource
      * {@code jdbc.properties}.
      */
     public class ConnectMysqlDB {
         /**
          * Opens a new connection described by {@code jdbc.properties}
          * (keys read: {@code url}, {@code user}, {@code pwd}, {@code driver}).
          *
          * <p>This method never throws: any failure (missing resource, unreadable
          * properties, unknown driver class, refused connection) is printed to
          * standard error and reported by returning {@code null}.
          *
          * @return a new connection, or {@code null} if it could not be opened
          */
         public static Connection getConnection() {
             Connection conn = null;
             // try-with-resources closes the properties stream on every path;
             // a null resource (file not found) is simply skipped on close.
             try (InputStream in = ConnectMysqlDB.class.getClassLoader().getResourceAsStream("jdbc.properties")) {
                 if (in == null) {
                     // Fail with a readable message instead of an NPE from Properties.load(null).
                     throw new IOException("jdbc.properties not found on the classpath");
                 }
                 Properties p = new Properties();
                 p.load(in);
                 String url = p.getProperty("url");
                 String user = p.getProperty("user");
                 String password = p.getProperty("pwd");
                 String driver = p.getProperty("driver");
                 Class.forName(driver);
                 conn = DriverManager.getConnection(url, user, password);
             } catch (Exception e) {
                 // Broad catch is deliberate: callers rely on "null on failure, never throws".
                 e.printStackTrace();
             }
             return conn;
         }

         /**
          * Manual smoke test: opens a connection and closes it again.
          *
          * @param args unused
          */
         public static void main(String[] args) {
             try (Connection conn = getConnection()) {
                 // Nothing to do; the connection is only opened to check the configuration.
             } catch (Exception e) {
                 e.printStackTrace();
             }
         }
     }
    1 jdbc.properties  --->src根目录下
    2 driver=com.mysql.jdbc.Driver
    3 url=jdbc:mysql://192.168.0.199:3306/spider?useUnicode=true&characterEncoding=utf-8
    4 user=1805
    5 pwd=123456
     1 package com.hyxy.food.dao;
     2 import java.sql.Connection;
     3 import com.hyxy.food.entity.Food;
     4 import com.hyxy.food.util.ConnectMysqlDB;
     5 import java.sql.PreparedStatement;
     6 import java.sql.ResultSet;
     7 import java.sql.SQLException;
     8 import java.util.ArrayList;
     9 import java.util.List;
    10 
    11 public class FoodDao {
    12     private  Connection conn;
    13     public FoodDao() {
    14        if(conn==null) {
    15         conn=ConnectMysqlDB.getConnection();
    16     }
    17      }
    18     public boolean addFood(Food f) {
    19         String sql="insert into food(name,num) value(?,?)";
    20         PreparedStatement ps;
    21         boolean flag=false;
    22         try {
    23             ps = conn.prepareStatement(sql);
    24             ps.setString(1, f.getName());
    25             ps.setInt(2, f.getNum());
    26             ps.executeUpdate();
    27             flag=true;
    28         } catch (SQLException e) {
    29             // TODO Auto-generated catch block
    30             e.printStackTrace();
    31         }
    32         return flag;
    33     }
    34     public List<Food> list(){
    35         String sql="select name,num from food";
    36         List<Food> list=new ArrayList<Food>();
    37         try {
    38             PreparedStatement ps=conn.prepareStatement(sql);
    39             ResultSet rs= ps.executeQuery();
    40             while(rs.next()) {
    41                 Food f=new Food();
    42                 f.setName(rs.getString("name"));
    43                 f.setNum(rs.getInt("num"));
    44                 list.add(f);
    45             }
    46         } catch (SQLException e) {
    47             // TODO Auto-generated catch block
    48             e.printStackTrace();
    49         }
    50         return list;
    51     }
    52 }
     1 package com.hyxy.food.controller;
     2 
     3 import java.io.IOException;
     4 import javax.servlet.ServletException;
     5 import javax.servlet.annotation.WebServlet;
     6 import javax.servlet.http.HttpServlet;
     7 import javax.servlet.http.HttpServletRequest;
     8 import javax.servlet.http.HttpServletResponse;
     9 
    10 import org.jsoup.Jsoup;
    11 import org.jsoup.nodes.Document;
    12 import org.jsoup.nodes.Element;
    13 import org.jsoup.select.Elements;
    14 
    15 import com.hyxy.food.dao.FoodDao;
    16 import com.hyxy.food.entity.Food;
    17 @WebServlet("/food")
    18 public class FoodServlet extends HttpServlet {
    19     private static final long serialVersionUID = 1L;
    20     public FoodServlet() {
    21         super();
    22         // TODO Auto-generated constructor stub
    23     }
    24     protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    25         doPost(request, response);
    26     }
    27     protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    28         String url=request.getParameter("url");//http://www.mafengwo.cn/cy/10035/0-0-0-0-0-
    29         int sum=Integer.parseInt(request.getParameter("sum"));
    30         //爬虫开始
    31         for (int i = 1; i <= sum; i++) {
    32             System.out.println("爬取第"+i+"页数据");
    33             //爬虫第一步,获取一个爬虫的Document对象
    34             Document d=Jsoup.connect(url+i+".html").get();
    35             //输出Document对象
    36     //        System.out.println(d.html());
    37             //第二步观察网页具体信息,爬取想要的信息
    38             Elements es= d.select("li[class=item clearfix]");
    39             boolean flag=false;
    40             for (Element element : es) {
    41                 String title=element.select("div[class=title]").select("h3").select("a").first().text();
    42                 int num=Integer.parseInt(element.select("div[class=grade]").select("p[class=rev-num]").select("em").text());
    43                 Food food=new Food();
    44                 food.setName(title);
    45                 food.setNum(num);
    46                 FoodDao dao=new FoodDao();
    47                 flag = dao.addFood(food);
    48             }
    49             if(flag) {
    50                 System.out.println("success!!!");
    51             }else {
    52                 System.out.println("false!!!");
    53             }
    54 //            request.getRequestDispatcher("list").forward(request, response);
    55         }
    56     }
    57 
    58 }
     1 package com.hyxy.food.test;
     2 
     3 import java.io.IOException;
     4 
     5 import org.jsoup.Jsoup;
     6 import org.jsoup.nodes.Document;
     7 import org.jsoup.nodes.Element;
     8 import org.jsoup.select.Elements;
     9 
    10 public class FoodTest {
    11     public static void main(String[] args) throws IOException {
    12         int sum=20;
    13         for (int i = 1; i <= sum; i++) {
    14             System.out.println("爬取第"+i+"页数据");
    15             String url="http://www.mafengwo.cn/cy/10035/0-0-0-0-0-"+i+".html";
    16             //爬虫第一步,获取一个爬虫的Document对象
    17             Document d=Jsoup.connect(url).get();
    18             //输出Document对象
    19     //        System.out.println(d.html());
    20             //第二步观察网页具体信息,爬取想要的信息
    21             Elements es= d.select("li[class=item clearfix]");
    22             for (Element element : es) {
    23                 String title=element.select("div[class=title]").select("h3").select("a").first().text();
    24                 int num=Integer.parseInt(element.select("div[class=grade]").select("p[class=rev-num]").select("em").text());
    25                 System.out.println(title+":"+num);
    26             }
    27         }            
    28     }
    29 }
  • 相关阅读:
    优秀数
    加法检测器
    数字转换
    选课
    二叉苹果树
    分离与合体
    括号配对
    凸多边形的划分
    能量项链
    石子合并
  • 原文地址:https://www.cnblogs.com/yangfanfan/p/11342017.html
Copyright © 2011-2022 走看看