zoukankan      html  css  js  c++  java
  • java爬虫中jsoup的使用

                                               java爬虫中jsoup的使用

    jsoup可以用来解析HTML的内容,其功能非常强大,它可以像javascript那样直接从网页中提取有用的信息

    例如1:

     从html字符串中解析数据

    //直接从字符串中获取

        public static void getParByString()

        {

            String html = "<html><head><title> 这里是字符串内容</title></head"+ ">"+"<body><p class='p1'> 这里是 jsoup 作用的相关演示</p></body></html>";

           Document doc = Jsoup.parse(html);

           Elements links = doc.select("p[class]");

           for(Element link:links){

            String linkclass = link.className();

                String linkText = link.text();

                System.out.println(linkText);

                System.out.println(linkclass);

            }

        }

       从本地文件中解析数据

    //从本地文件中获取

        public static void getHrefByLocal()

        {

            File input = new File("C:\Users\Idea\Desktop\html\Home.html");

            Document doc = null;

            try {

                doc = Jsoup.parse(input,"UTF-8","http://www.oschina.net/");     //这里后面加了网址是为了解决后面绝对路径和相对路径的问题

                  }

            catch (IOExceptione) {

                // TODO Auto-generated catch block            

             e.printStackTrace();

                  }

            Elements links = doc.select("a[href]");

            for(Element link:links){

                String linkHref = link.attr("href");

                String linkText = link.text();

                System.out.println(linkText+":"+linkHref);

                   }

            

        }

    直接从网络上解析数据

    public static HashMap getHrefByNet(String url)

        {    

          HashMap hm = new HashMap();

          String href = null;

             try {

                //这是get方式得到的

                Document doc = Jsoup.connect(url).get();

                String title = doc.title();

                Elements links = doc.select("a[href]");        

                for(Element link:links){              

                    String linkHref = link.attr("abs:href");

                    String linkText = link.text();

                    //System.out.println(linkText+":"+linkHref);                

                    hm.put(linkText, linkHref);

                    href=linkText;

                }

                //System.out.println("***************");

                //另外一种是post方式

                /*@SuppressWarnings("unused")

                Document doc_Post = Jsoup.connect(url)

                        .data("query","Java")

                        .userAgent("I am jsoup")

                        .cookie("auth","token")

                        .timeout(10000)

                        .post();

                Elements links_Post = doc.select("a[href]");

                 for(Element link:links_Post){

                        String linkHref = link.attr("abs:href");

                        String linkText = link.text();

                        //System.out.println(linkText+":"+linkHref);

                        

                        //map.put(linkText, linkHref);

                    }*/          

            } catch (IOException e) {

                // TODO Auto-generated catch block            

                e.printStackTrace();

                hm.put("加载失败", "error");

            }         

            return hm ;

        }

       

    注意:需要导入的包如下(均来自jsoup的jar):

    import org.jsoup.*;
    import org.jsoup.nodes.*;
    import org.jsoup.select.Elements;

    最后附上jar包下载地址:

    http://jsoup.org/packages/jsoup-1.8.1.jar
    具体实际项目请看java爬虫实战项目

     循环遍历Hashtable中的键和值

    /* Create a sample table of key/value pairs. */
    Hashtable h = new Hashtable();
    /* Add an entry to the table. */
    h.put(key, value);
    /* Walk every entry, printing its value first and then its key. */
    for (Object item : h.entrySet())
    {
        Map.Entry m = (Map.Entry) item;
        System.out.println(m.getValue());
        System.out.println(m.getKey());
    }

     java文件夹的创建(先判断是否存在,如果不存在就创建)

    //创建文件夹(如果不存在就创建,存在就不变)

         public void makedir(){

             //定义文件夹路径

             String filePath = "D://home//Lucy";

             File file = new File(filePath);

             if(!file.exists()&&!file.isDirectory())

             {

                 System.out.println("不存在");

                 file.mkdirs();  //创建文件夹,注意mkdirs()mkdir()的区别

                 //判断是否创建成功

                 if(file.exists()&&file.isDirectory())   //文件夹存在并且是文件夹             {

                     System.out.println("文件夹创建成功!");

                 }

                 else{

                     System.out.println("文件创建不成功!");

                 }

             }

             else{

                 System.out.println("文件已经存在!");

             }

             

         }

     java文件的创建(先判断是否存在,如果不存在就创建)

    //创建文件,如果不存在就创建文件

         public void makeFile()

         {   

             String fileName = "D://file2.txt";

             File file = new File(fileName);

             if(!file.exists()&&!file.isFile())

             {

                try {

                    if(file.createNewFile())  //创建文件,返回布尔值,如果成功为true,否则为false               

                       {

                        System.out.println("文件创建成功!");

                    }

                    }

                   catch (IOException e) {

                    // TODO Auto-generated catch block                

                   e.printStackTrace();

                }

             }

             else{

              System.out.println("文件已经存在!");

              }

         }

    在文件中写入内容

     //往文件中写入文本

         public void writeText(String s)

         {

             String fileName = "D://file2.txt";

            File file = new File(fileName);

            if(file.exists()&&file.isFile()) //如果文件存在,可以写入内容        

                         {

                FileOutputStream fos = null;

                try {

                    fos = new FileOutputStream(fileName);

                }

                    catch (FileNotFoundException e2) {

                    // TODO Auto-generated catch block                

                    e2.printStackTrace();

                }

                try {

                    fos.write(s.getBytes());

                }

                    catch (IOException e1) {

                    // TODO Auto-generated catch block               

                    e1.printStackTrace();

                }

                try {

                    fos.close();

                }

                     catch (IOException e) {

                    // TODO Auto-generated catch block                

                    e.printStackTrace();

                }

            }

            else{

                System.out.println("文件不存在,不能写入内容");

            }

         }

     

    java获取系统时间:

    public static void getTime()

        {

            SimpleDateFormat f = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");  

            Date date = new Date();

            System.out.println(f.format(date));

            System.out.println(new SimpleDateFormat("yyyyMMdd日   HHmmss").format(date));

            System.out.println(date);

        }

    java连接mysql数据库

       首先添加jar包:下载jar包 

    /**
     * Small JDBC helper for a MySQL database.
     *
     * <p>Every public operation opens a fresh connection through {@link #init()},
     * runs one statement and then releases all JDBC resources. Failures are
     * reported on standard output / standard error rather than thrown.
     */
    public class connectDoctorMySql {

        /*
         * Alternative connection settings kept from the original example:
         *   url      = "jdbc:mysql://192.168.0.16/hive"
         *   name     = "com.mysql.jdbc.Driver"
         *   user     = "hive"
         *   password = "hive"
         */

        public static final String url = "jdbc:mysql://127.0.0.1/orcl?useUnicode=true&characterEncoding=utf-8&useSSL=false";

        public static final String name = "com.mysql.jdbc.Driver";

        public static final String user = "root";

        // NOTE(review): credentials are hard-coded in source; consider loading
        // them from configuration instead.
        public static final String password = "China123";

        public Connection conn = null;

        public PreparedStatement pst = null;

        public Statement stmt = null;

        ResultSet rs = null;

        /**
         * Loads the JDBC driver, opens a connection and creates a statement.
         * On any failure a message and the stack trace are printed; the fields
         * that were not reached keep their previous value.
         */
        public void init() {
            try {
                Class.forName(name); // load the driver class
                conn = DriverManager.getConnection(url, user, password);
                stmt = conn.createStatement();
            } catch (Exception e) {
                System.out.println("数据库连接失败. . .");
                e.printStackTrace();
            }
        }

        /**
         * Runs an update statement (INSERT / UPDATE / DELETE / DDL).
         *
         * <p>NOTE(review): the SQL text is executed as given; callers must not
         * build it from untrusted input.
         *
         * @param sql statement to execute
         */
        public void excute(String sql) {
            init();
            try {
                if (stmt == null) {
                    // init() already reported the failure; nothing to run.
                    return;
                }
                stmt.executeUpdate(sql);
            } catch (SQLException e) {
                System.out.println("数据执行失败:" + sql); // echo the failing statement
                e.printStackTrace();
            } finally {
                closeResources();
            }
        }

        /**
         * Runs a query and returns two columns of every row.
         *
         * @param sql query to execute
         * @param x   1-based index of the column stored at position 0 of each row
         * @param y   1-based index of the column stored at position 1 of each row
         * @return a list of {@code String[2]} rows; empty when the connection
         *         could not be opened, partial when the query fails midway
         */
        public ArrayList select(String sql, int x, int y) {
            init();
            ArrayList<String[]> result = new ArrayList<String[]>();
            try {
                if (stmt == null) {
                    return result;
                }
                // Stored in the field (not a shadowing local) so that
                // closeResources() can release it.
                rs = stmt.executeQuery(sql);
                while (rs.next()) {
                    String[] str = new String[2];
                    str[0] = rs.getString(x);
                    str[1] = rs.getString(y);
                    result.add(str);
                }
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                closeResources();
            }
            return result;
        }

        /**
         * Closes the result set, both statements and the connection, each in its
         * own try block so that one failing close does not skip the others, and
         * resets the fields to null.
         */
        private void closeResources() {
            if (rs != null) {
                try {
                    rs.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
                rs = null;
            }
            if (pst != null) {
                try {
                    pst.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
                pst = null;
            }
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
                stmt = null;
            }
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
                conn = null;
            }
        }
    }

    java连接oracle数据库

    /**
     * Small JDBC helper for an Oracle database.
     *
     * <p>Every public operation opens a fresh connection through {@link #init()},
     * runs one statement and then releases all JDBC resources. Failures are
     * reported on standard output / standard error rather than thrown.
     */
    public class connectDoctor {

        // Oracle connection settings (thin driver, host 127.0.0.1).
        public static final String url = "jdbc:oracle:thin:@127.0.0.1:1521:orcl";

        public static final String name = "oracle.jdbc.driver.OracleDriver";

        public static final String user = "c238891";

        // NOTE(review): credentials are hard-coded in source; consider loading
        // them from configuration instead.
        public static final String password = "Rapid111";

        public Connection conn = null;

        public PreparedStatement pst = null;

        public Statement stmt = null;

        ResultSet rs = null;

        /**
         * Loads the JDBC driver, opens a connection and creates a statement.
         * On any failure a message and the stack trace are printed; the fields
         * that were not reached keep their previous value.
         */
        public void init() {
            try {
                Class.forName(name); // load the driver class
                conn = DriverManager.getConnection(url, user, password);
                stmt = conn.createStatement();
            } catch (Exception e) {
                // This path is a connection failure, not a failed insert, so the
                // message says so.
                System.out.println("数据库连接失败. . .");
                e.printStackTrace();
            }
        }

        /**
         * Connection smoke test: prints every row of the {@code emp} table.
         */
        public void start() {
            init();
            String sql = "select * from emp";
            try {
                if (conn == null) {
                    // init() already reported the failure; nothing to query.
                    return;
                }
                pst = conn.prepareStatement(sql);
                rs = pst.executeQuery();
                while (rs.next()) {
                    System.out.println("编号:" + rs.getString("empno")
                            + ";姓名:" + rs.getString("ename")
                            + "; 工作:" + rs.getString("job")
                            + "; 领导:" + rs.getString("mgr")
                            + "; 雇佣日期:" + rs.getString("hiredate")
                            + "; 工资:" + rs.getString("sal")
                            + "; 奖金:" + rs.getString("comm")
                            + "; 部门:" + rs.getString("deptno"));
                }
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                // Statement and connection are released even when no result set
                // was produced.
                closeResources();
            }
        }

        /**
         * Runs an update statement (INSERT / UPDATE / DELETE / DDL).
         *
         * <p>NOTE(review): the SQL text is executed as given; callers must not
         * build it from untrusted input.
         *
         * @param sql statement to execute
         */
        public void excute(String sql) {
            init();
            try {
                if (stmt == null) {
                    return;
                }
                stmt.executeUpdate(sql);
            } catch (SQLException e) {
                System.out.println(sql); // echo the failing statement
                e.printStackTrace();
            } finally {
                closeResources();
            }
        }

        /**
         * Runs a query and returns two columns of every row.
         *
         * @param sql query to execute
         * @param x   1-based index of the column stored at position 0 of each row
         * @param y   1-based index of the column stored at position 1 of each row
         * @return a list of {@code String[2]} rows; empty when the connection
         *         could not be opened, partial when the query fails midway
         */
        public ArrayList select(String sql, int x, int y) {
            init();
            ArrayList<String[]> result = new ArrayList<String[]>();
            try {
                if (stmt == null) {
                    return result;
                }
                // Stored in the field (not a shadowing local) so that
                // closeResources() can release it.
                rs = stmt.executeQuery(sql);
                while (rs.next()) {
                    String[] str = new String[2];
                    str[0] = rs.getString(x);
                    str[1] = rs.getString(y);
                    result.add(str);
                }
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                closeResources();
            }
            return result;
        }

        /**
         * Closes the result set, both statements and the connection, each in its
         * own try block so that one failing close does not skip the others, and
         * resets the fields to null.
         */
        private void closeResources() {
            if (rs != null) {
                try {
                    rs.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
                rs = null;
            }
            if (pst != null) {
                try {
                    pst.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
                pst = null;
            }
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
                stmt = null;
            }
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
                conn = null;
            }
        }
    }

    好文要顶 关注我 收藏该文  

  • 相关阅读:
    STL: merge
    STL: rotate
    javascript的prototype继承问题
    日期正则表达式
    有关linq的一系列学习的文章,值得收藏
    EF读取关联数据
    jQuery UI中的日期选择插件Datepicker
    LINQ的基本语法中八个关键字用法说明
    Shell变量内容的删除、替代与替换
    Shell命令别名与历史命令
  • 原文地址:https://www.cnblogs.com/qingbai/p/11958814.html
Copyright © 2011-2022 走看看