zoukankan      html  css  js  c++  java
  • 寒假学习进度-8(热词爬取)

    package hotword;
    
    import java.io.IOException;
    
    import org.jsoup.Connection;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    import dao.Dao;
    
    /**
     * Small jsoup-based crawler for Baidu Baike entries.
     *
     * <p>Starting from one entry page, it follows every hyperlink found inside the
     * elements with CSS class {@code para} and prints, for each linked page, the
     * entry title and the entry summary.
     */
    public class Baidu1 {

        /** CSS class of the container that holds the entry title headings. */
        private static final String TITLE_CLASS = "lemmaWgt-lemmaTitle-title";

        /** CSS class of the container that holds the entry summary text. */
        private static final String SUMMARY_CLASS = "lemma-summary";

        /**
         * Entry point: crawls the links of a fixed start entry.
         *
         * @param args unused
         */
        public static void main(String[] args)
        {
            lianjie("https://baike.baidu.com/item/%E6%89%8B%E6%9C%BA/6342");
        }

        /**
         * Fetches {@code address}, follows each link inside its {@code para} elements
         * and prints the linked entry's title and summary to standard output.
         *
         * <p>A failure to fetch the start page is reported on standard error and ends
         * the crawl; a failure to fetch one linked page is reported and that link is
         * skipped, so the remaining links are still processed.
         *
         * @param address URL of the page whose links should be followed
         */
        public static void lianjie(String address)
        {
            Document startPage;
            try {
                startPage = Jsoup.connect(address).get();
            } catch (IOException e) {
                System.err.println("Failed to fetch start page: " + address);
                e.printStackTrace();
                return;
            }

            Elements links = startPage.getElementsByClass("para").select("a[href]");
            for (Element link : links)
            {
                String url = resolveTarget(link);
                if (url.isEmpty()) {
                    // Not a navigable http(s) link (fragment, script link, unresolvable href).
                    continue;
                }

                // Fetch the linked page once and read both fields from the same document
                // instead of downloading it separately for the title and the summary.
                Document linkedPage;
                try {
                    linkedPage = Jsoup.connect(url).get();
                } catch (IOException e) {
                    System.err.println("Failed to fetch linked page: " + url);
                    e.printStackTrace();
                    continue;
                }

                String title = extractTitle(linkedPage);
                String explanation = extractSummary(linkedPage);
                System.out.println(title);
                System.out.println(explanation);
                // NOTE(review): persistence was already disabled in the original code
                // (a commented-out dao.Hotword(title, explanation, url) call); re-enable
                // it here if the results should be stored.
            }
        }

        /**
         * Downloads {@code address} and returns the entry title.
         *
         * @param address URL of the entry page
         * @return concatenated text of the {@code h1} and {@code h2} elements inside the
         *         title container, or an empty string if the page could not be fetched
         */
        public static String timu(String address)
        {
            try {
                return extractTitle(Jsoup.connect(address).get());
            } catch (IOException e) {
                System.err.println("Failed to fetch title from: " + address);
                e.printStackTrace();
                return "";
            }
        }

        /**
         * Downloads {@code address} and returns the entry summary.
         *
         * @param address URL of the entry page
         * @return text of the summary container, or an empty string if the page could
         *         not be fetched
         */
        public static String ex(String address)
        {
            try {
                return extractSummary(Jsoup.connect(address).get());
            } catch (IOException e) {
                System.err.println("Failed to fetch summary from: " + address);
                e.printStackTrace();
                return "";
            }
        }

        /**
         * Resolves a link's {@code href} against the base URI of its document.
         *
         * @param link anchor element carrying an {@code href} attribute
         * @return the absolute http(s) URL, or an empty string when the link should not
         *         be followed
         */
        private static String resolveTarget(Element link)
        {
            // Fragment-only links point back into the page that is already loaded.
            if (link.attr("href").startsWith("#")) {
                return "";
            }
            String absolute = link.absUrl("href");
            boolean isHttp = absolute.startsWith("http://") || absolute.startsWith("https://");
            return isHttp ? absolute : "";
        }

        /** Reads the h1 + h2 text of the title container from an already loaded page. */
        private static String extractTitle(Document page)
        {
            Elements titleBox = page.getElementsByClass(TITLE_CLASS);
            return titleBox.select("h1").text() + titleBox.select("h2").text();
        }

        /** Reads the summary text from an already loaded page. */
        private static String extractSummary(Document page)
        {
            return page.getElementsByClass(SUMMARY_CLASS).text();
        }

    }

    截图:

  • 相关阅读:
    Java-数据结构与算法-选择排序与冒泡排序
    Java-马士兵设计模式学习笔记-迭代器模式-模仿Collection ArrayList LinkedList
    Java-马士兵设计模式学习笔记-装饰者模式
    1072 Gas Station (30)(30 分)
    1034 Head of a Gang (30)(30 分)
    poj 3723 Conscription
    qduoj 218 签到题
    1045 Favorite Color Stripe (30)(30 分)
    1068 Find More Coins (30)(30 分)
    1057 Stack (30)(30 分)
  • 原文地址:https://www.cnblogs.com/liujinxin123/p/12263238.html
Copyright © 2011-2022 走看看