zoukankan      html  css  js  c++  java
  • 假期十

    热词爬取

    package word;

    import java.io.IOException;

    import org.jsoup.Connection;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;

    import dao.Dao;

    /**
     * Small crawler for Baidu Baike entries.
     *
     * <p>Starting from one entry page, it follows every hyperlink found inside the
     * page's {@code para} elements and prints the title and summary of each linked
     * entry to standard output.
     */
    public class Baidu1 {

        /** Entry point: crawls the links of one hard-coded Baidu Baike entry. */
        public static void main(String[] args) {
            lianjie("https://baike.baidu.com/item/%E6%89%8B%E6%9C%BA/6342");
        }

        /**
         * Fetches {@code address} and, for each {@code a[href]} nested in an element
         * with class {@code para}, downloads the linked page once and prints its
         * title followed by its summary.
         *
         * <p>A failure to download the start page ends the crawl; a failure to
         * download one linked page is reported on standard error and the crawl
         * continues with the next link.
         *
         * @param address URL of the entry page whose links should be followed
         */
        public static void lianjie(String address) {
            Document startPage;
            try {
                startPage = Jsoup.connect(address).get();
            } catch (IOException e) {
                System.err.println("Failed to fetch start page: " + address);
                e.printStackTrace();
                return;
            }

            Elements anchors = startPage.getElementsByClass("para").select("a[href]");
            for (Element anchor : anchors) {
                // absUrl resolves relative hrefs (e.g. "/item/...") against the page's
                // base URI and leaves already-absolute hrefs untouched, instead of
                // blindly prefixing the host to whatever the attribute contains.
                String url = anchor.absUrl("href");
                if (!isHttpUrl(url)) {
                    // Unresolvable, fragment-only or non-HTTP (e.g. javascript:) link.
                    continue;
                }
                try {
                    // One download per link; both fields are read from the same document.
                    Document linkedPage = Jsoup.connect(url).get();
                    String title = extractTitle(linkedPage);
                    String explanation = extractSummary(linkedPage);
                    System.out.println(title);
                    System.out.println(explanation);
                    // NOTE: persisting the result through dao.Hotword(title, explanation, url)
                    // was commented out in the original code and remains disabled here.
                } catch (IOException e) {
                    System.err.println("Failed to fetch linked page: " + url);
                    e.printStackTrace();
                }
            }
        }

        /**
         * Downloads {@code address} and returns the entry title.
         *
         * @param address URL of the entry page
         * @return the concatenated text of the {@code h1} and {@code h2} elements inside
         *     elements with class {@code lemmaWgt-lemmaTitle-title}; an empty string
         *     if the page cannot be downloaded or has no such elements
         */
        public static String timu(String address) {
            try {
                return extractTitle(Jsoup.connect(address).get());
            } catch (IOException e) {
                System.err.println("Failed to fetch page: " + address);
                e.printStackTrace();
                return "";
            }
        }

        /**
         * Downloads {@code address} and returns the entry summary.
         *
         * @param address URL of the entry page
         * @return the text of the elements with class {@code lemma-summary}; an empty
         *     string if the page cannot be downloaded or has no such elements
         */
        public static String ex(String address) {
            try {
                return extractSummary(Jsoup.connect(address).get());
            } catch (IOException e) {
                System.err.println("Failed to fetch page: " + address);
                e.printStackTrace();
                return "";
            }
        }

        /** Returns the h1 text followed by the h2 text of the page's title container. */
        private static String extractTitle(Document page) {
            Elements titleBox = page.getElementsByClass("lemmaWgt-lemmaTitle-title");
            return titleBox.select("h1").text() + titleBox.select("h2").text();
        }

        /** Returns the combined text of the page's {@code lemma-summary} elements. */
        private static String extractSummary(Document page) {
            return page.getElementsByClass("lemma-summary").text();
        }

        /** Tells whether {@code url} is a non-empty absolute http or https URL. */
        private static boolean isHttpUrl(String url) {
            return url.startsWith("http://") || url.startsWith("https://");
        }

    }

  • 相关阅读:
    Brettle.Web.NeatUpload.dll支持的大文件上传
    WebStorm:令人眼前一亮的一款前端开发IDE
    jQuery UI vs EasyUI
    jquery validate.js表单验证的基本用法入门
    JQuery扩展插件Validate—6radio、checkbox、select的验证位置显示
    ExtJs与jQuery的比较
    WebStorm 5.0 注册码
    常用PHP运行环境一键安装包
    Sublime Text 2.0.1 简体中文版来了 技术帝的最爱
    JS匿名函数
  • 原文地址:https://www.cnblogs.com/jbwen/p/12293366.html
Copyright © 2011-2022 走看看