zoukankan      html  css  js  c++  java
  • JAVA 爬虫Gecco

    主要代码:

      1 Gecco(matchUrl="https://github.com/{user}/{project}", pipelines="consolePipeline")
      2 public class MyGithub implements HtmlBean {
      3  
      4     private static final long serialVersionUID = -7127412585200687225L;
      5      
      6     @Request
      7     private HttpRequest request;
      8      
      9     @RequestParameter("user")
     10     private String user;
     11      
     12     @RequestParameter("project")
     13     private String project;
     14      
     15     @Text
     16     @HtmlField(cssPath=".repository-meta-content")
     17     private String title;
     18      
     19     @Text
     20     @HtmlField(cssPath=".pagehead-actions li:nth-child(2) .social-count")
     21     private int star;
     22      
     23     @Text
     24     @HtmlField(cssPath=".pagehead-actions li:nth-child(3) .social-count")
     25     private int fork;
     26  
     27     @Href(click=false)
     28     @HtmlField(cssPath="ul.numbers-summary > li:nth-child(4) > a")
     29     private String contributors;
     30      
     31     @HtmlField(cssPath=".entry-content")
     32     private String readme;
     33  
     34     public HttpRequest getRequest() {
     35         return request;
     36     }
     37  
     38     public void setRequest(HttpRequest request) {
     39         this.request = request;
     40     }
     41  
     42     public String getReadme() {
     43         return readme;
     44     }
     45  
     46     public void setReadme(String readme) {
     47         this.readme = readme;
     48     }
     49  
     50     public String getUser() {
     51         return user;
     52     }
     53  
     54     public void setUser(String user) {
     55         this.user = user;
     56     }
     57  
     58     public String getProject() {
     59         return project;
     60     }
     61  
     62     public void setProject(String project) {
     63         this.project = project;
     64     }
     65  
     66     public String getTitle() {
     67         return title;
     68     }
     69  
     70     public void setTitle(String title) {
     71         this.title = title;
     72     }
     73  
     74     public int getStar() {
     75         return star;
     76     }
     77  
     78     public void setStar(int star) {
     79         this.star = star;
     80     }
     81  
     82     public int getFork() {
     83         return fork;
     84     }
     85  
     86     public void setFork(int fork) {
     87         this.fork = fork;
     88     }
     89      
     90     public String getContributors() {
     91         return contributors;
     92     }
     93  
     94     public void setContributors(String contributors) {
     95         this.contributors = contributors;
     96     }
     97  
     98     public static void main(String[] args) {
     99         GeccoEngine.create()
    100         .classpath("com.geccocrawler.gecco.demo")
    101         //开始抓取的页面地址
    102         .start("https://github.com/xtuhcy/gecco")
    103         //开启几个爬虫线程,线程数量最好不要大于start request数量
    104         .thread(2)
    105         //单个爬虫每次抓取完一个请求后的间隔时间
    106         .interval(2000)
    107         .run();
    108     }
    109  
    110 }
  • 相关阅读:
    asp.net mvc 两级分类联动方法示例
    动手实践虚拟网络
    KVM 网络虚拟化基础
    LVM 类型的 Storage Pool
    KVM 存储虚拟化
    CPU 和内存虚拟化原理
    远程管理 KVM 虚机
    启动第一个 KVM 虚机
    准备 KVM 实验环境
    虚拟化
  • 原文地址:https://www.cnblogs.com/lr393993507/p/5629380.html
Copyright © 2011-2022 走看看