zoukankan      html  css  js  c++  java
  • phpspider案例


    phpspider案例

    
    <?php
    
    // Load the phpspider autoloader relative to this file, so the script works
    // regardless of the directory it is launched from.
    require __DIR__ . '/autoload.php';

    use phpspider\core\phpspider;

    // The two marker comments below are kept verbatim because the original
    // author explicitly asks for them not to be removed.
    /* Do NOT delete this comment */
    /* 不要删除这段注释 */

    // Crawl configuration for the Jianshu (简书) demo spider.
    $configs = array(
        'name'     => '简书',
        'log_show' => false,
        'tasknum'  => 1,

        // Database connection settings used by the 'db' export below.
        'db_config' => array(
            'host' => '127.0.0.1',
            'port' => 3306,
            'user' => 'root',
            'pass' => 'root',
            'name' => 'demo',
        ),

        // Export every scraped record into a database table.
        // If no rows get inserted, check that the table structure and column
        // names match the 'fields' names defined below.
        'export' => array(
            'type'  => 'db',
            'table' => 'jianshu',
        ),

        // Domains the spider is allowed to crawl (real host names).
        'domains' => array(
            'jianshu.com',
            'www.jianshu.com',
        ),

        // Entry point(s) of the crawl.
        'scan_urls' => array(
            'https://www.jianshu.com/c/V2CqjW?utm_medium=index-collections&utm_source=desktop',
        ),

        // List (collection) page URL pattern.
        // \w+ stands for the variable part of the URL; the collection slug in
        // scan_urls above ("V2CqjW") is alphanumeric, so \d+ would not match it.
        // Single quotes keep the regex backslashes literal.
        'list_url_regexes' => array(
            'https://www\.jianshu\.com/c/\w+',
        ),

        // Content (article) page URL pattern.
        // NOTE(review): article slugs are assumed to be alphanumeric like the
        // collection slugs - confirm against real article URLs.
        'content_url_regexes' => array(
            'https://www\.jianshu\.com/p/\w+',
        ),

        'max_try' => 5,

        // Fields extracted from each content page; the selectors are XPath
        // expressions. Both fields are marked as required.
        'fields' => array(
            array(
                'name'     => 'title',
                'selector' => "//h1[@class='title']",
                'required' => true,
            ),
            array(
                'name'     => 'content',
                'selector' => "//div[@class='show-content-free']",
                'required' => true,
            ),
        ),
    );

    // Build the spider from the configuration and start crawling.
    $spider = new phpspider($configs);
    $spider->start();
    
    ?>
    
    
  • 相关阅读:
    07 JavaWeb
    06 XML编程(CRUD)
    05 XML
    04 DOM一窥
    03 Javascript初识
    02 CSS/javaScript
    01 HTML基础
    09_IO流
    08_集合概述
    07_基本数据类型和包装类
  • 原文地址:https://www.cnblogs.com/lalalagq/p/10206050.html
Copyright © 2011-2022 走看看