手册:https://doc.phpspider.org/configs-members.html
参考:https://www.jianshu.com/p/01052508ea7c
不多说,代码贴上:
<?php
/**
 * Crawl jianshu.com articles with phpspider and export the title and body
 * of each article page into a MySQL table.
 *
 * Run from the directory that contains ./vendor (relative paths below are
 * resolved against the current working directory).
 */

require './vendor/autoload.php';

// The namespace separators are required: the class lives in phpspider\core.
use phpspider\core\phpspider;

// The two marker comments below are kept verbatim from the original script,
// which explicitly asks that they not be removed.
/* Do NOT delete this comment */
/* 不要删除这段注释 */

$configs = array(
    'name' => '简书',
    'log_show' => false,
    'tasknum' => 1,
    // NOTE(review): the file name looks like a leftover from another example; confirm.
    'log_file' => 'data/qiushibaike.log',
    'log_type' => 'error,debug,warn',

    // Database connection settings.
    'db_config' => array(
        'host' => '127.0.0.1',
        'port' => 3306,
        'user' => 'pai',
        'pass' => 'pai',
        'name' => 'pai',
    ),

    // Export the extracted fields to MySQL.
    'export' => array(
        'type' => 'db',
        // If no rows are inserted, check that the table structure and
        // column names match the field names declared in 'fields'.
        'table' => 'pai_content',
    ),

    // Domains the crawler is allowed to visit.
    // NOTE(review): 'jianshu' is not a complete host name; verify it is intended.
    'domains' => array(
        'jianshu',
        'www.jianshu.com',
    ),

    // Entry URLs: the crawl starts from these links.
    'scan_urls' => array(
        'https://www.jianshu.com',
    ),

    // URL patterns identifying list pages.
    'list_url_regexes' => array(
        "https://www.jianshu.com",
    ),

    // URL patterns identifying content (article) pages. The slug is matched
    // with \w+; without the backslash the pattern only matches literal "w"s.
    'content_url_regexes' => array(
        'https://www.jianshu.com/p/\w+',
    ),

    'max_try' => 1,

    // Fields extracted from each content page (XPath selectors).
    // NOTE(review): the class names look auto-generated by the site and may
    // change; confirm they still match the live markup.
    'fields' => array(
        array(
            'name' => "title",
            'selector' => "//h1[@class='_1RuRku']",
            'required' => true,
        ),
        array(
            'name' => "content",
            'selector' => "//article[@class='_2rhmJa']",
            'required' => true,
        ),
    ),
);

$spider = new phpspider($configs);
$spider->start();