今天有空,分享一个采集新浪新闻的 PHP 插件接口,可用于火车头采集器,代码比较简单,大家可以研究一下!
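新浪新闻滚动新闻(实时动态)列表页地址为:https://news.sina.com.cn/roll/?qq-pf-to=pcqq.group#pageid=153&lid=2968&k=&num=50&page=1 (注意:插件解析的是 JSON 数据,因此实际请求的应是返回 JSON 的接口地址,例如 https://feed.mix.sina.com.cn/api/roll/get?pageid=153&lid=2510&k=&num=50&page=1 )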
<?php
/**
 * Fetch a URL with cURL using browser-like request headers.
 *
 * Redirects are followed and compressed responses are decoded transparently.
 * When the transfer fails, the cURL error message is written with error_log().
 *
 * @param string $url Absolute http(s) URL to request.
 * @return string|false Response body, or false when curl_exec() fails.
 */
function curl_sina($url)
{
    // No explicit "Accept-Encoding" header: CURLOPT_ENCODING below makes cURL
    // advertise only the encodings it can actually decode. No explicit "Host"
    // header either, so it always matches the URL being requested.
    $header = array(
        'Accept: */*',
        'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Connection: keep-alive',
        'Referer: https://news.sina.com.cn/roll/?qq-pf-to=pcqq.group',
        'User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0 FirePHP/0.7.4',
        'x-insight: activate',
    );

    // Leave empty for a GET request; set to a string or an array to send a POST.
    $postData = '';

    // Uncomment the next line if the endpoint requires a cookie.
    // $cookie = 'Cookie: UOR=,news.sina.com.cn,; ULV=1545891095102:2:2:2:10.71.2.95_1545891091.345391:1545891089621; SINAGLOBAL=10.71.2.95_1545891091.345389; Apache=10.71.2.95_1545891091.345391; NEWSCENTER=78565b88b160488188f3c4bb7622647b; lxlrttp=1545098194';

    // Total transfer timeout in seconds.
    $timeout = 10;

    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);    // return the body instead of printing it
    curl_setopt($ch, CURLOPT_ENCODING, '');         // '' = accept every encoding this cURL build supports
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // follow 301/302 redirects

    if (strncasecmp($url, 'https://', 8) === 0) {
        // NOTE(review): certificate and host-name verification are disabled, as in
        // the original plugin, so the connection is open to man-in-the-middle
        // attacks. Enable both options where a CA bundle is available.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        // If the handshake fails with
        // "error:14077458:SSL routines:SSL23_GET_SERVER_HELLO:reason(1112)",
        // try forcing a protocol version (see the manual for the accepted values):
        // curl_setopt($ch, CURLOPT_SSLVERSION, 1);
    }

    if (!empty($postData)) {
        curl_setopt($ch, CURLOPT_POST, 1);
        // An array is sent as multipart/form-data, a query string as
        // application/x-www-form-urlencoded.
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
    }

    if (!empty($cookie)) {
        $header[] = $cookie;
    }
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_TIMEOUT, (int) $timeout);

    $content = curl_exec($ch);
    $error = curl_error($ch);
    if ($error !== '') {
        error_log($error);
    }
    curl_close($ch);

    return $content;
}

/**
 * Convert a JSON response into the marker HTML used for the list page.
 *
 * Every entry of result.data that has a non-empty string "url" becomes
 * "<b>URL</b>"; the concatenation is wrapped between the literal markers
 * "str" and "go".
 *
 * @param mixed $content Raw response body (normally the value returned by curl_sina()).
 * @return string Marker HTML, or '' when the body is not valid JSON, has no
 *                result.data array, or contains no usable URL.
 */
function sina_list_html($content)
{
    if (!is_string($content) || $content === '') {
        return '';
    }

    $decoded = json_decode($content, true);
    if (!is_array($decoded)
        || !isset($decoded['result']['data'])
        || !is_array($decoded['result']['data'])
    ) {
        return '';
    }

    $links = '';
    foreach ($decoded['result']['data'] as $item) {
        if (is_array($item) && isset($item['url']) && is_string($item['url']) && $item['url'] !== '') {
            $links .= '<b>' . $item['url'] . '</b>';
        }
    }

    return $links === '' ? '' : 'str' . $links . 'go';
}

// Example list endpoint:
// $u = 'https://feed.mix.sina.com.cn/api/roll/get?pageid=153&lid=2510&k=&num=50&page=1';

// $LabelArray and $LabelUrl are not defined in this file; presumably the
// collector that runs this plugin injects them -- TODO confirm.
if (isset($LabelArray['PageType'], $LabelUrl) && $LabelArray['PageType'] === 'List') {
    $listHtml = sina_list_html(curl_sina($LabelUrl));
    if ($listHtml !== '') {
        // Overwrite the page source only when at least one URL was extracted;
        // otherwise the existing 'Html' value is left untouched.
        $LabelArray['Html'] = $listHtml;
    }
}

// To pass the raw response through instead:
// $LabelArray['Html'] = curl_sina($LabelUrl);

echo serialize($LabelArray);
?>
注意:该接口获取列表有限制!这个插件只负责获取列表中的文章地址。另外分享我新上线、仍在测试中的技术专题文摘:http://zhimo.yuanzhumuban.cc/blog/