zoukankan      html  css  js  c++  java
  • php curl 抓取

     1 <?php
     2 
     3  set_time_limit(0);
     4 function curl_multi($urls) {  
     5     if (!is_array($urls) or count($urls) == 0) {  
     6         return false;  
     7     }   
     8     $num=count($urls);  
     9     $curl = $curl2 = $text = array();  
    10     $handle = curl_multi_init();  
    11     function createCh($url) {  
    12         $ch = curl_init();  
    13         curl_setopt ($ch, CURLOPT_URL, $url);  
    14         curl_setopt ($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko');//设置头部  
    15         curl_setopt ($ch, CURLOPT_REFERER, $url); //设置来源  
    16         curl_setopt ($ch, CURLOPT_ENCODING, "gzip"); // 编码压缩  
    17         curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);  
    18         curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, 1);//是否采集301、302之后的页面  
    19         curl_setopt ($ch, CURLOPT_MAXREDIRS, 5);//查找次数,防止查找太深  
    20         curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); // 对认证证书来源的检查  
    21         curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE); // 从证书中检查SSL加密算法是否存在         
    22         curl_setopt ($ch, CURLOPT_TIMEOUT, 20);  
    23         curl_setopt ($ch, CURLOPT_HEADER, 0);//输出头部  
    24         return $ch;  
    25     }  
    26     foreach($urls as $k=>$v){  
    27         $url=$urls[$k];  
    28         $curl[$k] = createCh($url);  
    29         curl_multi_add_handle ($handle,$curl[$k]);  
    30     }  
    31     $active = null;  
    32     do {  
    33         $mrc = curl_multi_exec($handle, $active);  
    34     } while ($mrc == CURLM_CALL_MULTI_PERFORM);  
    35   
    36     while ($active && $mrc == CURLM_OK) {  
    37         if (curl_multi_select($handle) != -1) {  
    38             usleep(100);  
    39         }  
    40         do {  
    41             $mrc = curl_multi_exec($handle, $active);  
    42         } while ($mrc == CURLM_CALL_MULTI_PERFORM);  
    43     }   
    44   
    45     foreach ($curl as $k => $v) {  
    46         if (curl_error($curl[$k]) == "") {  
    47             $text[$k] = (string) curl_multi_getcontent($curl[$k]);   
    48         }  
    49         curl_multi_remove_handle($handle, $curl[$k]);  
    50         curl_close($curl[$k]);  
    51     }   
    52     curl_multi_close($handle);  
    53     return $text;  
    54 }  
    55 $urls=array('http://www.baidu.com',  
    56             'http://www.baidu.com',  
    57             'http://www.baidu.com',  
    58             'http://www.baidu.com',  
    59             'http://www.baidu.com',  
    60             'http://www.baidu.com',  
    61             'http://www.baidu.com',  
    62             'http://www.baidu.com'  
    63             );  
    64 $res=curl_multi($urls);  
    65 print_r($res);
  • 相关阅读:
    Uncaught (in promise) DOMException: Failed to execute 'postMessage' on 'Window': An object could not be cloned.
    iframe的坑
    echarts展示
    常量
    变量赋值
    变量声明
    变量初始化
    windows下nvm的安装及使用
    sessionStorage 使用方法
    jquery+ajax获取本地json对应数据
  • 原文地址:https://www.cnblogs.com/wujunbin/p/7566865.html
Copyright © 2011-2022 走看看