zoukankan      html  css  js  c++  java
  • curl学习(实例不断总结)

    1.先来一个简单的案例,请求http协议的网站
    
    // Create a cURL handle.
    $curl = curl_init();

    // URL to fetch.
    curl_setopt($curl, CURLOPT_URL, 'http://www.hao123.com');

    // Whether to include the response headers in the result: 0 = omit, non-zero = include.
    curl_setopt($curl, CURLOPT_HEADER, 0);

    // Non-zero: curl_exec() returns the response as a string instead of echoing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    // Perform the request. curl_exec() returns false when the transfer fails.
    $data = curl_exec($curl);

    // The error text has to be read before the handle is closed.
    $error = ($data === false) ? curl_error($curl) : '';

    // Release the handle.
    curl_close($curl);

    if ($data === false) {
        // Report why the request failed instead of silently dumping bool(false).
        echo 'cURL request failed: ' . $error;
    } else {
        // Show the fetched data.
        var_dump($data);
    }
    2.请求https协议网站,并发送数据(get)
    
    // NOTE(review): the appid and secret are hard-coded in the query string.
    // Real credentials should be loaded from configuration, not kept in source.
    $url = 'https://api.weixin.qq.com/cgi-bin/token?grant_type=client_credential&appid=wxfefd7eaa357a57cf&secret=e061b4df1183fb203e2dc38d35b6a633';
    //$url = 'http://localhost/wx/xx.php';
    $curl = curl_init($url);

    // Verify the server certificate against the trusted CA bundle. Turning this
    // off (0) makes the HTTPS connection vulnerable to man-in-the-middle attacks.
    // If verification fails because no CA bundle is configured, point
    // CURLOPT_CAINFO at one rather than disabling the check.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, 1);

    // 2 = the name in the certificate must match the host name being requested.
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);

    // Follow "Location:" redirects and fetch the final page.
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);

    // Give up after 30 seconds so a stalled server cannot hang the script.
    curl_setopt($curl, CURLOPT_TIMEOUT, 30);

    // Do not include the response headers in the result.
    curl_setopt($curl, CURLOPT_HEADER, 0);

    /*
    CURLOPT_RETURNTRANSFER
    1: on success curl_exec() returns the response as a string and prints nothing;
       on failure it returns false.
    0 or unset: on success curl_exec() prints the response and returns true;
       on failure it returns false.
    */
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $result = curl_exec($curl);

    // The error text has to be read before the handle is closed.
    $error = ($result === false) ? curl_error($curl) : '';

    // Release the handle.
    curl_close($curl);

    if ($result === false) {
        echo 'cURL request failed: ' . $error;
    } else {
        $res = json_decode($result, true);
        // json_decode() returns null both for the JSON literal "null" and for
        // malformed input; json_last_error() tells the two apart.
        if ($res === null && json_last_error() !== JSON_ERROR_NONE) {
            echo 'Response is not valid JSON: ' . $result;
        } else {
            print_r($res);
        }
    }
     // 3. Simulated login to the lampbrother forum (bbs.lampbrother.net).
     $url = 'http://bbs.lampbrother.net/login.php';

     // Form fields posted to the login page.
     $arr = array(
         'step'=>2,
         'lgt'=>2,
         'pwuser'=>'你的邮箱',
         'pwpwd'=>'你的密码',
         'question'=>0,
         'hideid'=>0
     );

     /***** Method 1 *****/
     /*
     // Save the cookies to cookie.txt
     curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookie.txt');
     curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
     Writing the cookies to a file and reading the file back on the next request
     means two I/O operations, which is less efficient.
     */

     /*
     $cookie_file = tempnam('./temp','cookie');
     // First request: log in and let cURL store the cookies in the file.
     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $url);
     curl_setopt($ch, CURLOPT_HEADER, 0);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
     curl_setopt($ch, CURLOPT_POST, 1);
     curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($arr));
     curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
     $data = curl_exec($ch);
     curl_close($ch);
     //echo $data;

     // Second request: fetch the home page using the cookies stored in the file.
     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, 'http://bbs.lampbrother.net/');
     curl_setopt($ch, CURLOPT_HEADER, 0);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);
     curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
     curl_exec($ch);
     curl_close($ch);
     */


     /***** Method 2 *****/
     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $url);
     // Include the response headers in the result so the cookies can be read from them.
     curl_setopt($ch, CURLOPT_HEADER, 1);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
     curl_setopt($ch, CURLOPT_POST, 1);
     curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($arr));
     $content = curl_exec($ch);
     // The error text has to be read before the handle is closed.
     $error = ($content === false) ? curl_error($ch) : '';
     curl_close($ch);

     if ($content === false) {
         echo 'Login request failed: ' . $error;
     } else {
         // An HTTP response separates the header block from the body with an
         // empty line (CRLF CRLF). The limit of 2 keeps any further blank lines
         // inside the body from producing extra pieces.
         list($header) = explode("\r\n\r\n", $content, 2);
         print_r($header);

         // Collect the name=value part of every Set-Cookie header. Attributes
         // after the first ';' (path, expires, ...) are instructions for the
         // client and must not be sent back to the server.
         preg_match_all('/^set-cookie:[ \t]*([^;\r\n]*)/mi', $header, $matches);
         //print_r($matches);
         $cookies = implode('; ', $matches[1]);
         print_r($cookies);


         // Later requests can send the cookies directly:
         // curl_setopt($ch, CURLOPT_COOKIE, $cookies);
         $ch = curl_init();
         curl_setopt($ch, CURLOPT_URL, 'http://bbs.lampbrother.net/');
         curl_setopt($ch, CURLOPT_HEADER, 0);
         // 0: curl_exec() prints the page directly.
         curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);
         curl_setopt($ch, CURLOPT_COOKIE, $cookies);
         curl_exec($ch);
         curl_close($ch);
     }
    <?php
    // 4. Example: logging in to OSChina (oschina.net) and scraping a page.

    header('Content-type:text/html;charset=utf-8');
    $url = 'https://www.oschina.net/action/user/hash_login';

    // Login form fields; the password is sent as a SHA-1 hash.
    $data = array(
        'email'=>'你的邮箱',
        'pwd'=>sha1('你的密码'),
        'save_login'=>1,
    );

    // Request headers sent with the login request.
    $headers = array(
        'User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36',
        'Referer:https://www.oschina.net/home/login?goto_page=http%3A%2F%2Fwww.oschina.net%2Fcode%2Fsnippet_47318_27221',
    );

    // Step 1: log in and collect the cookies.
    $curl = curl_init($url);
    // Verify the server certificate; disabling this check (0) would expose the
    // login data to man-in-the-middle attacks. If verification fails because no
    // CA bundle is configured, set CURLOPT_CAINFO instead of turning it off.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, 1);
    // 2 = the name in the certificate must match the requested host name.
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($curl, CURLOPT_TIMEOUT, 30);
    // Include the response headers in the result so Set-Cookie can be read.
    curl_setopt($curl, CURLOPT_HEADER, 1);
    curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_POST, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, http_build_query($data));
    $result = curl_exec($curl);
    // Error text and header size have to be read before the handle is closed.
    $error = ($result === false) ? curl_error($curl) : '';
    $headerSize = ($result === false) ? 0 : curl_getinfo($curl, CURLINFO_HEADER_SIZE);
    curl_close($curl);

    if ($result === false) {
        exit('Login request failed: ' . $error);
    }

    //print_r($result);
    // CURLINFO_HEADER_SIZE is the total size of all headers received, so this
    // also covers the header blocks of intermediate redirect responses.
    $headerText = substr($result, 0, $headerSize);

    // Collect the name=value part of every Set-Cookie header; attributes after
    // the first ';' (path, expires, ...) must not be sent back to the server.
    preg_match_all('/^set-cookie:[ \t]*([^;\r\n]*)/mi', $headerText, $matches);
    //print_r($matches);
    $cookies = implode('; ', $matches[1]);

    // Step 2: fetch the target page with the login cookies.
    $url = 'http://my.oschina.net/xxxxx/admin/inbox';
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_COOKIE, $cookies);
    $res = curl_exec($ch);
    $error = ($res === false) ? curl_error($ch) : '';
    curl_close($ch);

    if ($res === false) {
        exit('Page request failed: ' . $error);
    }

    require './simple_html_dom.php';

    // Example use of the simple_html_dom parsing library.

    $html1 = new simple_html_dom();
    $html1->load($res);

    // Every <li> with an id attribute inside <ul class="Msgs">.
    $items = $html1->find('ul.Msgs li[id]');

    foreach ($items as $item) {
        // Nodes can be searched directly, so no second parser instance is needed.
        foreach ($item->find('.msg') as $msg) {
            // Casting a node to string yields its HTML.
            echo $msg.'<hr/>';
        }
    }

    // Release the parsed document; the library recommends clear() to free memory.
    $html1->clear();
    ?>
  • 相关阅读:
    Guava学习笔记(4):Ordering犀利的比较器
    Guava学习笔记(3):复写的Object常用方法
    Guava学习笔记(1):Optional优雅的使用null
    [BZOJ1076][SCOI2008]奖励关
    [BZOJ1821][JSOI2010]部落划分
    [BZOJ1041]圆上的整点
    [Luogu2324]八数码难题
    [BZOJ1085][SCOI2005]骑士精神
    [BZOJ3109] [cqoi2013]新数独
    [LnOI2019]长脖子鹿省选模拟赛 东京夏日相会
  • 原文地址:https://www.cnblogs.com/loveyouyou616/p/3542414.html
Copyright © 2011-2022 走看看