zoukankan      html  css  js  c++  java
  • PHP抓取网页内容的几种方法

    方法1: 用file_get_contents 以get方式获取内容 
    <?php 
    // Page to download; the query string is sent as part of the GET request.
    $url = 'http://www.domain.com/?para=123';

    // file_get_contents() returns false (and raises a warning) when the
    // request fails, so check for that instead of silently echoing nothing.
    $html = file_get_contents($url);
    if ($html === false) {
        echo "Failed to fetch $url";
    } else {
        echo $html;
    }
    ?> 
    
    方法2:用file_get_contents函数,以post方式获取url 
    <?php 
    $url = 'http://www.domain.com/test.php?id=123';

    // Encode the form fields as an application/x-www-form-urlencoded body.
    $data = http_build_query(array('foo' => 'bar'));

    // Every HTTP header line must end with CRLF ("\r\n"). A raw line break
    // followed by indentation inside the string would instead be read as a
    // continuation of the previous header.
    $opts = array(
        'http' => array(
            'method'  => 'POST',
            'header'  => "Content-type: application/x-www-form-urlencoded\r\n" .
                         "Content-Length: " . strlen($data) . "\r\n",
            'content' => $data,
        ),
    );
    $ctx = stream_context_create($opts);

    // The second argument is the boolean use_include_path flag, hence false.
    // The result is false when the request fails; report it rather than
    // hiding the failure behind the @ operator.
    $html = file_get_contents($url, false, $ctx);
    if ($html === false) {
        echo "POST request to $url failed";
    }
    
    如果需要再传递cookie数据,则把 
    'header'=> "Content-type: application/x-www-form-urlencoded\r\n" . 
               "Content-Length: " . strlen($data) . "\r\n", 
    修改为 
    'header'=> "Content-type: application/x-www-form-urlencoded\r\n" . 
               "Content-Length: " . strlen($data) . "\r\n" . 
               "cookie:cookie1=c1;cookie2=c2\r\n", 
    即可 
    
    方法3: 用fopen打开url, 以get方式获取内容 
    <?php 
    // NOTE: $url is not defined inside this snippet; it is assumed to have
    // been set by earlier code.
    $fp = fopen($url, 'r');
    if ($fp === false) {
        echo "Unable to open $url";
    } else {
        // Fetch the stream meta data. For http:// streams the response header
        // lines are expected under the 'wrapper_data' key; join them so they
        // can be printed (interpolating the array itself would print "Array").
        $meta = stream_get_meta_data($fp);
        $header = isset($meta['wrapper_data'])
            ? implode('<br>', (array) $meta['wrapper_data'])
            : '';

        // Read the body line by line, starting from an empty string.
        $result = '';
        while (!feof($fp)) {
            $line = fgets($fp, 1024);
            if ($line === false) {
                // Read error or end of data: stop instead of looping forever.
                break;
            }
            $result .= $line;
        }
        fclose($fp);

        echo "url header: {$header} <br>";
        echo "url body: $result";
    }
    ?> 
    
    方法4: 用fopen打开url, 以post方式获取内容 
    <?php 
    // Encode the form fields as an application/x-www-form-urlencoded body.
    $data = http_build_query(array('foo2' => 'bar2', 'foo3' => 'bar3'));

    // Each header line is terminated with CRLF ("\r\n"); a raw line break
    // inside the string literal does not produce a valid header separator.
    $opts = array(
        'http' => array(
            'method'  => 'POST',
            'header'  => "Content-type: application/x-www-form-urlencoded\r\n" .
                         "Cookie:cook1=c3;cook2=c4\r\n" .
                         "Content-Length: " . strlen($data) . "\r\n",
            'content' => $data,
        ),
    );

    $context = stream_context_create($opts);
    $stream = fopen('http://www.test.com/zzzz.php?id=i3&id2=i4', 'rb', false, $context);
    if ($stream === false) {
        echo 'POST request failed';
    } else {
        // Read the whole response body, not just the first 1024 bytes, and
        // release the handle once done.
        $w = stream_get_contents($stream);
        fclose($stream);
        echo $w;
    }
    ?> 
    
    方法5:用fsockopen函数打开url,以get方式获取完整的数据,包括header和body 
    <?php 
    /**
     * Fetch a URL with a hand-written HTTP GET request over a raw socket.
     *
     * NOTE: the request is sent as HTTP/1.1; a server may answer with chunked
     * transfer encoding, which this function returns undecoded.
     *
     * @param string       $url    Absolute URL including the host.
     * @param string|false $cookie Value for the Cookie request header, or
     *                             false to send no cookie.
     * @return string|false The raw response (status line, headers and body),
     *                      or false if the URL has no host or the connection
     *                      cannot be opened.
     */
    function get_url ($url,$cookie=false)
    {
        $parts = parse_url($url);
        if ($parts === false || !isset($parts['host'])) {
            return false;
        }

        // Build the request target: default to "/" and append the query
        // string only when the URL actually has one.
        $query = isset($parts['path']) ? $parts['path'] : '/';
        if (isset($parts['query'])) {
            $query .= '?' . $parts['query'];
        }
        // Diagnostic output kept from the original implementation.
        echo "Query:".$query;

        $host = $parts['host'];
        $port = isset($parts['port']) ? $parts['port'] : 80;

        $fp = fsockopen($host, $port, $errno, $errstr, 30);
        if (!$fp) {
            return false;
        }

        // A non-default port has to be repeated in the Host header.
        $hostHeader = ($port == 80) ? $host : $host . ':' . $port;

        // Every request line is terminated by CRLF; an empty line ends the
        // header section.
        $request  = "GET $query HTTP/1.1\r\n";
        $request .= "Host: $hostHeader\r\n";
        $request .= "Connection: Close\r\n";
        if ($cookie) {
            $request .= "Cookie:   $cookie\r\n";
        }
        $request .= "\r\n";
        fwrite($fp, $request);

        $result = '';
        while (!feof($fp)) {
            $line = fgets($fp, 1024);
            if ($line === false) {
                // Read error or connection closed: stop reading.
                break;
            }
            $result .= $line;
        }
        fclose($fp);

        return $result;
    }
    /**
     * Return only the body of the page at $url, with the HTTP response
     * header removed.
     *
     * @param string       $url    URL passed through to get_url().
     * @param string|false $cookie Cookie header value passed through to get_url().
     * @return string|false The response body, or false when the request
     *                      failed or the response has no header/body separator.
     */
    function GetUrlHTML($url,$cookie=false)
    {
        $rowdata = get_url($url,$cookie);
        if (!$rowdata) {
            return false;
        }

        // The header section ends at the first empty line, i.e. CRLF CRLF.
        $separator = "\r\n\r\n";
        $pos = strpos($rowdata, $separator);
        if ($pos === false) {
            return false;
        }

        // Cast so an empty body is returned as '' on every PHP version.
        return (string) substr($rowdata, $pos + strlen($separator));
    }
    ?> 
    
    方法6:用fsockopen函数打开url,以POST方式获取完整的数据,包括header和body 
    <?php 
    /**
     * Send an application/x-www-form-urlencoded POST request over a raw
     * socket and return the complete response.
     *
     * @param string $URL      Absolute URL including the host.
     * @param array  $data     Form fields as name => value pairs.
     * @param string $cookie   Value for the Cookie request header; the header
     *                         is omitted when this is empty.
     * @param string $referrer Value for the Referer request header; "111" is
     *                         sent when empty.
     * @return string|false The raw response (status line, headers and body),
     *                      or false if the URL has no host or the connection
     *                      cannot be opened.
     */
    function HTTP_Post($URL,$data,$cookie, $referrer="")
    {
        // parsing the given URL
        $URL_Info = parse_url($URL);
        if ($URL_Info === false || !isset($URL_Info["host"])) {
            return false;
        }

        // Building referrer: fall back to the placeholder value if not given
        if ($referrer == "") {
            $referrer = "111";
        }

        // making string from $data; both names and values are URL-encoded
        $values = array();
        foreach ($data as $key => $value) {
            $values[] = urlencode($key) . "=" . urlencode($value);
        }
        $data_string = implode("&", $values);

        // Find out which port is needed - if not given use standard (=80)
        if (!isset($URL_Info["port"])) {
            $URL_Info["port"] = 80;
        }

        // Request target: default to "/" and keep the URL's query string.
        $target = isset($URL_Info["path"]) ? $URL_Info["path"] : "/";
        if (isset($URL_Info["query"])) {
            $target .= "?" . $URL_Info["query"];
        }

        // building POST-request: every header line ends with CRLF, an empty
        // line separates the headers from the body
        $request  = "POST " . $target . " HTTP/1.1\r\n";
        $request .= "Host: " . $URL_Info["host"] . "\r\n";
        $request .= "Referer: $referrer\r\n";
        $request .= "Content-type: application/x-www-form-urlencoded\r\n";
        $request .= "Content-length: " . strlen($data_string) . "\r\n";
        $request .= "Connection: close\r\n";
        if ($cookie) {
            $request .= "Cookie:   $cookie\r\n";
        }
        $request .= "\r\n";
        // The body is exactly Content-length bytes; nothing is appended.
        $request .= $data_string;

        $fp = fsockopen($URL_Info["host"], $URL_Info["port"]);
        if (!$fp) {
            return false;
        }
        fputs($fp, $request);

        $result = '';
        while (!feof($fp)) {
            $line = fgets($fp, 1024);
            if ($line === false) {
                // Read error or connection closed: stop reading.
                break;
            }
            $result .= $line;
        }
        fclose($fp);

        return $result;
    }
    
    ?> 
    
    方法7:使用curl库,使用curl库之前,可能需要查看一下php.ini是否已经打开了curl扩展 
    <?php 
    // Seconds allowed for establishing the connection.
    $timeout = 5;

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, 'http://www.domain.com/');
    // Return the response from curl_exec() instead of printing it directly.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);

    $file_contents = curl_exec($ch);
    // curl_exec() returns false on failure; fetch the reason before the
    // handle is closed.
    $error = ($file_contents === false) ? curl_error($ch) : '';
    curl_close($ch);

    if ($file_contents === false) {
        echo "cURL request failed: $error";
    } else {
        echo $file_contents;
    }
    ?>
  • 相关阅读:
    MQTT TLS 加密传输
    python多进程并发redis
    各种消息队列的特点
    mqtt异步publish方法
    Numpy API Analysis
    Karma install steps for unit test of Angular JS app
    reinstall bower command
    Simulate getter in JavaScript by valueOf and toString method
    How to: Raise and Consume Events
    获取对象的类型信息 (JavaScript)
  • 原文地址:https://www.cnblogs.com/myphper/p/3305500.html
Copyright © 2011-2022 走看看