zoukankan      html  css  js  c++  java
  • curl模拟ip和来源进行网站采集的实现方法

    对于限制了ip和来源的网站,使用正常的采集方式是不行的。这里介绍我的一种方法:使用php的curl扩展模拟ip和来源,从而采集限制了ip和来源的网站。注意:这种方法只对通过请求头(如CLIENT-IP、X-FORWARDED-FOR、Referer)判断ip和来源的服务端有效;如果服务端只根据REMOTE_ADDR判断ip,则无法通过伪造请求头绕过。

    1.设置页面限制ip和来源访问
    比如服务端的server.php

    <?php
     
    // Whitelist: the only client IP and the referer prefix allowed to see this page.
    $allow_ip = '192.168.0.100';
    $allow_referer = 'http://www.xxx.cn';

    // Values reported for the current request.
    $client_ip = getip();
    $referer = getreferer();

    // Access is granted only when the IP matches and the referer begins with the
    // allowed prefix (strpos() === 0 means "found at offset 0").
    echo ($client_ip==$allow_ip && strpos($referer, $allow_referer)===0)
      ? 'allow access'
      : 'deny access';
     
    /**
     * Get the visitor's IP address.
     *
     * Checks $_SERVER['HTTP_CLIENT_IP'], then $_SERVER['HTTP_X_FORWARDED_FOR'],
     * then $_SERVER['REMOTE_ADDR'], and returns the first one that is non-empty.
     *
     * NOTE: the two HTTP_* entries come from request headers, so the client can
     * set them to any value; only REMOTE_ADDR is supplied by the server itself.
     *
     * @return string The first non-empty candidate, or '' when none is set.
     */
    function getip(){
      // Candidate keys in priority order.
      $candidates = array('HTTP_CLIENT_IP', 'HTTP_X_FORWARDED_FOR', 'REMOTE_ADDR');

      foreach($candidates as $key){
        if(!empty($_SERVER[$key])){
          return $_SERVER[$key];
        }
      }

      return '';
    }
     
    /**
     * Get the visitor's referer.
     *
     * @return string The value of $_SERVER['HTTP_REFERER'], or '' when it is not set.
     */
    function getreferer(){
      return isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
    }
     
    ?>

    使用curl正常访问(不模拟ip和来源,此时服务端会返回 deny access)

    <?php
    /**
     * Send a POST request with cURL and return the result of curl_exec().
     *
     * If curl_error() reports a non-empty message after the transfer, the script
     * is terminated with die() and that message is printed.
     *
     * @param string $url     Request URL.
     * @param array  $data    POST fields; encoded with http_build_query().
     * @param array  $header  Header lines passed to CURLOPT_HTTPHEADER.
     * @param int    $timeout Value for CURLOPT_TIMEOUT (seconds).
     * @return mixed Whatever curl_exec() returns with CURLOPT_RETURNTRANSFER enabled.
     */
    function doCurl($url, $data=array(), $header=array(), $timeout=30){

      // Option table; applied one by one, in the same order as listed here.
      $options = array(
        CURLOPT_URL            => $url,
        CURLOPT_HTTPHEADER     => $header,
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => http_build_query($data),
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => $timeout,
      );

      $ch = curl_init();
      foreach($options as $option => $value){
        curl_setopt($ch, $option, $value);
      }

      $response = curl_exec($ch);

      // A non-empty error string means the transfer failed: abort the script.
      $error = curl_error($ch);
      if($error){
        die($error);
      }

      curl_close($ch);

      return $response;

    }
     
    // Usage: request the restricted page without spoofing anything and print the reply.
    $url = 'http://www.xxx.cn/server.php';

    $response = doCurl($url);
    echo $response;
    ?>

    使用curl模拟ip和来源进行访问

    模拟来源

    curl_setopt($ch, CURLOPT_REFERER, '来源');

    模拟ip

    curl_setopt($ch, CURLOPT_HTTPHEADER, array('CLIENT-IP: 模拟ip','X-FORWARDED-FOR: 模拟ip'));

    完整代码如下:

    <?php
    /**
     * Send a POST request with cURL, with caller-supplied headers and referer,
     * and return the result of curl_exec().
     *
     * If curl_error() reports a non-empty message after the transfer, the script
     * is terminated with die() and that message is printed.
     *
     * @param string $url     Request URL.
     * @param array  $data    POST fields; encoded with http_build_query().
     * @param array  $header  Header lines passed to CURLOPT_HTTPHEADER
     *                        (e.g. 'CLIENT-IP: ...', 'X-FORWARDED-FOR: ...').
     * @param string $referer Value passed to CURLOPT_REFERER.
     * @param int    $timeout Value for CURLOPT_TIMEOUT (seconds).
     * @return mixed Whatever curl_exec() returns with CURLOPT_RETURNTRANSFER enabled.
     */
    function doCurl($url, $data=array(), $header=array(), $referer='', $timeout=30){

      // Option table; applied one by one, in the same order as listed here.
      $options = array(
        CURLOPT_URL            => $url,
        CURLOPT_HTTPHEADER     => $header,
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => http_build_query($data),
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => $timeout,
        // Spoofed referer
        CURLOPT_REFERER        => $referer,
      );

      $ch = curl_init();
      foreach($options as $option => $value){
        curl_setopt($ch, $option, $value);
      }

      $response = curl_exec($ch);

      // A non-empty error string means the transfer failed: abort the script.
      $error = curl_error($ch);
      if($error){
        die($error);
      }

      curl_close($ch);

      return $response;

    }
     
    // Usage: request the restricted page with a spoofed IP and referer.

    // Referer to present to the server
    $referer = 'http://www.xxx.cn/';

    // IP to present to the server, sent through request headers
    $header = array(
      'CLIENT-IP: 192.168.0.100',
      'X-FORWARDED-FOR: 192.168.0.100'
    );

    // No POST fields are sent
    $data = array();

    // External site to scrape
    $url = 'http://www.example.cn/server.php';

    // The last argument is the timeout passed to doCurl()
    $response = doCurl($url, $data, $header, $referer, 5);

    echo $response;
    ?>

     

  • 相关阅读:
    2016-8-29
    2016-8-25
    2016-8-24
    2016-8-23
    2016-8-22
    2016-8-16
    2016-8-15
    深圳_多测师面试 __腾讯云/_高级讲师肖sir
    深圳_多测师面试 _新字节跳动(2020年10月23日)_高级讲师肖sir
    多测师讲解自动化 _RF_(202)高级讲师肖sir
  • 原文地址:https://www.cnblogs.com/phpper/p/7169032.html
Copyright © 2011-2022 走看看