zoukankan      html  css  js  c++  java
  • php使用服务器进行远程抓取百度网页内容

       php使用服务器进行远程抓取百度网页内容

    <?php
    // Report every error level except notices and warnings.
    error_reporting(E_ALL & ~E_NOTICE & ~E_WARNING);

    // User-Agent of the incoming request, replayed later on the outgoing cURL call.
    // The header is optional (CLI runs, some bots omit it), so fall back to an
    // empty string instead of reading a missing $_SERVER key.
    $useragent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
    /**
     * Get the client IP address, preferring the X-Forwarded-For request header.
     *
     * Lookup order: $_SERVER['HTTP_X_FORWARDED_FOR'], then $_SERVER['REMOTE_ADDR'].
     * A candidate is skipped when it is missing, empty, or equals "unknown"
     * (case-insensitive). When the chosen value is a comma-separated list, only
     * its first entry is returned.
     *
     * NOTE(review): X-Forwarded-For is supplied by the client and can be forged;
     * do not treat the result as a trusted address.
     *
     * @return string The selected address, or '' when no candidate is usable.
     */
    function getip() {
        $unknown = 'unknown';
        // Initialise up front so the function never reads an undefined variable
        // when neither server key yields a usable value.
        $ip = '';

        foreach (array('HTTP_X_FORWARDED_FOR', 'REMOTE_ADDR') as $key) {
            if (!empty($_SERVER[$key]) && strcasecmp($_SERVER[$key], $unknown) !== 0) {
                $ip = $_SERVER[$key];
                break;
            }
        }

        if (false !== strpos($ip, ',')) {
            // explode() must be stored in a variable first: reset() takes its
            // argument by reference and raises a notice on a function result.
            $parts = explode(',', $ip);
            $ip = trim($parts[0]);
        }

        return $ip;
    }
    
    /**
     * Get the address of the directly connected peer.
     *
     * Reads $_SERVER['REMOTE_ADDR'] first and falls back to the REMOTE_ADDR
     * environment variable.
     *
     * @return string The address, or "unknown" when neither source has a value.
     */
    function get_client_ip(){
        // Guard with empty(): the key is absent outside a web SAPI and reading
        // it directly raises an undefined-index diagnostic.
        if (!empty($_SERVER['REMOTE_ADDR'])) {
            return $_SERVER['REMOTE_ADDR'];
        }

        $fromEnv = getenv("REMOTE_ADDR");
        if ($fromEnv) {
            return $fromEnv;
        }

        return "unknown";
    }
    
    // Keyword pool; one entry is picked at random for each request.
    $word=[
     '医院',
    ];
    $arrword = $word[mt_rand(0, count($word) - 1)];
    $keyword = urlencode($arrword);
    $url = "http://m.baidu.com/s?word=" . $keyword;
    //$url = "http://www.sdfymj.com/ua.php";

    // Request headers, sent in ONE list: every CURLOPT_HTTPHEADER call replaces
    // the previously configured list, so setting the option twice silently
    // dropped the first set of headers.
    //  - No explicit Host header: cURL derives it from the URL, and the old
    //    "www.baidu.com" value contradicted the m.baidu.com target.
    //  - No Content-Type header: this is a GET request without a body.
    $header = array(
        "Connection: keep-alive",
        'Referer:http://m.baidu.com/',
        // Pass the visitor's addresses along to the upstream server.
        'X-FORWARDED-FOR:' . getip(),
        'CLIENT-IP:' . get_client_ip(),
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    // Replay the visitor's User-Agent on the outgoing request.
    curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
    // Return the body from curl_exec() instead of printing it directly.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    // Execute the request.
    $content = curl_exec($ch);

    // Strict comparison: curl_exec() returns false only on failure, whereas an
    // empty response body ("") is a successful transfer and not an error.
    $failed = ($content === false);
    $errorMessage = $failed ? curl_error($ch) : '';

    // Release the handle.
    curl_close($ch);

    // Output either the error or the fetched page.
    if ($failed) {
        echo "error:" . $errorMessage;
    } else {
        echo $content;
    }
    ?>
    

      

  • 相关阅读:
    机器人的运动范围
    矩阵中的路径
    MySql数据库表的基本连接
    Java虚拟机垃圾收集算法
    spring_boot 配置
    js 表格合并单元格
    js数组操作
    前端定位Position属性四个值
    form表单在发送到服务器时候编码方式
    多文件上传CommonsMultipartResolver
  • 原文地址:https://www.cnblogs.com/68xi/p/13784816.html
Copyright © 2011-2022 走看看