字符串与爬虫相关
1,解析url
/**
 * Fetch a URL with cURL, issuing either a GET or a POST request.
 *
 * The response headers are not included in the result, and the body is
 * returned as a string instead of being written to output.
 *
 * @param string       $curl      URL to request.
 * @param array|string $postInfo  POST payload (array or string). When empty
 *                                (falsy), no POST options are set.
 * @param string       $cookie    Value for the Cookie header; skipped when empty.
 * @param string       $referer   Value for the Referer header; skipped when empty.
 * @param string       $userAgent Value for the User-Agent header; skipped when empty.
 *
 * @return string The response body on success. On failure, the string
 *                "网络请求出错: " followed by the cURL error message
 *                (callers cannot distinguish this from real content by type).
 */
function _grab($curl, $postInfo = '', $cookie = '', $referer = '', $userAgent = '')
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $curl);
    // Do not include response headers in the returned data.
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // Return the transfer as a string instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    // For HTTPS URLs, skip certificate verification. Only the URL scheme is
    // tested, so "https" appearing elsewhere in the URL no longer triggers it.
    // NOTE(review): disabling peer verification allows man-in-the-middle
    // attacks; kept because existing callers may rely on it.
    if (stripos(ltrim((string) $curl), 'https:') === 0) {
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    }

    // POST body, only when a payload was supplied.
    if ($postInfo) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postInfo);
    }

    // Optional request headers.
    if ($cookie) {
        curl_setopt($ch, CURLOPT_COOKIE, $cookie);
    }
    if ($referer) {
        curl_setopt($ch, CURLOPT_REFERER, $referer);
    }
    if ($userAgent) {
        curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
    }

    $content = curl_exec($ch);
    // Read the error text before the handle is released.
    $error = ($content === false) ? curl_error($ch) : '';

    // Release the handle explicitly on PHP < 8, where it is a resource.
    // On PHP 8+ the handle is an object freed automatically and curl_close()
    // is a no-op (deprecated in newer releases), so it is skipped there.
    if (PHP_VERSION_ID < 80000) {
        curl_close($ch);
    }

    if ($content === false) {
        return "网络请求出错: " . $error;
    }

    return $content;
}