zoukankan      html  css  js  c++  java
  • 使用CURL抓取淘宝页面

    /**
         * Fetch the HTML of a Taobao page at the given address.
         *
         * Issues a GET request with browser-like headers (User-Agent, Accept,
         * Accept-Language and a hard-coded Cookie) and follows HTTP redirects.
         *
         * @param string $url Address of the page to fetch.
         * @return string|false The response body on success; false when $url is
         *                      empty, the cURL handle cannot be created, or the
         *                      transfer fails.
         */
        public function getTaoBaoHtml($url) {
            if (empty($url)) {
                return false;
            }
            $ch = curl_init();
            if ($ch === false) {
                return false;
            }
            // Target address.
            curl_setopt($ch, CURLOPT_URL, $url);
            // Browser-like request headers. Values are sent verbatim, so they must
            // not be wrapped in "{...}" placeholders (the braces would end up on
            // the wire and corrupt the User-Agent and the first cookie name).
            // NOTE(review): the Cookie is a hard-coded captured session and has
            // presumably expired; consider supplying it from configuration.
            curl_setopt($ch, CURLOPT_HTTPHEADER, array(
                'User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0',
                'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
                'Cookie: cq=ccp%3D1; cna=a7suCzOmSTECAXgg9iCf4AtX; t=671b2069c7e8ac444da66d664a397a5f; tracknick=%5Cu4F0D%5Cu6653%5Cu8F8901; _tb_token_=nDiU1vCuzFd0; cookie2=c54709ffbe04a5ccb80283c34d6b00fa; pnm_cku822=128WsMPac%2FFS4KgNn%2BYfhzduo4U2NC0zh9cAS4%3D%7CWUCLjKhqr873bOIFQcMecSw%3D%7CWMEKRlV%2B3D9a6XWaidNWNQOSWXwaXugvQHzhxALh%7CX0YLbX78NUR2b2DHoxnIqZENQqR35TBZbfQ5vooI0b6GHZA3U1kr%7CXkdILogCr878ZK9I%2B%2FE3QjAD3lFJJaAZRA%3D%3D%7CXUeMwMR2s%2BTUQk8IPP5TNgWfUjQwonccMCxihTa0fRYgtjgfa4j6%7CXMYK7F8liOvH3hMUpzXkiaU%2FJw%3D%3D',
            ));
            // The page body is what we want, so do not suppress it.
            curl_setopt($ch, CURLOPT_NOBODY, 0);
            // Keep the response headers out of the returned content.
            curl_setopt($ch, CURLOPT_HEADER, 0);
            // Return the response from curl_exec() instead of printing it; this
            // also lets a failed transfer surface as false rather than as "".
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            // Bound the request so an unresponsive server cannot block forever.
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            $html = curl_exec($ch);
            curl_close($ch);
            return $html;
        }
    

      

  • 相关阅读:
    【转】对象持久化与数据序列化的联系?
    【转】Linux安装方法一(U盘引导)
    bash中的"-n"、"-z" 以及“[]” 、“[[]]”判断
    mysql获取行号
    IP白名单
    复合赋值位运算符“&=、| =”
    Java匿名内部类访问外部
    mysql的orde by 按照指定状态顺序排序
    Spring声明式事务
    定时任务总结
  • 原文地址:https://www.cnblogs.com/geniusxjq/p/4881554.html
Copyright © 2011-2022 走看看