zoukankan      html  css  js  c++  java
  • php curl 正则获取网页标题

    <?php
    /****/
    //Gary xu
    //1122557724@qq.com
    /****/
    namespace Xuyaoxiang;
    
    	/**
    	 * Downloads a web page with cURL and extracts pieces of it (title, meta
    	 * description, declared charset, ...) with named regular expressions.
    	 *
    	 * Typical use: $value = (new Snoopy($url))->get_content('title');
    	 */
    	class Snoopy {

    		/**
    		 * Named PCRE patterns; capture group 1 of each is the extracted value.
    		 *
    		 * The "/" delimiter must be escaped inside a pattern ("<\/title>"),
    		 * otherwise preg_match() rejects the pattern with "Unknown modifier".
    		 * The title pattern uses the "s" flag and a lazy group so that titles
    		 * spanning several lines, or <title> tags with attributes, still match.
    		 */
    		public $pattern_array = array(
    			'title'       => '/<title[^>]*>(.*?)<\/title>/is',
    			'description' => '/<meta +name="[dD]escription" +content="(.*?)" *\/?>/',
    			'charset'     => '/charset=["\']?([\w-]+)/i',
    		);

    		/** User-Agent header sent with the request, to look like a desktop browser. */
    		public $user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36';

    		/** Encoding the downloaded page is converted to before matching. */
    		public $target_code = "utf-8";

    		/** URL of the page to download. */
    		public $url;

    		/** Raw page body after curl_get_data(); false when the download failed. */
    		public $data;

    		/** Key into $pattern_array selected by the last get_content() call. */
    		public $pattern_key;

    		/**
    		 * @param string $url Address of the page to fetch.
    		 */
    		public function __construct($url)
    		{
    			$this->url = $url;
    		}

    		/**
    		 * Registers or overrides a named extraction pattern.
    		 *
    		 * @param string $key Pattern name, later passed to get_content().
    		 * @param string $val PCRE pattern whose first capture group is the value.
    		 */
    		public function set_pattern($key, $val)
    		{
    			$this->pattern_array[$key] = $val;
    		}

    		/**
    		 * Downloads the page, normalises its encoding and returns the text
    		 * captured by the pattern registered under $pattern_key.
    		 *
    		 * @param string $pattern_key Name of a pattern in $pattern_array.
    		 * @return string|false Trimmed captured text, or false when the key is
    		 *                      empty/unknown, the download failed, or the
    		 *                      pattern did not match.
    		 */
    		public function get_content($pattern_key)
    		{
    			$this->pattern_key = $pattern_key;

    			// Reject empty or unregistered keys before touching the network.
    			if ($this->pattern_key == '' || !isset($this->pattern_array[$this->pattern_key])) {
    				return false;
    			}

    			$this->curl_get_data();

    			if ($this->data === false || $this->data === '') {
    				return false;
    			}

    			$this->check_charset($this->get_charset());

    			$content = $this->get_key_content();

    			// No match: report failure instead of reading a missing index.
    			if (!isset($content[1])) {
    				return false;
    			}

    			return trim($content[1]);
    		}

    		/**
    		 * Performs the HTTP GET and stores the response body in $this->data
    		 * (false on any failure).
    		 */
    		public function curl_get_data()
    		{
    			$this->data = false;

    			$curl = curl_init();
    			if ($curl === false) {
    				return;
    			}

    			// URL to fetch.
    			curl_setopt($curl, CURLOPT_URL, $this->url);

    			// Do not include response headers in the returned data.
    			curl_setopt($curl, CURLOPT_HEADER, 0);

    			// Return the body as a string instead of printing it.
    			curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    			curl_setopt($curl, CURLOPT_USERAGENT, $this->user_agent);

    			// Many sites answer http:// with a redirect; follow it, but bounded.
    			curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    			curl_setopt($curl, CURLOPT_MAXREDIRS, 5);

    			// Never hang forever on an unresponsive host.
    			curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
    			curl_setopt($curl, CURLOPT_TIMEOUT, 30);

    			// Run the request.
    			$this->data = curl_exec($curl);

    			curl_close($curl);
    		}

    		/**
    		 * Converts $this->data from $page_charset to $this->target_code when
    		 * the two differ. An empty or unsupported charset leaves the data as is.
    		 *
    		 * @param string $page_charset Charset name declared by the page.
    		 */
    		public function check_charset($page_charset)
    		{
    			// Nothing declared, or already in the target encoding (names are case-insensitive).
    			if ($page_charset == '' || strcasecmp($page_charset, $this->target_code) === 0) {
    				return;
    			}

    			try {
    				$converted = mb_convert_encoding($this->data, $this->target_code, $page_charset);
    			} catch (\ValueError $e) {
    				// PHP 8 throws on an encoding name mbstring does not know.
    				return;
    			}

    			// Older PHP versions return false for an unknown encoding.
    			if ($converted !== false) {
    				$this->data = $converted;
    			}
    		}

    		/**
    		 * Applies the currently selected pattern to $this->data.
    		 *
    		 * @return array preg_match() groups (index 1 is the value), or an empty
    		 *               array when the key is unknown or nothing matched.
    		 */
    		public function get_key_content()
    		{
    			if (!isset($this->pattern_array[$this->pattern_key])) {
    				return array();
    			}

    			$content = array();
    			if (preg_match($this->pattern_array[$this->pattern_key], (string) $this->data, $content) !== 1) {
    				return array();
    			}

    			return $content;
    		}

    		/**
    		 * Finds the charset declared in the page markup.
    		 *
    		 * @return string Lower-cased charset name, or '' when none is declared.
    		 */
    		public function get_charset()
    		{
    			$reg_charset = array();
    			if (preg_match($this->pattern_array['charset'], (string) $this->data, $reg_charset) !== 1) {
    				return '';
    			}

    			return strtolower($reg_charset[1]);
    		}
    	}
    
    
    // Demo: fetch a page and print its <title> in a UTF-8 HTML response.
    header("Content-type:text/html;charset=utf-8");

    $snoopy = new Snoopy("http://www.qq.com");

    $title = $snoopy->get_content('title');

    if ($title === false || $title === '') {
    	// Download failed or the page has no matching <title>.
    	echo 'Unable to extract the page title.';
    } else {
    	// The title comes from a remote page and is emitted as HTML: escape it.
    	echo htmlspecialchars($title, ENT_QUOTES, 'UTF-8');
    }
    ?>
    

      

  • 相关阅读:
    页面的三大家族
    封装函数
    图片自动播放的案例
    动画封装
    长图滚动案例+点名册案例
    时钟案例
    伪随机数,随机数种子seed
    numpy.tolist( )函数
    countif( ) 函数判断当前单元格的身份证号码是否重复出现
    Excel技巧
  • 原文地址:https://www.cnblogs.com/xuyaoxiang/p/5485373.html
Copyright © 2011-2022 走看看