zoukankan      html  css  js  c++  java
  • 下载远程图片到本地

      1 <?php
      2 
      /**
       * Download a remote image over plain HTTP into temp/ and register it.
       *
       * Steps: normalise and validate the URL, let hostfilter() refuse unwanted
       * hosts, short-circuit when urlfilter() already knows the URL, otherwise
       * stream the response into temp/<sha1 of the URL>, verify it, and hand the
       * file to imgfilter().
       *
       * NOTE(review): the cURL handle follows redirects (see getcurl()) and the
       * redirect targets are not passed through hostfilter() again - confirm
       * whether that is acceptable for this deployment.
       *
       * @param string $url Remote image URL; only the http:// scheme is accepted.
       * @return array array('res' => 0|1, 'msg' => string). With 'res' => 1 the
       *               message is the second element returned by urlfilter() or
       *               imgfilter(); with 'res' => 0 it is a user-facing error text.
       */
      public function downloadImage($url){
          global $_GLOBAL;
          set_time_limit(160);

          // Normalise the input, then insist on the plain http:// scheme.
          $url = isset($url) ? trim(urldecode($url)) : '';
          if (substr($url, 0, 7) != 'http://') {
              return array('res' => 0, 'msg' => '采集图片仅支持http协议');
          }
          $url = filter_var($url, FILTER_VALIDATE_URL);
          if (!$url) {
              return array('res' => 0, 'msg' => 'url地址无效');
          }

          // hostfilter() returns a JSON string only when it refuses the host.
          // Decode it as an associative array: the default object form cannot
          // be indexed with ['res'].
          $host = strtolower(parse_url($url, PHP_URL_HOST));
          $verdict = $this->hostfilter($host);
          $hostarr = is_string($verdict) ? json_decode($verdict, true) : null;
          if (is_array($hostarr) && isset($hostarr['res']) && $hostarr['res'] == '0') {
              return array('res' => 0, 'msg' => isset($hostarr['msg']) ? $hostarr['msg'] : '');
          }

          // Skip the download when the URL has been handled before.
          list($fileid, $hash, $thumb) = $this->urlfilter($url);
          if ($fileid < 0) {
              return array('res' => 0, 'msg' => '图片不符合要求');
          } else if ($fileid) {
              return array('res' => 1, 'msg' => $thumb);
          }

          // Stream the response body straight into the temp file.
          $ch = $this->getcurl($url);
          $tmpfile = 'temp/'. $hash;
          $fp = fopen($tmpfile, 'wb');
          if ($fp === false) {
              curl_close($ch);
              return array('res' => 0, 'msg' => '无法创建临时文件,请重试...');
          }
          flock($fp, LOCK_EX);
          curl_setopt($ch, CURLOPT_FILE, $fp);
          curl_exec($ch);
          if (curl_errno($ch)) {
              curl_close($ch);
              fclose($fp);
              unlink($tmpfile);
              return array('res' => 0, 'msg' => '采集超时,请重试...');
          }
          $http_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
          $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
          // "a || b" is a boolean in PHP, so pick the first non-zero value explicitly.
          $downloaded = curl_getinfo($ch, CURLINFO_SIZE_DOWNLOAD);
          $declared = curl_getinfo($ch, CURLINFO_CONTENT_LENGTH_DOWNLOAD);
          $size = $downloaded ? $downloaded : $declared;
          curl_close($ch);
          fclose($fp);

          // Compare only the media type, without parameters such as "; charset=...".
          $typeParts = explode(';', (string) $content_type);
          $mimeKey = strtolower(trim($typeParts[0]));

          $_GLOBAL['mime'] = array('image/jpeg' => 'jpg', 'image/jpg' => 'jpg', 'image/png' => 'png', 'image/gif' => 'gif', 'image/bmp' => 'bmp');
          if ($http_code < 200 || $http_code >= 300 || !isset($_GLOBAL['mime'][$mimeKey])) {
              unlink($tmpfile);
              return array('res' => 0, 'msg' => '链接地址失效或者不存在');
          }

          // The file on disk must match what cURL reports having transferred.
          clearstatcache(true, $tmpfile);
          if (filesize($tmpfile) != $size) {
              unlink($tmpfile);
              return array('res' => 0, 'msg' => '采集超时,文件不完整,请重试...');
          }

          list($status, $message) = $this->imgfilter($tmpfile);
          if ($status <= 0) {
              // imgfilter() may already have removed the file.
              if (is_file($tmpfile)) {
                  unlink($tmpfile);
              }
              return array('res' => 0, 'msg' => $message);
          }

          $this->updateurl($url, $hash, $status);
          return array('res' => 1, 'msg' => $message);
      }
     78 
     79     
     /**
      * Refuse hosts that point at this site, the local machine or a private network.
      *
      * @param string $host Host name or dotted IPv4 address; downloadImage() passes it lower-cased.
      * @return string|null JSON-encoded array('res' => 0, 'msg' => ...) when the host
      *                     is refused, null when it is acceptable.
      */
     public function hostfilter($host) {
         // An empty needle would match every host, so skip the self check
         // when the server name is unknown (for example on the command line).
         $self = isset($_SERVER['SERVER_NAME']) ? (string) $_SERVER['SERVER_NAME'] : '';
         if ($self !== '' && strpos($host, $self) !== false) {
             return json_encode(array('res' => 0, 'msg' => '不支持站内采集'));
         }

         if ($host == '127.0.0.1' || $host == 'localhost') {
             return json_encode(array('res' => 0, 'msg' => '不支持本地网络采集'));
         }

         // Dotted IPv4 literal: digits and dots must be escaped, otherwise the
         // pattern only matches the letter "d" and this branch never runs.
         if (preg_match('/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/', $host)) {
             $iparr = explode('.', $host);
             $first = (int) $iparr[0];
             $second = (int) $iparr[1];

             $internal = $first == 0                        // "this network"
                 || $first == 10                             // 10.0.0.0/8
                 || $first == 127                            // loopback
                 || ($first == 169 && $second == 254)        // link-local
                 || ($first == 192 && $second == 168)        // 192.168.0.0/16
                 || ($first == 172 && $second >= 16 && $second <= 31); // 172.16.0.0/12
             if ($internal) {
                 return json_encode(array('res' => 0, 'msg' => '不支持内部网络采集'));
             }

             foreach ($iparr as $octet) {
                 if ((int) $octet > 255) {
                     return json_encode(array('res' => 0, 'msg' => '无效的ip'));
                 }
             }
         }

         return null;
     }
    104     
    /**
     * Look a URL up in the url table so the same image is not downloaded twice.
     *
     * @param string $url  URL to look up; it is re-validated with FILTER_VALIDATE_URL.
     * @param mixed  $type Not used by this method.
     * @return array Three elements, array(fileid, hash, thumbdata):
     *               - array('-1', '', '')        the URL failed validation;
     *               - array('0', hash, '')       the url lookup returned false, or the
     *                                            linked file row has empty thumb data;
     *               - array('-1', hash, '')      the stored fileid compares equal to 0;
     *               - array(fileid, hash, thumb) a stored file with thumb data exists.
     */
    public function urlfilter($url,$type=false) {
        global $db;

        $url = filter_var($url, FILTER_VALIDATE_URL);
        if (empty($url)) {
            return array('-1', '', '');
        }

        // The url table is keyed by the SHA-1 of the validated URL.
        $hash = sha1($url);
        $db->query("SELECT `fileid` FROM `". TNAME. "url` WHERE `hash`='$hash' LIMIT 1");
        $fileid = $db->fetchResult();

        if ($fileid === false) {
            return array('0', $hash, '');
        }
        if ($fileid == 0) {
            return array('-1', $hash, '');
        }

        // A file id is on record: it only counts when thumb data is present.
        $db->query("SELECT `thumbdata` FROM `". TNAME. "file` WHERE `fileid`=$fileid LIMIT 1");
        $thumbData = $db->fetchResult();

        return empty($thumbData)
            ? array('0', $hash, '')
            : array($fileid, $hash, $thumbData);
    }
    133     
    /**
     * Pick one browser User-Agent string at random from a fixed list.
     *
     * @return string One of the five User-Agent strings listed below.
     */
    public function setuseragent() {
        $agents = array(
            'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
            'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.28 (KHTML, like Gecko) Chrome/12.0.725.0 Safari/534.28',
            'Mozilla/5.0 (Windows NT 5.1; rv:2.0) Gecko/20100101 Firefox/4.0',
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; zh-CN; rv:1.9.2.16) Gecko/20110319 Firefox/3.6.16',
        );

        // array_rand() with its default count returns a single random key.
        return $agents[array_rand($agents)];
    }
    149     
    /**
     * Build a cURL handle preconfigured for fetching a URL.
     *
     * The handle uses a random User-Agent from setuseragent(), follows up to
     * five redirects, requests the body without response headers, and is not
     * executed here - the caller runs and closes it.
     *
     * @param string $url    URL to request.
     * @param int    $timout Overall transfer timeout in seconds (CURLOPT_TIMEOUT).
     * @return resource The configured, not yet executed, cURL handle.
     */
    public function getcurl($url, $timout = 120) {
        $handle = curl_init();

        // Applied one by one, in this order.
        $options = array(
            CURLOPT_USERAGENT      => $this->setuseragent(),
            CURLOPT_MAXREDIRS      => 5,
            CURLOPT_FOLLOWLOCATION => 1,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_NOBODY         => 0,
            CURLOPT_HEADER         => 0,
            CURLOPT_URL            => $url,
            CURLOPT_TIMEOUT        => $timout,
        );
        foreach ($options as $option => $value) {
            curl_setopt($handle, $option, $value);
        }

        return $handle;
    }
    168     
    /**
     * Record which file a downloaded URL resolved to (insert or update by key).
     *
     * The query is sent through the global $db object, which is disconnected
     * afterwards.
     *
     * NOTE(review): the values are escaped with addslashes() because the $db
     * wrapper's own escaping / prepared-statement API is not visible here;
     * switch to that API if it exists.
     *
     * @param string $url     The downloaded URL.
     * @param string $urlhash Hash stored alongside the URL (downloadImage() passes sha1 of the URL).
     * @param int    $fileid  Id of the file row the URL maps to.
     * @return void
     */
    public function updateurl($url, $urlhash, $fileid) {
        global $db;

        // A validated URL may still contain quotes, so never interpolate it raw.
        $safeUrl = addslashes((string) $url);
        $safeHash = addslashes((string) $urlhash);
        $fileid = (int) $fileid;

        $urlsql = "INSERT INTO `". TNAME. "url` (`url`,`hash`,`fileid`) VALUES('$safeUrl','$safeHash','$fileid') ON DUPLICATE KEY UPDATE `fileid`='$fileid'";
        $db->query($urlsql);
        $db->disconnect();
    }
    181     
    /**
     * Validate an image file and register it in the file table, keyed by content hash.
     *
     * The file must be a JPEG/PNG/GIF/BMP according to getimagesize() and meet
     * both IMAGE_MIN_WIDTH and IMAGE_MIN_HEIGHT. With $hashcheck enabled the
     * file is then moved to files/<hash dir>/<hash>.<ext>, or deleted when that
     * target already exists, and a file row is created if none matches the hash.
     *
     * @param string $file      Path of the image to inspect. It is deleted on rejection.
     * @param int    $hashcheck When falsy only the format/size checks run and the file is left in place.
     * @return array array(0, message) on rejection or when the insert yields no id;
     *               array(1, 'mime check passed') when $hashcheck is falsy;
     *               otherwise array(fileid, thumbdata).
     */
    public function imgfilter($file, $hashcheck = 1) {
        global $db, $_GLOBAL;

        // Format check based on the file's real content, not on any header.
        $mimearr = array('image/jpeg' => 'jpg', 'image/jpg' => 'jpg', 'image/png' => 'png', 'image/gif' => 'gif', 'image/bmp' => 'bmp');
        $imginfo = getimagesize($file);
        if (empty($imginfo) || !isset($mimearr[$imginfo['mime']])) {
            unlink($file);
            return array(0, '图片格式无效');
        }
        // Both minimums must hold, as the message states; "&&" let through
        // images that were too small in only one dimension.
        if ($imginfo['0'] < IMAGE_MIN_WIDTH || $imginfo['1'] < IMAGE_MIN_HEIGHT) {
            unlink($file);
            return array(0, '宽度不小于'. IMAGE_MIN_WIDTH. ', 高度不小于'. IMAGE_MIN_HEIGHT);
        }
        if (!$hashcheck) {
            return array(1, 'mime check passed');
        }

        // Work out where a file with this content hash lives.
        $ext = $mimearr[$imginfo['mime']];
        $Hash = $this->getHashFile($file);
        $hashdir = 'files/'.$this->getHashDir($Hash);
        $srcfile = $hashdir. $Hash. '.'.$ext;
        if (!is_dir($hashdir)) {
            mkdir($hashdir, 0755, true);
        }
        $sql = "SELECT `fileid`,`thumbdata` FROM `".TNAME."file` WHERE `hash`='$Hash'
    
    "; // file table: one row per stored image
        $db->query($sql);
        $filearr = $db->fetchRow();
        $db->clear();

        if ($filearr) {
            // Hit: the content is already registered. Restore the stored file
            // if it went missing, otherwise drop the duplicate.
            if (!is_file($srcfile)) {
                rename($file, $srcfile);
            } else {
                unlink($file);
            }
            return array($filearr['fileid'], $filearr['thumbdata']);
        }

        // Miss: store the file and create its row.
        rename($file, $srcfile);
        $Size = filesize($srcfile);
        $Geometry = "{$imginfo['0']}x{$imginfo['1']}";
        $mode = '';
        if ($ext == 'jpg') {
            $mode = (isset($imginfo['channels']) && $imginfo['channels'] == 3) ? 'RGB' : 'CMYK';
        }
        // User fields come from outside this method; escape them before they reach SQL.
        // NOTE(review): prefer the $db wrapper's own escaping if it has one.
        $uid = isset($_GLOBAL['user']['uid']) ? addslashes((string) $_GLOBAL['user']['uid']) : '';
        $username = isset($_GLOBAL['user']['username']) ? addslashes((string) $_GLOBAL['user']['username']) : '';
        $filesql = "INSERT IGNORE INTO `".TNAME."file` SET uid='{$uid}', username='{$username}',`hash`='{$Hash}',`type`='{$ext}', `size`='{$Size}', `format`='{$ext}', `mode`='{$mode}', `geometry`='{$Geometry}', `created`='".time()."'";

        $db->beginTransaction();
        $db->query($filesql);
        $HashID = $db->fetchLastInsertId();
        $db->commitTransaction();
        if (empty($HashID)) {
            // Report the failure like every other rejection instead of ending the script.
            return array(0, 'db failed');
        }

        // For GIFs, remove the temp copy named after the stored path, if there is one.
        if ($imginfo['2'] == IMAGETYPE_GIF) {
            $gifTemp = 'temp/'. md5($srcfile). '.gif';
            if (is_file($gifTemp)) {
                unlink($gifTemp);
            }
        }

        $ThumbData = $HashID. '?'. $Hash. '?'. $Size. '?'.$ext;
        $this->getThumbImg($srcfile, $ThumbData, 'article');

        $db->query("UPDATE `".TNAME."file` SET `file`='$srcfile',`thumbdata`='$ThumbData' WHERE `fileid`=$HashID;");
        $db->disconnect();

        return array($HashID, $ThumbData);
    }
    255  
    256 ?>
  • 相关阅读:
    Java Web学习总结(16)——JSP的九个内置对象
    Java Web学习总结(15)——JSP指令
    【我的物联网成长记11】8招带你玩转规则引擎
    云图说|高效管理华为云SAP的“秘密武器”
    Python 中更优雅的日志记录方案
    有了它,Python编码再也不为字符集问题而发愁了!
    【鲲鹏来了】手把手教你创造一个属于自己的鲲鹏开发者环境
    解密昇腾AI处理器--DaVinci架构(计算单元)
    使用Keil5构建GD32450i-EVAL工程
    云图说|SAP技术画册“一点通”
  • 原文地址:https://www.cnblogs.com/zhaoyuqi/p/3467796.html
Copyright © 2011-2022 走看看