zoukankan      html  css  js  c++  java
  • PHP采集程序中常用的函数

     

    // Returns the URL (path plus query string) of the currently running script.
    // Uses $_SERVER["REQUEST_URI"] when it is non-empty; otherwise falls back to
    // $_SERVER["PHP_SELF"], appending "?" and $_SERVER["QUERY_STRING"] when the
    // query string is non-empty.
    function get_php_url(){
        // Preferred source: the server already provides path and query together.
        if (!empty($_SERVER["REQUEST_URI"])) {
            return $_SERVER["REQUEST_URI"];
        }

        // Fallback: rebuild the URL from the script path and the raw query string.
        $selfPath = $_SERVER["PHP_SELF"];
        return empty($_SERVER["QUERY_STRING"])
            ? $selfPath
            : $selfPath."?".$_SERVER["QUERY_STRING"];
    }
    // Converts full-width digits (U+FF10..U+FF19) in $fnum to ASCII digits, then
    // removes every character that is not an ASCII digit or ".", together with a
    // run of zeros at the very start of the string.
    // Returns the cleaned string, or the integer 0 when nothing is left.
    function GetAlabNum($fnum){
        // The full-width digits are built from their UTF-8 bytes (EF BC 90 .. EF BC 99)
        // so the table cannot be silently flattened to ASCII by an editor or a
        // normalising copy/paste.
        // NOTE(review): assumes $fnum is UTF-8 encoded - confirm against callers.
        $wideDigits = array();
        $asciiDigits = array();
        for ($i = 0; $i <= 9; $i++) {
            $wideDigits[] = "\xEF\xBC".chr(0x90 + $i);
            $asciiDigits[] = (string)$i;
        }
        $fnum = str_replace($wideDigits, $asciiDigits, $fnum);

        // preg_replace stands in for ereg_replace, which no longer exists in PHP 7+.
        // "^0+" only matches at the very beginning of the string, as before.
        $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

        if ($fnum == "") $fnum = 0;
        return $fnum;
    }
    // Converts plain text to HTML-safe text: collapses each pair of spaces into a
    // single space, escapes "<" and ">" as entities, and turns every line break
    // into "<br/>" followed by CRLF.
    // Note: "&" and quotes are left untouched, exactly as before.
    function Text2Html($txt){
        $txt = str_replace("  "," ",$txt);
        $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);
        // One <br/> per logical line break. The previous pattern used the ungreedy
        // "U" modifier, which made "[\r\n]{1,}" match a single character at a time,
        // so a Windows "\r\n" was emitted as two <br/> tags. Matching "\r\n" first
        // fixes that while a lone "\r" or "\n" still yields one <br/> each.
        $txt = preg_replace("/\r\n|\r|\n/", "<br/>\r\n", $txt);
        return $txt;
    }

    // Neutralises HTML markup in $str by replacing "<" with "&lt;" and ">" with
    // "&gt;". No other characters are changed.
    function ClearHtml($str){
        $angleBrackets = array('<', '>');
        $entities = array('&lt;', '&gt;');
        return str_replace($angleBrackets, $entities, $str);
    }
    // Rewrites root-relative links in $content into absolute URLs.
    //
    // $content  - HTML whose href="/..." and src="/..." attributes should be rewritten.
    // $feed_url - URL the content was fetched from; its scheme (http, https or ftp)
    //             and host are prepended to each root-relative link.
    //
    // Returns the rewritten HTML. $content is returned unchanged when no host or no
    // supported scheme can be extracted from $feed_url.
    // Only double-quoted attributes with lower-case names are rewritten.
    function relative_to_absolute($content, $feed_url) {
        preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);
        // Strip the scheme, then everything from the first "/" on, leaving the host.
        $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
        $server_url = preg_replace("/\/.*/", "", $server_url);

        if ($server_url == '' || !isset($protocol[0])) {
            return $content;
        }

        // Escape "\" and "$" so the host can never be read as a backreference in
        // the replacement string.
        $base = addcslashes($protocol[0].$server_url, '\\$');

        // The (?!\/) lookahead skips protocol-relative URLs such as src="//cdn.example/x",
        // which previously were mangled into "scheme://host//cdn.example/x".
        return preg_replace('/(href|src)="\/(?!\/)/', '${1}="'.$base.'/', $content);
    }
    // Extracts every <a> link from the HTML in $code.
    // Returns array('name' => link texts, 'url' => href values); the two lists are
    // index-aligned. Only anchors whose first attribute is href are matched, because
    // the pattern requires "href=" directly after "<a" and whitespace.
    function get_all_url($code){
        $anchorPattern = '/<a\s+href=["|\']?([^>"\' ]+)["|\']?\s*[^>]*>([^>]+)<\/a>/i';
        $found = array();
        preg_match_all($anchorPattern, $code, $found);

        // Group 1 captures the href value, group 2 the text between the tags.
        list(, $hrefs, $labels) = $found;
        return array('name' => $labels, 'url' => $hrefs);
    }

    // Returns the text found between the first occurrence of $start and the next
    // occurrence of $end in $str.
    //
    // Returns null when $start or $end is empty, and an empty string when $start
    // does not occur in $str. When $end does not occur after $start, everything
    // after $start (up to a second $start, if any) is returned.
    function get_tag_data($str, $start, $end){
        if ( $start == '' || $end == '' ){
            return;
        }
        $afterStart = explode($start, $str);
        // Without this guard a missing $start read an undefined offset and passed
        // null to explode(), which raises a warning/deprecation on current PHP.
        // The returned value ('') is the same as before.
        if (!isset($afterStart[1])) {
            return '';
        }
        $beforeEnd = explode($end, $afterStart[1]);
        return $beforeEnd[0];
    }
    // Converts each row of an HTML table into one CSV-style string.
    //
    // Every cell is wrapped in double quotes and cells are joined with commas, so
    // a row <tr><td>a</td><td>b</td></tr> becomes the string "a","b" (quotes
    // included). Quotes inside cell text are not escaped. Spaces and "&nbsp;"
    // entities are removed from the output.
    // Returns a list with one string per row.
    function get_tr_array($table) {
        // Opening <td ...> becomes the opening quote, </td> the closing quote + comma.
        $table = preg_replace("'<td[^>]*?>'si",'"',$table);
        // Closing tags are now matched case-insensitively, like the opening tags.
        $table = str_ireplace("</td>",'",',$table);
        $table = str_ireplace("</tr>","{tr}",$table);
        // Strip all remaining HTML tags.
        $table = preg_replace("'<[\/\!]*?[^<>]*?>'si","",$table);
        // Strip line breaks together with the whitespace that follows them.
        $table = preg_replace("'([\r\n])[\s]+'","",$table);
        // Remove spaces and non-breaking-space entities. The second replacement
        // previously repeated the plain-space one and therefore did nothing.
        $table = str_replace(array(" ", "&nbsp;"), "", $table);

        // Each row ends with ",{tr}"; the piece after the last marker is dropped.
        $table = explode(",{tr}",$table);
        array_pop($table);
        return $table;
    }

    // Converts an HTML table into a two-dimensional array of cell texts, for
    // scraping tabular data.
    //
    // Returns a list of rows, each row being a list of cell strings with all tags,
    // spaces and "&nbsp;" entities removed. Returns an empty array when the input
    // contains no rows.
    function get_td_array($table) {
        // Drop the opening <table>, <tr> and <td> tags.
        $table = preg_replace("'<table[^>]*?>'si","",$table);
        $table = preg_replace("'<tr[^>]*?>'si","",$table);
        $table = preg_replace("'<td[^>]*?>'si","",$table);
        // Turn closing tags into row/cell markers; matched case-insensitively so
        // they agree with the case-insensitive opening-tag patterns above.
        $table = str_ireplace("</tr>","{tr}",$table);
        $table = str_ireplace("</td>","{td}",$table);
        // Strip all remaining HTML tags.
        $table = preg_replace("'<[\/\!]*?[^<>]*?>'si","",$table);
        // Strip line breaks together with the whitespace that follows them.
        $table = preg_replace("'([\r\n])[\s]+'","",$table);
        // Remove spaces and non-breaking-space entities. The second replacement
        // previously repeated the plain-space one and therefore did nothing.
        $table = str_replace(array(" ", "&nbsp;"), "", $table);

        // Initialised up front so an input without rows yields array() instead of
        // an undefined variable.
        $td_array = array();
        $rows = explode('{tr}', $table);
        array_pop($rows); // discard whatever follows the last row marker
        foreach ($rows as $tr) {
            $td = explode('{td}', $tr);
            array_pop($td); // discard whatever follows the last cell marker
            $td_array[] = $td;
        }
        return $td_array;
    }

    // Returns every run of ASCII letters (a-z, A-Z) found in $str, sorted with
    // sort() and re-indexed from 0. When $distinct is true (the default),
    // duplicate words are removed first.
    function split_en_str($str,$distinct=true) {
        $found = array();
        preg_match_all('/([a-zA-Z]+)/', $str, $found);

        // Group 1 holds the matched words.
        $words = ($distinct == true) ? array_unique($found[1]) : $found[1];
        sort($words);
        return $words;
    }

  • 相关阅读:
    LeetCode 1110. Delete Nodes And Return Forest
    LeetCode 473. Matchsticks to Square
    LeetCode 886. Possible Bipartition
    LeetCode 737. Sentence Similarity II
    LeetCode 734. Sentence Similarity
    LeetCode 491. Increasing Subsequences
    LeetCode 1020. Number of Enclaves
    LeetCode 531. Lonely Pixel I
    LeetCode 1091. Shortest Path in Binary Matrix
    LeetCode 590. N-ary Tree Postorder Traversal
  • 原文地址:https://www.cnblogs.com/tdalcn/p/2279539.html
Copyright © 2011-2022 走看看