zoukankan      html  css  js  c++  java
  • 获取网页中的部分内容,新闻标题和内容

    新闻信息采集

    <?php
    /**
     * Extract delimited pieces of text from a source string and pour them into
     * an output template, once for every place the pattern matches.
     *
     * The pattern ($canshustr) is split on the placeholder "[参数]" into
     * segments. Each segment may itself contain "(*)", which splits it into
     * literal fragments that must occur in order with arbitrary text between
     * them. The text lying between the end of one matched segment and the
     * start of the next matched segment is the N-th capture, and replaces
     * "[参数N]" in a fresh copy of the template ($mubiao). Matching then
     * resumes right after the last segment until the source is exhausted or a
     * fragment can no longer be found.
     *
     * All offsets are byte offsets (strpos/substr/strlen), used consistently.
     *
     * @param string $yuanma    Source text to scan (e.g. a downloaded page).
     * @param string $canshustr Pattern; must contain "[参数]" after at least
     *                          one leading character.
     * @param string $mubiao    Output template; must contain "[参数1]".
     * @return array One filled-in template per complete match, in source
     *               order. Empty when the source is empty, the arguments are
     *               malformed (an error message is echoed in that case), or
     *               nothing matches.
     */
    function canshujiequ($yuanma,$canshustr,$mubiao){
        // Loose comparison on purpose: it also catches the false/null that a
        // failed download hands over.
        if ($yuanma == '') return array();

        // strpos() returns 0 for a match at the very start, so it must be
        // compared strictly against false. A pattern that *starts* with the
        // placeholder is still rejected (as before): there is no left
        // delimiter for that capture. A template that starts with "[参数1]"
        // is perfectly valid and is now accepted.
        $canshuwz = strpos($canshustr, '[参数]');
        if ($canshuwz === false || $canshuwz === 0 || strpos($mubiao, '[参数1]') === false) {
            echo '参数或组合字符串格式不对';
            return array();
        }

        $yuanmalen = strlen($yuanma);
        $canshuarr = explode('[参数]', $canshustr);
        $pipeiarr = array();
        $chaxunwz = 0;   // where the next search starts
        $qianjs = 0;     // end offset of the previously matched segment
        $nowks = 0;      // start offset of the segment being matched
        $nowjs = 0;      // end offset of the segment being matched

        while ($chaxunwz < $yuanmalen) {
            $kaishiwz = $chaxunwz;
            $mubiaofuben = $mubiao;
            $feikong = 0;        // number of non-empty segments seen so far
            $shibai = false;     // set once a fragment cannot be matched

            foreach ($canshuarr as $duan) {
                if ($duan === '') continue;
                $feikong++;
                $feikongnum = 0; // number of non-empty fragments in this segment

                foreach (explode('(*)', $duan) as $pianduan) {
                    if ($pianduan === '') continue;
                    $feikongnum++;
                    if ($chaxunwz >= $yuanmalen) { $shibai = true; break; }
                    $pipeiwz = strpos($yuanma, $pianduan, $chaxunwz);
                    if ($pipeiwz === false) { $shibai = true; break; }
                    $chaxunwz = $pipeiwz + strlen($pianduan);
                    // The segment starts where its first fragment starts.
                    if ($feikongnum === 1) $nowks = $pipeiwz;
                    $nowjs = $chaxunwz;
                }
                if ($shibai) break;

                // From the second segment on, the gap between the previous
                // segment's end and this segment's start is a capture.
                if ($feikong > 1) {
                    $str = substr($yuanma, $qianjs, $nowks - $qianjs);
                    $mubiaofuben = str_replace('[参数'.($feikong-1).']', $str, $mubiaofuben);
                }
                $qianjs = $nowjs;
            }

            // Stop on an incomplete match. Also stop when nothing was consumed
            // (the pattern holds no literal fragment at all); otherwise the
            // loop would never terminate.
            if ($shibai || $chaxunwz === $kaishiwz) break;

            $pipeiarr[] = $mubiaofuben;
        }
        return $pipeiarr;
    }
    // Extraction pattern: capture whatever sits inside this specific <ul> element.
    $a = '<ul class="ul_text_1 f14 arr1" style="padding:15px 0;">[参数]</ul>';
    // Output template: the captured text wrapped in non-breaking-space entities.
    $b = '&nbsp;[参数1]&nbsp';
    // Fetch the data source (the page at this URL).
    $source = file_get_contents('http://news.ef360.com/lady/');
    // Run the extraction; each element of the result is one filled-in template.
    $jieguo1 = canshujiequ($source, $a, $b);
    
    ?>
  • 相关阅读:
    poj 1010
    poj 1060
    poj 1001
    POJ 2769
    POJ 2559
    poj 2403
    POJ 1088
    设置全屏与退出全屏
    iframe 父子页面方法调用
    Web 前端面试小知识
  • 原文地址:https://www.cnblogs.com/bafeiyu/p/2951196.html
Copyright © 2011-2022 走看看