zoukankan      html  css  js  c++  java
  • PHP 爬虫:抓取爱奇艺视频信息与封面

    <?php
    /**
     * Return one capture group from the first match of a pattern.
     *
     * @param string $pattern PCRE pattern, including delimiters and modifiers.
     * @param string $subject Text to search.
     * @param int    $group   Index of the capture group to return.
     * @return string|null The captured text, or null when the pattern does not
     *                     match (or the group did not participate).
     */
    function getdata_first_group($pattern, $subject, $group = 1) {
      if (preg_match($pattern, $subject, $m) === 1 && isset($m[$group])) {
        return $m[$group];
      }
      return null;
    }

    /**
     * Scrape album metadata from a detail page and save its cover image.
     *
     * The page is cut down to the part between the 'album-head-info clearfix'
     * marker and the 'class="album-auto"' marker, then each field is pulled out
     * with a regular expression. The cover image is saved as /data/<$i>.jpg
     * unless that file already exists.
     *
     * @param int|string $i   Identifier used to name the saved cover image.
     * @param string     $url Location of the detail page (anything
     *                        file_get_contents() can read).
     * @return array Keys: title, other_title, area, lang, type, time, daoyan,
     *               and summary_all (only when a summary was found). title,
     *               other_title, time and daoyan are null when not found;
     *               area, lang and type are empty strings when not found.
     */
    function getdata($i, $url) {
      // Read the whole page into a string; treat a failed read as an empty
      // page so the caller still receives every expected key.
      $str = file_get_contents($url);
      if ($str === false) {
        $str = '';
      }

      // Narrow the page down to the album header. When a marker is missing,
      // keep the text as it is instead of discarding everything.
      $start = strpos($str, 'album-head-info clearfix');
      if ($start !== false) {
        $str = substr($str, $start);
      }
      $end = strpos($str, 'class="album-auto"');
      if ($end !== false) {
        $str = substr($str, 0, $end);
      }

      // '#' is used as the pattern delimiter because every pattern contains
      // '/' inside closing tags such as </a>.
      $data = array();
      $data['title'] = getdata_first_group('#<a .*?id="j-album-title".*?>(.*?)</a>#is', $str);
      $data['other_title'] = getdata_first_group('#<span .*?class="info-intro-title-s".*?>(.*?)</span>#is', $str);

      // Region
      $data['area'] = trim((string) getdata_first_group(
        '#<p .*?class="episodeIntro-area".*?>.*?<em>(.*?)</em>.*?<a.*?>(.*?)</a>.*?</p>#is', $str, 2));

      // Language
      $data['lang'] = trim((string) getdata_first_group(
        '#<p .*?class="episodeIntro-lang".*?>.*?<em>(.*?)</em>.*?<span.*?>(.*?)</span>.*?</p>#is', $str, 2));

      // Genres, joined with '/' (e.g. suspense/history/drama)
      $types = array();
      if (preg_match_all('#<a .*?qwys_leixing.*?>(.*?)</a>#is', $str, $match)) {
        $types = $match[1];
      }
      $data['type'] = implode('/', $types);

      // Time
      $data['time'] = getdata_first_group(
        '#<p .*?class="episodeIntro-time".*?>.*?<em>(.*?)</em>.*?<span.*?>(.*?)</span>.*?</p>#is', $str, 2);

      // Director
      $data['daoyan'] = getdata_first_group(
        '#<p .*?class="episodeIntro-director".*?>.*?<em>(.*?)</em>.*?<a.*?>(.*?)</a>.*?</p>#is', $str, 2);

      // Summary: prefer the second briefIntroTxt span, fall back to the first.
      if (preg_match_all('#<span .*?class="briefIntroTxt".*?>(.*?)</span>#is', $str, $match)) {
        if (!empty($match[1][1])) {
          $data['summary_all'] = $match[1][1];
        } elseif (!empty($match[1][0])) {
          $data['summary_all'] = $match[1][0];
        }
      }

      // Cover image: download only when a URL was found and the target file
      // does not exist yet.
      $img = getdata_first_group('#<img .*?src="(.*?)".*?id="j-album-img".*?>#is', $str);
      $file = '/data/' . $i . '.jpg';
      if ($img !== null && $img !== '' && !file_exists($file)) {
        $f = file_get_contents($img);
        if ($f !== false && $f !== '') {
          file_put_contents($file, $f);
        }
      }

      return $data;
    }
    // Driver: read the list file, scrape every entry that has a detail-page
    // URL, and print the collected records as a PHP array literal.
    //
    // Each line is split on single spaces into: [0] a number stored as 'num',
    // [1] a file name (not used here), [2] the detail-page URL.
    // FILE_IGNORE_NEW_LINES plus trim() keep the line terminator from ending
    // up inside the URL passed to getdata().
    $data = file('dianshiju02.txt', FILE_IGNORE_NEW_LINES);
    if ($data === false) {
      $data = array();
    }

    $ret = array();

    // Ids start at 5001 and advance for every input line, including lines
    // that are skipped, so an id always corresponds to a line position.
    $i = 5000;
    foreach ($data as $v) {
      $i++;

      $tmp = explode(" ", trim($v));

      // Skip blank lines and entries without a detail-page URL.
      if (empty($tmp[2])) {
        continue;
      }

      $num = (int) $tmp[0];
      $mp4 = $i . "/01.mp4";

      // Progress output: current id and the URL being fetched.
      print_r($i);
      print_r($tmp[2]);
      $infos = getdata($i, $tmp[2]);

      $ret[$i] = array(
        'title' => $infos['title'],
        'num' => $num,
        'img' => '//static0.qianqian.com/movies/' . $i . '.jpg',
        'mp4' => 'http://qukufile2.qianqian.com/data2/film_tv/tv/' . $mp4,
        'id' => $i,
        'infos' => $infos
      );
    }

    var_export($ret);

    输入文件 dianshiju02.txt 示例(每行一条,以空格分隔:集数 文件名 详情页地址):

    40 雪山飞狐-01.mpg http://www.iqiyi.com/lib/m_204754714.html?src=search
    33 大捕房-01.mp4 http://www.iqiyi.com/lib/m_202787314.html?src=search
    40 嫁入豪门=01.mp4 http://www.iqiyi.com/lib/m_200881014.html?src=search
    32 劝和小姐-01.mp4 http://www.iqiyi.com/lib/m_200840914.html?src=search
    30 血色恋情-01.mp4 http://www.iqiyi.com/lib/m_202498214.html?src=search
    32 锁定美军特使-01.mp4 http://www.iqiyi.com/lib/m_218730014.html?src=search
    32 红狐-01.mp4 http://www.iqiyi.com/lib/m_202587214.html?src=search
    34 大浴堂-01.mp4 http://www.iqiyi.com/lib/m_200880514.html?src=search
    30 女婿难当-01.mp4 http://www.iqiyi.com/lib/m_206378814.html?src=search
    27 风云1911-01.mp4 http://www.iqiyi.com/lib/m_202904114.html?src=search
    28 醉红尘-01.mp4 http://www.iqiyi.com/lib/m_202964014.html?src=search
    23 栗裕大将-01.mp4 http://www.iqiyi.com/lib/m_215180614.html?src=search
    24 将军日记-01.mp4 http://www.iqiyi.com/lib/m_202547514.html?src=search

  • 相关阅读:
    MyBatis+Oracle+Sequence
    原来这就是JVM垃圾
    JVM内存布局
    CacheAsidePattern结论
    The LMAX Architecture
    网络编程
    随机存取文件流
    数据流
    打印流
    标准输入流、标准输出流
  • 原文地址:https://www.cnblogs.com/yayaxuping/p/11200146.html
Copyright © 2011-2022 走看看