zoukankan      html  css  js  c++  java
  • go爬虫

    package main
    
    import (
    	"fmt"
    	"github.com/antchfx/htmlquery"
    	"io/ioutil"
    	"net/http"
    	"os"
    	"regexp"
    	"strings"
    	"sync"
    	"time"
    )
    // wg is the package-wide WaitGroup shared by main and saveVideo: main calls
    // Add and Wait on it, and saveVideo calls Done.
    var wg sync.WaitGroup
    func main() {
    
    	var url string = "https://haomooc.com/xiaoxue-read-2991.html"
    
    	resp, _ := http.Get(url)
    
    	defer resp.Body.Close()
    
    	doc, _ := htmlquery.Parse(resp.Body)
    	list := htmlquery.Find(doc, "//div[@class='dxs-l-b']//a")
    	for _, li := range list {
    		href := htmlquery.SelectAttr(li, "href")
    		strings.Replace(href, " ", "", -1)
    		title := htmlquery.SelectAttr(li,"title")
    		strings.Replace(title, " ", "", -1)
    		fmt.Printf("%s
    ", title)
    		fmt.Printf("%s
    ", href)
    		video := getVideo(href)
    		wg.Add(1)
    		//saveVideo(title,video)
    		fmt.Printf("%s
    ", video)
    	}
    
    	wg.Wait()
    }
    
    // fetchDelay is how long getVideo pauses before each request, to avoid
    // hitting the site with back-to-back requests.
    var fetchDelay = time.Second

    // videoURLPattern matches an absolute .mp4 URL hosted on video.haomooc.com.
    // The dots are escaped so they only match literal dots, and the path part is
    // non-greedy and stops at quotes, angle brackets and newlines so that two
    // URLs on the same line are reported as two matches instead of one span.
    var videoURLPattern = regexp.MustCompile(`https://video\.haomooc\.com/[^"'<>\n]*?\.mp4`)

    // getVideo downloads the page at url and returns the last
    // video.haomooc.com .mp4 URL found in its body, with spaces removed.
    //
    // It returns "" when the page cannot be fetched or read, when the server
    // does not answer 200 OK, or when the body contains no matching URL; fetch
    // problems are reported on stderr.
    func getVideo(url string) string {
    	time.Sleep(fetchDelay)

    	resp, err := http.Get(url)
    	if err != nil {
    		fmt.Fprintf(os.Stderr, "getVideo: fetch %s: %v\n", url, err)
    		return ""
    	}
    	defer resp.Body.Close()

    	if resp.StatusCode != http.StatusOK {
    		fmt.Fprintf(os.Stderr, "getVideo: fetch %s: unexpected status %s\n", url, resp.Status)
    		return ""
    	}

    	body, err := ioutil.ReadAll(resp.Body)
    	if err != nil {
    		fmt.Fprintf(os.Stderr, "getVideo: read %s: %v\n", url, err)
    		return ""
    	}

    	matches := videoURLPattern.FindAll(body, -1)
    	if len(matches) == 0 {
    		return ""
    	}

    	// When a page lists several videos the last one wins.
    	last := string(matches[len(matches)-1])
    	return strings.Replace(last, " ", "", -1)
    }
    // PathExists reports whether path can be stat'ed.
    //
    // It returns (true, nil) when os.Stat succeeds, (false, nil) when the stat
    // error satisfies os.IsNotExist, and (false, err) for any other stat error.
    func PathExists(path string) (bool, error) {
    	switch _, statErr := os.Stat(path); {
    	case statErr == nil:
    		return true, nil
    	case os.IsNotExist(statErr):
    		return false, nil
    	default:
    		return false, statErr
    	}
    }
    
    func saveVideo(title string ,url string)  {
    
    	fmt.Printf(title,url)
    	path := "/www/shell/video/"+title+".mp4"
    	b, err := PathExists(path)
    	if err != nil {
    		fmt.Printf("PathExists(%s),err(%v)
    ", path, err)
    	}
    	if b {
    		fmt.Printf("path %s 存在
    ", path)
    	} else{
    		fmt.Println("save video "+title )
    		fmt.Printf("%s",url)
    		// Get the data
    		resp, err := http.Get(url)
    		if err != nil {
    			panic(err)
    		}
    		defer resp.Body.Close()
    
    		data, err := ioutil.ReadAll(resp.Body)
    		if err != nil {
    			panic(err)
    		}
    		ioutil.WriteFile(path, data, 0644)
    		defer wg.Done()
    	}
    
    
    }
    
  • 相关阅读:
    linux开关机命令
    实现vmare虚拟机系统随主机开机自动启动
    linux常用命令
    centos7.3上用源代码安装zabbix3.2.7
    centos7用yum搭建LAMP环境
    centos7永久更改主机名
    Python基础学习-列表的常用方法
    中兴交换机配置命令
    dml并行
    11.2.0.4 aix下运行第二个节点root.sh报错处理
  • 原文地址:https://www.cnblogs.com/brady-wang/p/14054939.html
Copyright © 2011-2022 走看看