zoukankan      html  css  js  c++  java
  • go语言爬取豆瓣的电影名称,导演和评分

    package main
    
    import (
        "fmt"
        "io"
        "net/http"
        "os"
        "regexp"
        "strconv"
    )
    
    // main prompts on standard input for the first and last page numbers to
    // crawl and hands the range to ToWork.
    //
    // Pages are 1-based: DoMain turns page p into the query offset (p-1)*25, so
    // a start page below 1 would produce a negative offset. Unreadable input or
    // an invalid range is reported and the program exits without crawling.
    func main() {
        var start, end int

        fmt.Println("请输入要爬取的开始页面:")
        if _, err := fmt.Scan(&start); err != nil {
            fmt.Println("读取开始页面失败:", err)
            return
        }

        fmt.Println("请输入要爬取的结束的界面:")
        if _, err := fmt.Scan(&end); err != nil {
            fmt.Println("读取结束页面失败:", err)
            return
        }

        // Reject ranges that would crawl nothing or request a negative offset.
        if start < 1 || end < start {
            fmt.Printf("页面范围无效: %d-%d\n", start, end)
            return
        }

        ToWork(start, end)
    }
    
    // ToWork crawls every page in the inclusive range [start, end].
    //
    // It launches one goroutine running DoMain per page, all sharing a single
    // unbuffered channel, and then receives from that channel once for every
    // goroutine it launched so that it does not return before they report back.
    func ToWork(start, end int) {
        fmt.Printf("要爬取的是从第%d到%d页", start, end)

        done := make(chan int)

        // Fan out: one worker per page, counting how many were started.
        pending := 0
        for p := start; p <= end; p++ {
            go DoMain(p, done)
            pending++
        }

        // Fan in: wait for one completion signal per started worker.
        for ; pending > 0; pending-- {
            <-done
        }
    }
    // Patterns DoMain uses to pull fields out of a list page. They are compiled
    // once at package scope rather than on every call.
    var (
        // doubanNameRe captures the alt text of a 100px-wide <img> tag.
        doubanNameRe = regexp.MustCompile(`<img width="100" alt="(?s:(.*?))"`)
        // doubanDirectorRe captures the text between "导演: " and the next "&nbsp;&nbsp;".
        doubanDirectorRe = regexp.MustCompile(`导演: (?s:(.*?))&nbsp;&nbsp;`)
        // doubanScoreRe captures the contents of the rating_num <span>.
        doubanScoreRe = regexp.MustCompile(` <span class="rating_num" property="v:average">(?s:(.*?))</span>`)
    )

    // DoMain fetches list page number index (1-based, 25 entries per page),
    // extracts the name, director and score matches from the HTML, and passes
    // them to SaveFile.
    //
    // index is always sent on page exactly once before DoMain returns, whether
    // or not the fetch succeeded, so a caller that receives once per started
    // DoMain is never left blocked by a failed page.
    func DoMain(index int, page chan int) {
        // Signal completion on every exit path; previously the error return
        // skipped the send and the receiver waited forever.
        defer func() { page <- index }()

        // Page p starts at entry offset (p-1)*25.
        url := "https://movie.douban.com/top250?start=" + strconv.Itoa((index-1)*25) + "&filter="
        result, err := HttpGet1(url)
        if err != nil {
            fmt.Printf("url加载错误%s", err)
            return
        }

        name := doubanNameRe.FindAllStringSubmatch(result, -1)
        editor := doubanDirectorRe.FindAllStringSubmatch(result, -1)
        score := doubanScoreRe.FindAllStringSubmatch(result, -1)
        SaveFile(index, name, editor, score)
    }
    // HttpGet1 issues a GET request for url with http.Get and returns the whole
    // response body as a string.
    //
    // A non-nil error is returned when the request itself fails, when the
    // response status is not 200 OK, or when reading the body fails. On any
    // error the returned result is empty.
    func HttpGet1(url string) (result string, err error) {
        resp, err := http.Get(url)
        if err != nil {
            // resp is nil when Get fails, so return before touching resp.Body.
            return "", err
        }
        defer resp.Body.Close()

        if resp.StatusCode != http.StatusOK {
            return "", fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
        }

        // Accumulate into a byte slice; appending to a string on every chunk
        // would copy everything read so far each time.
        var body []byte
        buf := make([]byte, 4096)
        for {
            n, readErr := resp.Body.Read(buf)
            // A Read may return data together with an error, so keep the
            // bytes before looking at readErr.
            body = append(body, buf[:n]...)
            if readErr == io.EOF {
                break
            }
            if readErr != nil {
                return "", readErr
            }
        }

        fmt.Println("读取网页完成")
        return string(body), nil
    }
    // SaveFile writes one page of scraped results to "<index>页.txt" in the
    // current directory, creating or truncating the file.
    //
    // name, editor and score are regexp submatch lists; element [i][1] of each
    // is the captured text for row i. The file starts with a tab-separated
    // header line followed by one line per row. If the three lists differ in
    // length, only the rows present in all of them are written and a notice is
    // printed. Failures to create, write or close the file are printed; nothing
    // is returned.
    func SaveFile(index int, name, editor, score [][]string) {
        f, err := os.Create(strconv.Itoa(index) + "页.txt")
        if err != nil {
            fmt.Println("文件打开错误", err)
            return
        }
        defer func() {
            // A failed Close can mean buffered data never reached disk.
            if cerr := f.Close(); cerr != nil {
                fmt.Println("文件关闭错误", cerr)
            }
        }()

        // Use the shortest list as the row count so no index goes out of range
        // when a pattern matched fewer times than the others.
        rows := len(editor)
        if len(name) < rows {
            rows = len(name)
        }
        if len(score) < rows {
            rows = len(score)
        }
        if len(name) != rows || len(editor) != rows || len(score) != rows {
            fmt.Printf("第%d页匹配数量不一致: 名称%d 导演%d 评分%d\n", index, len(name), len(editor), len(score))
        }

        out := []byte("电影名称\t\t\t\t\t导演\t\t\t\t\t\t电影评分\t\t\t\t\t\t\t\n")
        for i := 0; i < rows; i++ {
            line := saveFileCapture(name[i]) + "\t\t\t" + saveFileCapture(editor[i]) + "\t\t\t" + saveFileCapture(score[i]) + "\n"
            out = append(out, line...)
        }

        if _, err := f.Write(out); err != nil {
            fmt.Println("文件写入错误", err)
        }
    }

    // saveFileCapture returns the first capture group of a submatch (m[1]), or
    // the empty string when the submatch has no capture group.
    func saveFileCapture(m []string) string {
        if len(m) < 2 {
            return ""
        }
        return m[1]
    }
    https://necydcy.me/
  • 相关阅读:
    静态全局变量
    java处理相对路径
    Java 获取字符串中第N次出现的字符位置
    java 更改list 某一元素?
    for循环,如何结束多层for循环
    SQL Server Management Studio的对象资源管理器的使用
    线程
    存储过程,稍微复杂
    触发器--里面涉及触发器调存储过程
    Bootstrap:弹出框和提示框效果以及代码展示
  • 原文地址:https://www.cnblogs.com/miria-486/p/10071318.html
Copyright © 2011-2022 走看看