// Package main implements a small concurrent image crawler: it fetches
// listing pages from www.woyaogexing.com, extracts every <img src>
// attribute, and saves images hosted on img2.woyaogexing.com to ./imgs/.
package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/antchfx/htmlquery"
	"golang.org/x/net/html"
)

var (
	url      = "https://www.woyaogexing.com/shouji/"
	referUrl = "https://www.woyaogexing.com/shouji/"
	referImg = "img2.woyaogexing.com"
)

// httpClient is shared by all requests; the timeout prevents a stalled
// server from hanging a download goroutine forever. (The original built
// a new client per request with no timeout.)
var httpClient = &http.Client{Timeout: 30 * time.Second}

// downloadUrl fetches url with a browser-like User-Agent and the given
// Referer header and returns the response body, or nil on any error.
func downloadUrl(url string, refer string) []byte {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		handError(err)
		return nil
	}
	req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36")
	req.Header.Add("Referer", refer)
	response, err := httpClient.Do(req)
	if err != nil {
		// BUG FIX: the original logged the error and then dereferenced
		// the nil response, panicking on any failed request.
		handError(err)
		return nil
	}
	defer response.Body.Close()
	byteContent, err := ioutil.ReadAll(response.Body)
	if err != nil {
		handError(err)
		return nil
	}
	return byteContent
}

// parseContent parses an HTML document and returns the src attribute of
// every <img> element found in it.
func parseContent(content []byte) []string {
	root, err := html.Parse(strings.NewReader(string(content)))
	if err != nil {
		handError(err)
		return nil
	}
	nodes, err := htmlquery.QueryAll(root, "//img/@src")
	if err != nil {
		handError(err)
		return nil
	}
	urls := make([]string, 0, len(nodes))
	for _, n := range nodes {
		urls = append(urls, htmlquery.SelectAttr(n, "src"))
	}
	return urls
}

// downloadImgs downloads one protocol-relative image URL ("//img2...")
// into ./imgs/, skipping URLs from other hosts. wg.Done is always
// called, including on the skip and error paths.
func downloadImgs(url string, refer string, wg *sync.WaitGroup) {
	// BUG FIX: defer wg.Done() must precede the early return; the
	// original returned before registering Done on non-matching URLs,
	// leaking WaitGroup counts and deadlocking crawl's wg.Wait().
	defer wg.Done()
	if !strings.HasPrefix(url, "//img2") {
		return
	}
	url = "http://" + url[2:] // turn "//host/path" into a full URL
	fmt.Println("下载图片", url)
	content := downloadUrl(url, referUrl)
	if content == nil {
		return
	}
	parts := strings.Split(url, "/")
	fileName := parts[len(parts)-1]
	// BUG FIX: the original printed the success message when WriteFile
	// returned a non-nil error (the condition was inverted).
	if err := ioutil.WriteFile("./imgs/"+fileName, content, 0777); err != nil {
		handError(err)
		return
	}
	fmt.Printf("下载图片%s 成功", fileName)
}

// handError logs a non-nil error; callers decide how to recover.
func handError(err error) {
	if err != nil {
		fmt.Println(err)
	}
}

// main crawls listing pages index_0.html .. index_10.html concurrently
// and blocks until every page (and all of its images) has finished.
func main() {
	var wg sync.WaitGroup
	const totalPage = 10
	for j := 0; j <= totalPage; j++ {
		wg.Add(1)
		pageUrl := url + "index_" + strconv.Itoa(j) + ".html"
		// BUG FIX: the original called wg.Done() immediately after
		// spawning the goroutine, so wg.Wait() returned at once and the
		// program relied on a fixed 100-second sleep instead of actual
		// completion. Done now fires when crawl finishes.
		go func(p string) {
			defer wg.Done()
			crawl(p)
		}(pageUrl)
	}
	wg.Wait()
}

// crawl downloads one listing page, extracts its image URLs, and
// fetches them concurrently, blocking until all downloads complete.
func crawl(url string) {
	byteContent := downloadUrl(url, referUrl)
	if byteContent == nil {
		return
	}
	urls := parseContent(byteContent)
	fmt.Println(urls)
	if len(urls) == 0 {
		return
	}
	var wg sync.WaitGroup
	wg.Add(len(urls))
	for _, v := range urls {
		go downloadImgs(v, referImg, &wg)
	}
	wg.Wait()
}