时隔多年，感慨良多。废话不多说，上代码：
// This program runs a small HTTP server on :1024. A request such as
// http://127.0.0.1:1024/?id=dpg8426968 makes it fetch the page
// https://www.soyoung.com/<id> (and its "/p2/" follow-up page), download the
// images referenced there into a local directory named after the id, and
// answer with a small JSON status object.
package main

import (
	"crypto/md5"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/axgle/mahonia"
)

const (
	// siteBaseURL is the prefix every scraped page URL is built from; it is
	// also sent as the Referer of the first page request.
	siteBaseURL = "https://www.soyoung.com/"

	// imageUserAgent is the User-Agent header sent with image downloads.
	imageUserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36"

	// pageUserAgent is the User-Agent header sent with HTML page requests.
	pageUserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"

	// sessionCookie is the Cookie header sent with HTML page requests.
	// NOTE(review): this looks like a session captured from a browser
	// (it contains a PHPSESSID); it is presumably expired by now and should
	// not live in source control. Confirm whether the site needs it at all.
	sessionCookie = "__order_time__=undefined; msg_time=undefined; back_order_time=undefined; complain_time=undefined; __usersign__=1570614910876417305; _ga=GA1.2.2061581476.1570614904; _gid=GA1.2.1666843180.1570614904; PHPSESSID=5001a7796cc83a8255b33284a3a30dd7; cityId=1; Hm_lvt_b366fbb5465f5a86e1cc2871552e1fdb=1570614904,1570693381; __p_t__=15706935958294; __postion__=a%3A4%3A%7Bs%3A6%3A%22cityId%22%3Bs%3A3%3A%22207%22%3Bs%3A8%3A%22cityName%22%3Bs%3A9%3A%22%E6%B3%89%E5%B7%9E%E5%B8%82%22%3Bs%3A8%3A%22cityCode%22%3Bs%3A3%3A%22134%22%3Bs%3A3%3A%22jwd%22%3Bi%3A0%3B%7D; _gat=1; Hm_lpvt_b366fbb5465f5a86e1cc2871552e1fdb=1570694344"
)

// httpClient is shared by every outgoing request so connections can be
// reused. The timeout keeps a stalled upstream from blocking a handler
// forever; it covers the whole exchange including reading the body.
var httpClient = &http.Client{Timeout: 60 * time.Second}

// ConvertToString decodes src from the srcCode charset and then runs the
// result through the decoder for tagCode, returning the translated text.
//
// If either charset name is unknown to mahonia (NewDecoder returns nil in
// that case), src is returned unchanged instead of panicking on a nil
// decoder.
func ConvertToString(src string, srcCode string, tagCode string) string {
	srcCoder := mahonia.NewDecoder(srcCode)
	tagCoder := mahonia.NewDecoder(tagCode)
	if srcCoder == nil || tagCoder == nil {
		return src
	}
	srcResult := srcCoder.ConvertString(src)
	// The translate error is deliberately dropped: this function has no
	// error return and callers have always received whatever was produced.
	_, cdata, _ := tagCoder.Translate([]byte(srcResult), true)
	return string(cdata)
}

// GbkToUtf8 converts src from GBK to UTF-8 via ConvertToString.
func GbkToUtf8(src string) string {
	return ConvertToString(src, "gbk", "utf-8")
}

// validID reports whether id is safe to use both as a URL path element and
// as a local directory name: non-empty and made only of ASCII letters,
// digits, '_' and '-'. The id comes straight from the query string, so
// anything else (path separators, "..", ...) is rejected to prevent path
// traversal.
func validID(id string) bool {
	if id == "" {
		return false
	}
	for _, c := range id {
		switch {
		case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9', c == '_', c == '-':
		default:
			return false
		}
	}
	return true
}

// safeDirName turns text scraped from a remote page into a single path
// element by replacing path separators and neutralising "." and "..".
func safeDirName(name string) string {
	name = strings.Replace(name, "/", "_", -1)
	name = strings.Replace(name, "\\", "_", -1)
	if name == "." || name == ".." {
		return "_"
	}
	return name
}

// imageFileName derives the local file name for an image URL: the hex MD5
// digest of the URL followed by ".jpg".
func imageFileName(imgURL string) string {
	sum := md5.Sum([]byte(imgURL))
	return hex.EncodeToString(sum[:]) + ".jpg"
}

// downloadImg fetches img_url (sending Referer as the Referer header) and
// stores the response body in filename. Failures are logged and otherwise
// ignored so that one bad image does not stop the remaining downloads.
func downloadImg(img_url string, filename string, Referer string) {
	req, err := http.NewRequest("GET", img_url, nil)
	if err != nil {
		log.Println("build img request failed:", img_url, err)
		return
	}
	req.Header.Add("User-Agent", imageUserAgent)
	req.Header.Add("Referer", Referer)

	response, err := httpClient.Do(req)
	if err != nil {
		log.Println("get img_url failed:", err)
		return
	}
	defer response.Body.Close()

	// Without this check an error page would be written to disk as a .jpg.
	if response.StatusCode != http.StatusOK {
		log.Println("get img_url failed:", img_url, response.Status)
		return
	}

	data, err := ioutil.ReadAll(response.Body)
	if err != nil {
		log.Println("read data failed:", img_url, err)
		return
	}
	// 0666 matches the mode os.Create uses; WriteFile also reports write and
	// close errors.
	if err := ioutil.WriteFile(filename, data, 0666); err != nil {
		log.Println("write file failed:", filename, err)
	}
}

// fetchDocument GETs pageURL with the scraper's page headers and parses the
// response body as HTML. A non-200 status is reported as an error.
func fetchDocument(pageURL string, referer string) (*goquery.Document, error) {
	req, err := http.NewRequest("GET", pageURL, nil)
	if err != nil {
		return nil, fmt.Errorf("building request for %q: %v", pageURL, err)
	}
	req.Header.Add("User-Agent", pageUserAgent)
	req.Header.Add("Referer", referer)
	req.Header.Add("Cookie", sessionCookie)

	res, err := httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching %q: unexpected status %s", pageURL, res.Status)
	}
	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil, fmt.Errorf("parsing %q: %v", pageURL, err)
	}
	return doc, nil
}

// saveBeforeImages downloads the pre-surgery images (the href of every
// ".big-photo" element) into <id>/before.
func saveBeforeImages(doc *goquery.Document, id string, referer string) {
	var links []string
	doc.Find(".big-photo").Each(func(i int, s *goquery.Selection) {
		if link, ok := s.Attr("href"); ok && link != "" {
			links = append(links, link)
		}
	})

	dir := filepath.Join(id, "before")
	if err := os.MkdirAll(dir, os.ModePerm); err != nil {
		log.Println(err)
		return
	}
	for _, link := range links {
		downloadImg(link, filepath.Join(dir, imageFileName(link)), referer)
	}
}

// saveDiaryImages downloads the post-surgery images of every ".diary-item"
// in doc. Each item gets its own directory under <id>, named after the
// item's ".day" text; the image URLs come from the "data-img" attribute with
// the "_301_301" and "face/" fragments removed.
func saveDiaryImages(doc *goquery.Document, id string, referer string) {
	doc.Find(".diary-item").Each(func(i int, s *goquery.Selection) {
		title := s.Find(".day").Text()
		dir := filepath.Join(id, safeDirName(title))
		if err := os.MkdirAll(dir, os.ModePerm); err != nil {
			log.Println(err)
			return
		}
		s.Find(".photo-list li img").Each(func(k int, img *goquery.Selection) {
			imgURL, ok := img.Attr("data-img")
			if !ok || imgURL == "" {
				return
			}
			imgURL = strings.Replace(imgURL, "_301_301", "", -1)
			imgURL = strings.Replace(imgURL, "face/", "", -1)
			downloadImg(imgURL, filepath.Join(dir, imageFileName(imgURL)), referer)
		})
	})
}

// GetJokes scrapes the page at url plus its second page (url + "/p2/") and
// stores the images it finds under a local directory named id.
//
// The returned map has two keys: "ID" holds the id (passed through
// GbkToUtf8) and "picUrl" is "success" when the first page could be fetched
// and processed, or "failed" otherwise. A failure on the second page is only
// logged. Upstream errors no longer terminate the process.
func GetJokes(url string, id string) map[string]string {
	info := make(map[string]string)
	info["ID"] = GbkToUtf8(id)
	info["picUrl"] = "failed"

	// id becomes a directory name below, so refuse anything unsafe.
	if !validID(id) {
		log.Printf("refusing invalid id %q", id)
		return info
	}

	// page one
	doc, err := fetchDocument(url, siteBaseURL)
	if err != nil {
		log.Println("fetch page one failed:", err)
		return info
	}
	saveBeforeImages(doc, id, url)
	saveDiaryImages(doc, id, url)

	// page two; image requests keep page one as their Referer.
	if doc2, err := fetchDocument(url+"/p2/", url); err != nil {
		log.Println("fetch page two failed:", err)
	} else {
		saveDiaryImages(doc2, id, url)
	}

	info["picUrl"] = "success"
	return info
}

// main registers indexHandler for every path and serves on :1024.
// Example request: http://127.0.0.1:1024/?id=dpg8426968
func main() {
	runtime.GOMAXPROCS(runtime.NumCPU())
	http.HandleFunc("/", indexHandler)
	// ListenAndServe only returns on failure (e.g. the port is in use).
	log.Fatal(http.ListenAndServe(":1024", nil))
}

// indexHandler reads the "id" form value, runs GetJokes for
// https://www.soyoung.com/<id> and writes the resulting map as JSON.
// Requests with a missing or unsafe id are answered with 400.
func indexHandler(w http.ResponseWriter, r *http.Request) {
	if err := r.ParseForm(); err != nil {
		http.Error(w, "bad request", http.StatusBadRequest)
		return
	}
	id := r.FormValue("id")
	if !validID(id) {
		http.Error(w, "invalid id", http.StatusBadRequest)
		return
	}

	info := GetJokes(siteBaseURL+id, id)
	s, err := json.Marshal(info)
	if err != nil {
		log.Println("encode response failed:", err)
		http.Error(w, "internal error", http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json; charset=utf-8")
	if _, err := w.Write(s); err != nil {
		log.Println("write response failed:", err)
	}
}