// 时隔多年，感慨良多。废话不多说，上代码。
package main
import (
"crypto/md5"
"encoding/hex"
"encoding/json"
"io/ioutil"
"log"
"net/http"
"os"
"runtime"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/axgle/mahonia"
)
// ConvertToString performs a character-set conversion of src.
//
// src is first decoded with the mahonia decoder registered for srcCode, and
// the decoded text is then passed through the decoder registered for tagCode.
// NOTE(review): both steps use a *decoder*, so this presumably only makes
// sense when tagCode names the encoding the first step produces (the only
// caller in this file passes "utf-8") — confirm before using other targets.
//
// If either charset name is not known to mahonia, src is returned unchanged
// instead of calling a nil decoder (which would panic).
func ConvertToString(src string, srcCode string, tagCode string) string {
	srcCoder := mahonia.NewDecoder(srcCode)
	tagCoder := mahonia.NewDecoder(tagCode)
	if srcCoder == nil || tagCoder == nil {
		// Unknown charset: there is nothing sensible to convert with.
		return src
	}

	decoded := srcCoder.ConvertString(src)
	// eof=true: the whole input is available, so no trailing bytes are held back.
	_, cdata, _ := tagCoder.Translate([]byte(decoded), true)
	return string(cdata)
}
// GbkToUtf8 converts src via ConvertToString using "gbk" as the source
// charset name and "utf-8" as the target charset name.
func GbkToUtf8(src string) string {
	const (
		from = "gbk"
		to   = "utf-8"
	)
	return ConvertToString(src, from, to)
}
// downloadImg downloads the image at img_url and stores it in filename.
//
// The request is sent with a browser-like User-Agent and the given Referer
// header. The body is read completely before the file is created, so a failed
// download does not leave an empty file behind. Every failure (malformed URL,
// transport error, non-2xx status, read or write error) is logged and the
// function returns without reporting it to the caller.
//
// NOTE(review): the client has no timeout, so a stalled server blocks this
// call indefinitely.
func downloadImg(img_url string, filename string, Referer string) {
	req, err := http.NewRequest("GET", img_url, nil)
	if err != nil {
		// A malformed URL yields a nil request; using it below would panic.
		log.Println("build request failed:", img_url, err)
		return
	}
	req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36")
	req.Header.Add("Referer", Referer)

	client := &http.Client{}
	response, err := client.Do(req)
	if err != nil {
		log.Println("get img_url failed:", err)
		return
	}
	defer response.Body.Close()

	// Do not save error pages (404, 403, ...) as if they were images.
	if response.StatusCode < 200 || response.StatusCode >= 300 {
		log.Println("get img_url failed:", img_url, response.Status)
		return
	}

	data, err := ioutil.ReadAll(response.Body)
	if err != nil {
		log.Println("read data failed:", img_url, err)
		return
	}

	// WriteFile creates or truncates the file, writes data and closes it,
	// surfacing write/close errors that were previously ignored.
	if err := ioutil.WriteFile(filename, data, 0666); err != nil {
		log.Println("write file failed:", filename, err)
	}
}
// GetJokes scrapes the diary at url and its second page (url + "/p2/") and
// downloads the images it finds below the directory "./<id>":
//
//	./<id>/before/<md5 of image URL>.jpg  links of ".big-photo" elements on
//	                                      page one (pre-operation images)
//	./<id>/<day>/<md5 of image URL>.jpg   "data-img" of the images in each
//	                                      ".diary-item", grouped by the text of
//	                                      the item's ".day" element
//
// id is used verbatim as the directory name; callers must make sure it is a
// safe path component.
//
// The returned map holds "ID" (id passed through GbkToUtf8) and "picUrl",
// which is "success" when both pages were fetched and parsed and "failed"
// otherwise. Fetch errors are logged and reported through "picUrl" rather
// than terminating the process, because this function runs inside an HTTP
// handler. Individual image download failures are only logged by downloadImg.
func GetJokes(url string, id string) map[string]string {
	info := map[string]string{
		"ID":     GbkToUtf8(id),
		"picUrl": "failed",
	}
	client := &http.Client{}

	// Page one.
	doc, err := fetchDiaryPage(client, url, "https://www.soyoung.com/")
	if err != nil {
		log.Println("fetch page failed:", url, err)
		return info
	}

	// Pre-operation image links.
	var beforeImgs []string
	doc.Find(".big-photo").Each(func(_ int, s *goquery.Selection) {
		// Skip anchors without a usable link instead of requesting "".
		if href, ok := s.Attr("href"); ok && href != "" {
			beforeImgs = append(beforeImgs, href)
		}
	})
	beforeDir := "./" + id + "/before"
	if err := os.MkdirAll(beforeDir, os.ModePerm); err != nil {
		log.Println(err)
	} else {
		for _, imgURL := range beforeImgs {
			downloadImg(imgURL, beforeDir+"/"+imgFileName(imgURL), url)
		}
	}

	// Post-operation diary entries, page one.
	saveDiaryImages(doc, id, url)

	// Page two; page one serves as the Referer.
	pageTwo := url + "/p2/"
	doc2, err := fetchDiaryPage(client, pageTwo, url)
	if err != nil {
		log.Println("fetch page failed:", pageTwo, err)
		return info
	}
	saveDiaryImages(doc2, id, url)

	info["picUrl"] = "success"
	return info
}

// fetchDiaryPage GETs pageURL with the scraper's fixed browser headers and
// parses the response as an HTML document.
//
// NOTE(review): the Cookie header embeds a hard-coded session captured from a
// browser, and client has no timeout — both are worth revisiting.
func fetchDiaryPage(client *http.Client, pageURL string, referer string) (*goquery.Document, error) {
	req, err := http.NewRequest("GET", pageURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36")
	req.Header.Add("Referer", referer)
	req.Header.Add("Cookie", "__order_time__=undefined; msg_time=undefined; back_order_time=undefined; complain_time=undefined; __usersign__=1570614910876417305; _ga=GA1.2.2061581476.1570614904; _gid=GA1.2.1666843180.1570614904; PHPSESSID=5001a7796cc83a8255b33284a3a30dd7; cityId=1; Hm_lvt_b366fbb5465f5a86e1cc2871552e1fdb=1570614904,1570693381; __p_t__=15706935958294; __postion__=a%3A4%3A%7Bs%3A6%3A%22cityId%22%3Bs%3A3%3A%22207%22%3Bs%3A8%3A%22cityName%22%3Bs%3A9%3A%22%E6%B3%89%E5%B7%9E%E5%B8%82%22%3Bs%3A8%3A%22cityCode%22%3Bs%3A3%3A%22134%22%3Bs%3A3%3A%22jwd%22%3Bi%3A0%3B%7D; _gat=1; Hm_lpvt_b366fbb5465f5a86e1cc2871552e1fdb=1570694344")

	res, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()
	return goquery.NewDocumentFromResponse(res)
}

// saveDiaryImages downloads the images of every ".diary-item" in doc into
// "./<id>/<day>/", where <day> is the text of the item's ".day" element.
func saveDiaryImages(doc *goquery.Document, id string, referer string) {
	doc.Find(".diary-item").Each(func(_ int, item *goquery.Selection) {
		dir := "./" + id + "/" + item.Find(".day").Text()
		// MkdirAll tolerates a directory that already exists (same day on
		// both pages, or a repeated run).
		if err := os.MkdirAll(dir, os.ModePerm); err != nil {
			log.Println("create dir failed:", dir, err)
			return
		}
		item.Find(".photo-list li img").Each(func(_ int, img *goquery.Selection) {
			imgURL, ok := img.Attr("data-img")
			if !ok || imgURL == "" {
				return
			}
			// Strip the thumbnail size suffix and the "face/" path segment
			// from the URL before requesting it.
			imgURL = strings.Replace(imgURL, "_301_301", "", -1)
			imgURL = strings.Replace(imgURL, "face/", "", -1)
			downloadImg(imgURL, dir+"/"+imgFileName(imgURL), referer)
		})
	})
}

// imgFileName returns the local file name for an image URL: the hex MD5 of
// the URL followed by ".jpg".
func imgFileName(imgURL string) string {
	sum := md5.Sum([]byte(imgURL))
	return hex.EncodeToString(sum[:]) + ".jpg"
}
// main serves indexHandler on port 1024.
//
// Example request: http://127.0.0.1:1024/?id=dpg8426968
func main() {
	// Redundant since Go 1.5 (GOMAXPROCS already defaults to the CPU count);
	// kept so behavior on older toolchains is unchanged.
	runtime.GOMAXPROCS(runtime.NumCPU())

	http.HandleFunc("/", indexHandler)
	// ListenAndServe only returns on failure (e.g. the port is in use);
	// report it instead of exiting silently.
	log.Fatal(http.ListenAndServe(":1024", nil))
}
// indexHandler handles "/?id=<diary id>": it scrapes
// https://www.soyoung.com/<id> through GetJokes and answers with the returned
// map encoded as JSON.
//
// id comes straight from the client and ends up in both a URL and local
// directory names, so requests with a missing id or an id containing path
// separators or ".." are rejected with 400 Bad Request. This also keeps
// stray requests without an id (such as /favicon.ico) from triggering a scrape.
func indexHandler(w http.ResponseWriter, r *http.Request) {
	if err := r.ParseForm(); err != nil {
		http.Error(w, "bad request", http.StatusBadRequest)
		return
	}

	id := r.FormValue("id")
	if id == "" || id == "." || strings.Contains(id, "..") || strings.ContainsAny(id, `/\`) {
		http.Error(w, "invalid id", http.StatusBadRequest)
		return
	}

	info := GetJokes("https://www.soyoung.com/"+id, id)

	body, err := json.Marshal(info)
	if err != nil {
		log.Println("encode response failed:", err)
		http.Error(w, "internal error", http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	if _, err := w.Write(body); err != nil {
		log.Println("write response failed:", err)
	}
}