zoukankan      html  css  js  c++  java
  • 比较爬虫用的语言Python与Go

    Python是我比较喜欢的语言,莫名的喜欢,对Python的学习可能起初是敲错了网址开始的,哈哈哈~

    工作任务是登录一个网站后台、爬取数据,然后写入服务器上的Redis。同事认为我会用PHP来写,哼!让你猜到那该多没意思,于是乎有了如下Python的代码,你看,50多行就搞定了。

     1 #!/usr/bin/python3
     2 import requests
     3 import re
     4 import redis
     5 from pyquery import PyQuery as pq
     6 
     # Login endpoint of the management console and the account used to sign in.
     loginUrl = 'https://manage.xxx.com.cn/home/login'
     userName = 'xxx'
     passWord = 'xxx'

     # Redis instance that receives the aggregated volumes.
     redisServer = '192.168.0.2'
     redisPort = 6379
     redisPass = ''

     # Maps the product name shown in the report table to its product code.
     productList = {
         '椰油': 'CL_Spot',
         '咖啡': 'COFFEE',
         '工业铜': 'COPPER',
     }
     # One two-slot accumulator per product code: slot 0 is written to the
     # redis_order_count_u_* key and slot 1 to the redis_order_count_d_* key.
     volumeList = {code: [0, 0] for code in productList.values()}
    17 
    def main():
        """Log in, scrape the order table and publish per-product volumes to Redis.

        Fetches the first report page (pageSize=100), and for every row of the
        second table whose product name is listed in productList adds
        column 7 * column 8 to that product's total. The totals are stored under
        redis_order_count_u_<code> and redis_order_count_d_<code>.

        Raises:
            requests.HTTPError: if the report page answers with an error status.
        """
        # NOTE(review): the two direction labels compared against column 6 are
        # empty in this copy of the script (both checks read == ''), which looks
        # like characters lost in transcription. Restore the real labels here;
        # until then a row with an empty column 6 is counted in both totals.
        directionUp = ''
        directionDown = ''

        jsessionid = getCookie()
        doLogin(jsessionid)
        # Same host as loginUrl, which is where the session cookie comes from.
        dataUrl = 'https://manage.xxx.com.cn/?pageNo=1&pageSize=100'
        cookies = {'JSESSIONID': jsessionid}
        r = requests.get(dataUrl, cookies=cookies)
        # Fail loudly instead of parsing an error page and overwriting the
        # Redis counters with zeros.
        r.raise_for_status()
        dom = pq(r.text)
        lines = dom('table').eq(1).find('tr').items()
        cellPattern = re.compile(r'<td>(.*?)</td>')  # compiled once, not per row
        for line in lines:
            line = re.sub(r'<!--.*-->', '', str(line))
            group = cellPattern.findall(line)
            # Rows without <td> cells (e.g. headers) and rows too short to hold
            # columns 3, 6, 7 and 8 are ignored.
            if len(group) < 9:
                continue
            productCode = productList.get(group[3])
            if productCode is None:
                # Product is not tracked: skip it instead of raising KeyError.
                continue
            direction = group[6]
            if direction != directionUp and direction != directionDown:
                continue
            volume = int(group[7]) * int(group[8])
            if direction == directionUp:
                volumeList[productCode][0] += volume
            if direction == directionDown:
                volumeList[productCode][1] += volume

        redisClient = redis.Redis(host=redisServer, port=redisPort, password=redisPass)
        for code, (up, down) in volumeList.items():
            redisClient.set('redis_order_count_u_%s' % code, int(up))
            redisClient.set('redis_order_count_d_%s' % code, int(down))
    44 
    def getCookie():
        """Request the login page and return the JSESSIONID cookie from the response."""
        headers = {
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        }
        response = requests.get(loginUrl, headers=headers)
        return response.cookies['JSESSIONID']
    49 
    def doLogin(jsessionid):
        """Post the credentials to the login URL within the given session.

        Args:
            jsessionid: value of the JSESSIONID cookie to send with the request.

        Raises:
            requests.HTTPError: if the login request answers with an error status.
        """
        param = {'userName': userName, 'password': passWord}
        cookies = {'JSESSIONID': jsessionid}
        response = requests.post(loginUrl, data=param, cookies=cookies)
        # Surface a rejected login here instead of scraping an unauthenticated page.
        response.raise_for_status()
    54     
    55 
    # Script entry point: run the scrape only when executed directly.
    if __name__ == "__main__":
        main()

    另一个服务也需要这个需求,用了最近看的Golang来实现一次,瞧写了100多行

      1 package main
      2 
      3 import (
      4     "fmt"
      5     "net/http"
      6     "net/url"
      7     "os"
      8     "strings"
      9     "strconv"
     10     "gopkg.in/redis.v4"
     11     "github.com/PuerkitoBio/goquery"
     12 )
     13 
     // Endpoints and credentials of the management console.
     var (
         loginUrl = "https://manage.xxx.com.cn/home/login"
         dataUrl  = "https://manage.xxx.com.cn/?pageNo=1&pageSize=100"
         userName = "xxx"
         passWord = "xxx"
     )

     // Connection settings of the Redis server that stores the totals.
     var (
         redisServer = "192.168.1.2"
         redisPort   = "6379"
         redisPass   = ""
         redisDB     = 0
     )
     22 
     23 func main() {
     24     productList := make(map[string] string)
     25     productList["椰油"] = "CL_Spot"
     26     productList["咖啡"] = "COFFEE"
     27     productList["工业铜"] = "COPPER"
     28     volumeList := make(map[string] int)
     29     volumeList["u_CL_Spot"] = 0
     30     volumeList["d_CL_Spot"] = 0
     31     volumeList["u_COFFEE"] = 0
     32     volumeList["d_COFFEE"] = 0
     33     volumeList["u_COPPER"] = 0
     34     volumeList["d_COPPER"] = 0
     35     jsessionid := getCookie()
     36     doLogin(jsessionid)
     37 
     38     request, err := http.NewRequest("GET", dataUrl, nil)
     39     request.AddCookie(&http.Cookie{Name: "JSESSIONID", Value: jsessionid})
     40     client := &http.Client{}
     41     response, err := client.Do(request)
     42     if err != nil {
     43         fmt.Println(err.Error())
     44         os.Exit(0)
     45     }
     46     defer response.Body.Close()
     47     doc, err := goquery.NewDocumentFromReader(response.Body)
     48     doc.Find("table").Eq(1).Find("tr").Each(func(i int, tr *goquery.Selection) {
     49         td := tr.Find("td")
     50         name := td.Eq(3).Text()
     51         dir := td.Eq(6).Text()
     52         if val, ok := productList[name]; ok {
     53             buyNum, _ := strconv.Atoi(td.Eq(7).Text())
     54             buyUnit, _ := strconv.Atoi(td.Eq(8).Text())
     55             num :=  buyNum * buyUnit
     56             cacheKey := ""
     57             if dir == "" {
     58                 cacheKey = fmt.Sprintf("u_%s", val)
     59             } else if dir == "" {
     60                 cacheKey = fmt.Sprintf("d_%s", val)
     61             }
     62             volumeList[cacheKey] += num
     63         }
     64     })
     65     redisClient := redis.NewClient(&redis.Options{
     66         Addr:     fmt.Sprintf("%s:%s", redisServer, redisPort),
     67         Password: redisPass,
     68         DB:       redisDB,
     69     })
     70     for k, v := range volumeList {
     71         strKey := fmt.Sprintf("redis_order_count_%s", k)
     72         redisClient.Set(strKey, int(v), 0)
     73     }
     74     fmt.Println("puti volume get success")
     75 }
     76 
     77 func getCookie() string {
     78     jsessionid := ""
     79     response, err := http.Get(loginUrl)
     80     if err != nil {
     81         fmt.Println(err.Error())
     82         os.Exit(0)
     83     }
     84     defer response.Body.Close()
     85     for _, val := range response.Cookies() {
     86         if val.Name == "JSESSIONID" {
     87             jsessionid = val.Value
     88         }
     89     }
     90     return jsessionid
     91 }
     92 
     93 func doLogin(jsessionid string) bool {
     94     data := url.Values{}
     95     data.Set("userName", userName)
     96     data.Add("password", passWord)
     97     request, _ := http.NewRequest("POST", loginUrl, strings.NewReader(data.Encode()))
     98     request.Header.Add("Content-Type", "application/x-www-form-urlencoded")
     99     request.Header.Add("Content-Length", strconv.Itoa(len(data.Encode())))
    100     request.AddCookie(&http.Cookie{Name: "JSESSIONID", Value: jsessionid})
    101     client := &http.Client{}
    102     response, err := client.Do(request)
    103     if err != nil {
    104         fmt.Println(err.Error())
    105         os.Exit(0)
    106     }
    107     defer response.Body.Close()
    108     return true
    109 }

    Python的实现从开发到上线半天的功夫就搞定了,Go足足搞了1整天,陌生又别扭的语法让我学到了很多知识点,最后在Mac上编译、拿到Linux上执行也给我上了一课。

    觉得入门学习这两门语言挺好,一个是脚本语言另一个是编译语言,用处都很广泛。轩轩你准备好了吗?

  • 相关阅读:
    java的eclipse集成开发环境中引入java web项目
    Uncaught SyntaxError: Unexpected identifier错误的解决方法
    layui框架和iframe总结 layui框架最简单的iframe版使用
    js不完全总结,除内置对象,DOM,BOM
    css简单总结
    机器学习之ID3决策树python算法实现
    python邮件发送正文,和图片,文件附件
    python邮件发送基础知识
    python实现带附件的邮件发送基于smtp协议
    Go语言之sync包 WaitGroup的使用
  • 原文地址:https://www.cnblogs.com/aboys/p/10025409.html
Copyright © 2011-2022 走看看