zoukankan      html  css  js  c++  java
  • 爬虫系列之淘宝商品爬取

     1 import re
     2 import requests
     3 
     4 def getHTMLText(url):
     5     try:
     6         r = requests.get(url, timeout = 30)
     7         r.raise_for_status()
     8         r.encoding = r.apparent_encoding
     9         return r.text
    10     except:
    11         return ""
    12 
    13 
    14 def parsePage(ilt, html):
    15     try:
    16         plt = re.findall(r'"view_price":"[d.]*"', html)
    17         tlt = re.findall(r'"raw_title":".*?"', html)
    18         for i in range(len(plt)):
    19             price = eval(plt[i].split(":")[1])  #eval就是将字符串string对象转化为有效的表达式参与求值运算返回计算结果
    20             title = eval(tlt[i].split(":")[1])
    21             ilt.append([price, title])
    22     except:
    23         print("")       
    24 
    25 def printGoodsList(ilt):
    26     tplt = "{:4}	{:8}	{:16}"  #规定输出格式
    27     print(tplt.format("序号", "价格", "商品名称"))
    28     count = 0
    29     for g in ilt:
    30         count = count + 1
    31         print(tplt.format(count, g[0], g[1]))
    32     print("")
    33 
    34 def main():
    35     goods = '书包'
    36     depth = 2
    37     start_url = 'https://s.taobao.com/search?q=' + goods
    38     infoList = []
    39     for i in range(depth):
    40         try:
    41             url = start_url + '&s=' + str(44*i)
    42             html = getHTMLText(url)
    43             parsePage(infoList,html)
    44         except:
    45             continue
    46     printGoodsList(infoList)
    47 
    48 
    49 main()

  • 相关阅读:
    图像处理-06-图像的反色处理
    Egg.js框架
    Node基础
    Node介绍与安装
    线性表结构-数组(散列表与可变长度数组)
    复杂度分析和大O表示法
    Java框架之Struts2(六)
    Java框架之Struts2(五)
    Java框架之Struts2(四)
    Java框架之Struts2(三)
  • 原文地址:https://www.cnblogs.com/zyb993963526/p/9090107.html
Copyright © 2011-2022 走看看