zoukankan      html  css  js  c++  java
  • 爬虫:淘宝价格

     1 import requests
     2 import re
     3 
     4 def getHTMLText(url):
     5     try:
     6         r = requests.get(url, timeout = 30)
     7         r.raise_for_status()
     8         r.encoding = r.apparent_encoding
     9         return r.text
    10     except:
    11         return "error_1"
    12 
    13 def parsePage(ilt, html):
    14     try:
    15         plt = re.findall(r'"view_price":"[d.]*"', html)
    16         tlt = re.findall(r'"raw_title":".*?"', html)
    17         for i in range(len(plt)):
    18             price = eval(plt[i].split(':')[1])
    19             title = eval(tlt[i].split(':')[1])
    20             ilt.append([price, title])
    21     except:
    22         print("error_2")
    23 
    24 def printGoodsList(ilt):
    25     tplt = "{:4}	{:8}	{:16}"
    26     print(tplt.format("序号", "价格", "商品名称"))
    27     count = 0
    28     for g in ilt:
    29         count = count + 1
    30         print(tplt.format(count, g[0], g[1]))
    31 
    32 def main():
    33     goods = '书包'
    34     depth = 2
    35     start_url = 'https://s.taobao.com/search?q=' + goods
    36     infoList = []
    37     for i in range(depth):
    38         try:
    39             url = start_url + '&s=' + str(44*i)
    40             html = getHTMLText(url)
    41             parsePage(infoList, html)
    42         except:
    43             continue
    44     printGoodsList(infoList)
    45 
    46 main()
  • 相关阅读:
    ORACLE表空间管理维护
    oracle表分区详解
    Jquery
    B
    A
    E
    字符串排成字典序,字符串数组
    命令,快捷键,配置
    第一个java程序
    A
  • 原文地址:https://www.cnblogs.com/dalanjing/p/7063051.html
Copyright © 2011-2022 走看看