zoukankan      html  css  js  c++  java
  • 爬虫:淘宝价格

     1 import requests
     2 import re
     3 
     def getHTMLText(url):
         """Fetch ``url`` and return the response body as text.

         Args:
             url: Address to request with HTTP GET.

         Returns:
             The decoded response text, or the sentinel string ``"error_1"``
             when the request fails (connection problem, timeout, invalid URL,
             or an HTTP error status reported by ``raise_for_status``).
         """
         try:
             r = requests.get(url, timeout=30)
             r.raise_for_status()
             # Decode with the encoding detected from the body rather than the
             # one taken from the response headers.
             r.encoding = r.apparent_encoding
             return r.text
         except requests.RequestException:
             # Only request-level failures map to the sentinel; KeyboardInterrupt,
             # SystemExit and programming errors are no longer swallowed.
             return "error_1"
    12 
    def parsePage(ilt, html):
        """Extract price/title pairs from page text and append them to ``ilt``.

        The page is scanned for ``"view_price":"..."`` and ``"raw_title":"..."``
        fields. The n-th price is paired with the n-th title; if the two counts
        differ, surplus entries are ignored.

        Args:
            ilt: List that receives one ``[price, title]`` entry per item.
                ``price`` is kept as the string found in the page.
            html: Page text to scan.

        Returns:
            None. ``ilt`` is modified in place. If ``html`` is not text,
            ``"error_2"`` is printed and ``ilt`` is left untouched.
        """
        # Local import so this function does not depend on the file's import block.
        import json

        try:
            # Capture groups return just the value, so no split(':') is needed
            # and titles that themselves contain ':' survive intact.
            prices = re.findall(r'"view_price":"([\d.]*)"', html)
            # Allow backslash-escaped characters (e.g. \" or \uXXXX) inside the title.
            raw_titles = re.findall(r'"raw_title":"((?:[^"\\]|\\.)*)"', html)
        except TypeError:
            print("error_2")
            return

        for price, raw_title in zip(prices, raw_titles):
            # The page content is untrusted, so decode the string literal with
            # the JSON parser instead of eval().
            try:
                title = json.loads('"' + raw_title + '"')
            except ValueError:
                # Not a valid JSON string body; keep the text as found.
                title = raw_title
            ilt.append([price, title])
    23 
    def printGoodsList(ilt):
        """Print the collected items as a tab-separated table.

        A header row is printed first, followed by one row per entry with a
        running number starting at 1.

        Args:
            ilt: Sequence of entries where index 0 is the price and index 1 is
                the title.
        """
        # Tabs are written as escapes so the separators are visible in the source.
        tplt = "{:4}\t{:8}\t{:16}"
        print(tplt.format("序号", "价格", "商品名称"))
        for count, g in enumerate(ilt, start=1):
            print(tplt.format(count, g[0], g[1]))
    31 
    def main(goods='书包', depth=2):
        """Search Taobao for ``goods`` and print the prices and titles found.

        Args:
            goods: Search keyword. Defaults to the keyword the script used
                originally.
            depth: Number of result pages to request.
        """
        # Local import so this function does not depend on the file's import block.
        from urllib.parse import quote

        # Step of the ``s`` query parameter between consecutive pages
        # (presumably the number of results per page -- confirm against the site).
        page_step = 44

        # Percent-encode the keyword so spaces, '&' or non-ASCII text cannot
        # corrupt the query string.
        start_url = 'https://s.taobao.com/search?q=' + quote(goods)
        infoList = []
        for i in range(depth):
            url = start_url + '&s=' + str(page_step * i)
            try:
                html = getHTMLText(url)
                parsePage(infoList, html)
            except Exception as exc:
                # Best effort: one bad page must not abort the run, but say so
                # instead of skipping it silently.
                print("error_3: skipped {} ({})".format(url, exc))
                continue
        printGoodsList(infoList)
    45 
    # Run the crawler only when executed as a script, so importing this module
    # does not trigger network requests.
    if __name__ == "__main__":
        main()
  • 相关阅读:
    测试心得
    mysql学习整理
    测试思考
    Win7下安装Mysql方法
    xcall.sh
    hive 的几款可视化连接工具
    mongo 使用 mongoexport 按照条件导出 csv 文件
    WebFlux springboot 2.0
    Calendar java  日历(常用小结)
    java 自定义注解(3)
  • 原文地址:https://www.cnblogs.com/dalanjing/p/7063051.html
Copyright © 2011-2022 走看看