zoukankan      html  css  js  c++  java
  • 获取淘宝特定商品信息

    获取淘宝特定商品信息

    import re
    import requests
    from bs4 import BeautifulSoup
    import numpy as np
    import  bs4
    def getHTMLText(url):
        """Fetch ``url`` and return the response body as text.

        The request is sent with a custom ``User-Agent`` header and with the
        cookies parsed from the ``coo`` string below, which is expected to hold
        ``name=value`` pairs separated by ``;`` (as copied from a browser).

        Args:
            url: Address of the page to download.

        Returns:
            The decoded page text, or ``""`` when the request fails or the
            server answers with an HTTP error status.
        """
        headers = {
            # NOTE(review): 'Chorme' looks like a typo for 'Chrome' -- confirm
            # before changing, since the server sees this value.
            'User-Agent': 'Chorme'}
        coo = "这里输入你们自己的cookie"  # paste your own browser cookie string here
        cookies = {}
        for pair in coo.split(';'):
            name, sep, value = pair.strip().partition('=')
            # Skip fragments that are not "name=value" (the untouched
            # placeholder, a trailing ';', ...) instead of aborting the whole
            # download because of one malformed piece.
            if not sep or not name:
                continue
            cookies[name] = value
        try:
            r = requests.get(url, cookies=cookies, headers=headers, timeout=30)
            r.raise_for_status()
            # Decode with the encoding detected from the body rather than the
            # one declared in the response headers.
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            # Best effort: callers treat an empty page as "nothing found".
            return ""
    
    
    def parasePage(ilt,html):
        """Extract ``[price, title]`` pairs from a search page into ``ilt``.

        The page text is scanned for ``"view_price":"..."`` and
        ``"raw_title":"..."`` fields; the n-th price is paired with the n-th
        title. If the two counts differ, the surplus entries are ignored.

        Args:
            ilt: List that receives one ``[price, title]`` entry (both strings)
                per product found. It is modified in place.
            html: Page text to scan.

        Returns:
            None. Prints ``"2"`` and leaves ``ilt`` untouched when ``html`` is
            not text that can be searched.
        """
        import json  # local import keeps this block self-contained

        try:
            # Capture only the quoted values. The price class is digits and
            # dots; the title match is non-greedy so it stops at the first
            # closing quote.
            prices = re.findall(r'"view_price":"([\d.]*)"', html)
            titles = re.findall(r'"raw_title":"(.*?)"', html)
        except TypeError:
            print("2")
            return

        for price, raw_title in zip(prices, titles):
            # Decode escapes such as \u003d the way a string literal would,
            # without evaluating scraped text as Python code.
            try:
                title = json.loads('"' + raw_title + '"')
            except ValueError:
                title = raw_title  # not a valid JSON string body: keep as is
            ilt.append([price, title])
    
    def printGoodList(ilt):
        """Print the collected goods as a numbered, tab-separated table.

        A header row is printed first, then one row per entry, numbered from 1.

        Args:
            ilt: Iterable of entries whose first two items are the price and
                the product title.

        Returns:
            None. If an entry cannot be indexed or formatted, ``'3'`` is
            printed and the remaining entries are skipped.
        """
        tplt = "{:4}\t{:8}\t{:16}"
        try:
            print(tplt.format("序号", "价格", "商品名称"))
            for count, g in enumerate(ilt, start=1):
                print(tplt.format(count, g[0], g[1]))
        except (IndexError, KeyError, TypeError, ValueError):
            print('3')
    
    def main(depth=2):
        """Ask for a search keyword, scrape the result pages and print them.

        Args:
            depth: Number of result pages to fetch. Defaults to 2.

        Returns:
            None. The collected ``[price, title]`` entries are printed with
            ``printGoodList``; the function then waits for one more line of
            input before returning.
        """
        from urllib.parse import quote  # local import keeps this block self-contained

        goods = input('请输入你想要查询的商品:\n')

        # Percent-encode the keyword so spaces, '&', '#' and non-ASCII text
        # cannot break or truncate the query string.
        start_url = "https://s.taobao.com/search?q=" + quote(goods)
        infoList = []
        for page in range(depth):
            try:
                # "s" is the result offset; the original author notes that a
                # result page shows 44 items.
                url = start_url + "&s=" + str(44 * page)
                html = getHTMLText(url)
                parasePage(infoList, html)
            except Exception:
                # Best effort: a failing page must not abort the other pages.
                continue
        printGoodList(infoList)
        input()  # wait for Enter; presumably keeps a console window open
    
    if __name__ == "__main__":
        # Start the interactive search only when run as a script, so that
        # importing this module does not prompt for input or hit the network.
        main()
    
    
    
    
    

    使用方式:

    1. 先将 getHTMLText 中的 cookie 占位符替换为自己登录淘宝后的 cookie,然后运行代码
    2. 输入想要查询的商品信息
    3. 显示出前两页(每页 44 条)相关商品的价格和名称
  • 相关阅读:
    java_db2错误码对应值
    oracle_用户与概要文件
    quartz配置时间
    bzoj2395: [Balkan 2011]Timeismoney
    bzoj2725: [Violet 6]故乡的梦
    bzoj4400: tjoi2012 桥
    双连通分量模板
    bzoj3047: Freda的传呼机 && 2125: 最短路
    bzoj3541: Spoj59 Bytelandian Information Agency
    bzoj1023: [SHOI2008]cactus仙人掌图
  • 原文地址:https://www.cnblogs.com/Xiong-Jun/p/13515049.html
Copyright © 2011-2022 走看看