zoukankan      html  css  js  c++  java
  • 合并之前

    main

    from tkinter import *
    import time
    import tkinter as tk
    import PythonTaoBao_TaobaoTest as TB
    from tkinter import messagebox
    import pandas as pd
    from tkinter import scrolledtext
    from tkinter import PhotoImage
    from PIL import Image, ImageTk
    import os
    import csv
    
    # ------------------ Root window ------------------
    root = Tk()
    root.title("爬虫小助手")
    root.geometry("600x700+550+100")

    # Two stacked containers: the header strip first, the button panel below it.
    UpperBlock = Frame(root, relief=SUNKEN, height=50, width=1600)
    UpperBlock.pack(side=TOP)

    TAB = Frame(root, relief=SUNKEN, height=700, width=400)
    TAB.pack(side=TOP)

    # ------------------ Timestamp ------------------
    # Taken once at start-up; the label that shows it is never refreshed.
    localtime = time.asctime()

    # ------------------ Header labels ------------------
    # Application title on the first grid row of the header strip.
    lblinfo = Label(
        UpperBlock,
        text="爬虫小助手",
        font=('Times New Roman', 30, 'bold'),
        fg="blue",
        bd=10,
        anchor='w',
    )
    lblinfo.grid(row=0, column=0)

    # Start-up time on the second row (the name lblinfo is rebound here).
    lblinfo = Label(
        UpperBlock,
        text=localtime,
        font=('aria', 20),
        fg="blue",
        anchor=W,
    )
    lblinfo.grid(row=1, column=0)
    
    #--------------函数调用功能------------------------------
    
    
    def _show_table_window(keyword):
        """Show the first four columns of ``<keyword>.csv`` in a scrollable window.

        The same rows are written, tab-separated and one per line, to
        ``<keyword>.txt``.  The text file is overwritten on every call so that
        repeated clicks do not accumulate duplicate rows.
        """
        try:
            # utf_8_sig tolerates a leading BOM and also reads BOM-less files.
            data = pd.read_csv(keyword + '.csv', encoding='utf_8_sig')
        except OSError as err:
            messagebox.showerror("文件读取失败", str(err))
            return

        text = ''.join(
            '\t'.join(str(cell) for cell in row[:4]) + '\n'
            for row in data.values
        )
        with open(keyword + '.txt', 'w', encoding='utf-8') as f:
            f.write(text)

        window = Toplevel()
        window.title("展示界面")
        window.geometry("400x300+550+100")
        viewer = scrolledtext.ScrolledText(window, width=50, height=10, font=("宋体", 10))
        viewer.place(x=30, y=30)
        viewer.insert(END, text)


    def _show_image_window(title, image_path):
        """Open a window titled *title* that displays the image at *image_path*."""
        try:
            # The context manager closes the file once the pixels are copied.
            with Image.open(image_path) as img_open:
                photo = ImageTk.PhotoImage(img_open)
        except OSError as err:
            messagebox.showerror("文件读取失败", str(err))
            return

        window = Toplevel()
        window.title(title)
        window.geometry("600x800+550+100")
        label_img = Label(window, image=photo)
        # Tk does not hold a Python reference to the image; attaching it to the
        # label keeps it alive per window instead of through a shared global.
        label_img.image = photo
        label_img.place(x=0, y=0)


    def _open_result_menu(keyword):
        """Open the menu offering the three result views for *keyword*."""
        rt2 = tk.Toplevel()
        rt2.title("调用成功")
        rt2.geometry('300x300')
        Button(rt2, text="查询内容",
               command=lambda: _show_table_window(keyword)).pack()
        Button(rt2, text="查询关键字",
               command=lambda: _show_image_window("查询关键字界面", keyword + '.png')).pack()
        Button(rt2, text="查询价格分布",
               command=lambda: _show_image_window("价格分布展示界面", keyword + '价格分布.png')).pack()


    def _start_crawl(keyword_var, pages_var):
        """Validate the dialog input, run the crawl and open the result menu."""
        # The entries are pre-filled with a space; strip it so it does not leak
        # into the query and the generated file names.
        keyword = keyword_var.get().strip()
        if not keyword:
            messagebox.showerror("输入错误", "关键字不能为空")
            return
        try:
            pages = int(pages_var.get())
        except ValueError:
            messagebox.showerror("输入错误", "页数必须是正整数")
            return
        if pages < 1:
            messagebox.showerror("输入错误", "页数必须是正整数")
            return

        print(keyword)
        # The depth argument is the page count plus one, presumably because
        # getData treats it as an exclusive bound -- confirm against TB.getData.
        data = TB.getData(TB.TaobaoUrl, keyword, pages + 1)
        TB.SaveData(keyword, data)
        TB.Hottopic(keyword)
        TB.PriceFigure(keyword)
        _open_result_menu(keyword)


    def _open_crawl_dialog():
        """Open the dialog asking for the search keyword and the page count."""
        rt = tk.Toplevel()
        rt.title("测试")
        rt.geometry('300x300')

        l1 = Label(rt, text="输入爬取的关键字:")
        l1.pack()
        xls_text = StringVar()
        xls = Entry(rt, textvariable=xls_text)
        xls_text.set(" ")
        xls.pack()

        l2 = Label(rt, text="输入爬取的页数:")
        l2.pack()
        xls2_text = StringVar()
        xls2 = Entry(rt, textvariable=xls2_text)
        xls2_text.set(" ")
        xls2.pack()

        Button(rt, text="press",
               command=lambda: _start_crawl(xls_text, xls2_text)).pack()


    def buttonclick(numbers):
        """Dispatch a click on one of the main-window buttons.

        Args:
            numbers: 1 and 2 show a placeholder message box; any other value
                opens the crawl dialog.
        """
        if numbers == 1:
            messagebox.showinfo("事件1","现在执行的是事件1")
        elif numbers == 2:
            messagebox.showinfo("事件2", "现在执行的是事件2")
        else:
            _open_crawl_dialog()
    
    #-----------一些测试信息------------------
    # Appearance shared by the three launch buttons.
    _launch_style = dict(
        padx=16,
        pady=16,
        bd=7,
        fg="black",
        bg="orange",
        font=('Times New Roman', 20, 'bold'),
    )

    button1 = Button(TAB, text="爬取淘宝", command=lambda: buttonclick(1), **_launch_style)
    button1.grid(row=2, column=0)

    button2 = Button(TAB, text="爬取豆瓣", command=lambda: buttonclick(2), **_launch_style)
    button2.grid(row=2, column=1)

    # The name button2 is rebound to the third button, as in the original;
    # the second button stays on screen but is no longer reachable by name.
    button2 = Button(TAB, text="爬取测试", command=lambda: buttonclick(3), **_launch_style)
    button2.grid(row=2, column=3)
    #----------关于EXIT的功能--------------------
    
    def qexit():
        """Destroy the root window (used as the EXIT button callback)."""
        root.destroy()
    
    
    # EXIT button, placed on grid row 8 of the button panel.
    buttonexit = Button(
        TAB,
        text="EXIT",
        command=qexit,
        width=10,
        padx=16,
        pady=8,
        bd=10,
        fg="black",
        bg="orange",
        font=('Times New Roman', 16, 'bold'),
    )
    buttonexit.grid(column=3, row=8)

    # ----------- Main loop ------------------
    # Blocks until the root window is destroyed.
    root.mainloop()
    View Code

    function_Zhu Jiu

    from bs4 import BeautifulSoup
    import re
    import urllib.request,urllib.error
    from urllib.parse import urlencode
    import xlwt
    import sqlite3
    import jieba
    import operator
    import pandas
    from wordcloud import WordCloud
    from matplotlib import pyplot as plt
    
    # File name constant (not referenced by the functions visible in this listing).
    FileName = "TaoBao.txt"
    # Base search URL; the encoded query string is appended directly to it.
    TaobaoUrl = 'https://s.taobao.com/search?'

    # One compiled pattern per field; each captures the quoted value that
    # follows the field's key in the page source.
    (FindTitle, FindPrice, FindComment, FindSale, FindNid) = (
        re.compile(pattern)
        for pattern in (
            r'"raw_title":"(.*?)"',
            r'"view_price":"(.*?)"',
            r'"comment_count":"(.*?)"',
            r'"view_sales":"(.*?)"',
            r'"nid":"(.*?)"',
        )
    )
    
    def askURL(url, timeout=10):
        """Fetch *url* with browser-like headers and return the body as text.

        Args:
            url: Address to request.
            timeout: Seconds to wait on the connection before giving up, so a
                stalled request cannot block the caller indefinitely.

        Returns:
            The response body decoded as UTF-8 (undecodable bytes are replaced
            rather than raising), or ``""`` when the request fails.  Failures
            are reported on stdout, not raised.
        """
        # NOTE(review): the cookie below is a hard-coded personal session
        # cookie; it looks time-limited and should come from configuration
        # rather than source control -- confirm and move it.
        head = {
            "accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "cookie":"t=32c7a85cd15aa831b8ec0dc54623814b; cna=DG+ZFyYN83MCAW8Ci6iqyWAe; xlly_s=1; miid=2028550422116889343; lLtC1_=1; _m_h5_tk=c829cf68e876d8c46a690fa1bd32dceb_1603189177067; _m_h5_tk_enc=786725f712559720af4e547a1185869a; sgcookie=E100v3lUlY2Mn9qHcIOEWSWHoxmJEQFA84URgVCAvFhUhCztc60kkVLRNTHY0kh5Q1Gi6ra%2BVmZfTginF7icJwPfhQ%3D%3D; uc3=lg2=UIHiLt3xD8xYTw%3D%3D&id2=UUBYgrfF9R5FYg%3D%3D&nk2=F5QbQQuNek%2FZK1s%3D&vt3=F8dCufHGEaKtervENj4%3D; lgc=tbn60165318; uc4=nk4=0%40FY5Qy64y3SsmdbiPdQMfcOrjzIuaMg%3D%3D&id4=0%40U2LK%2FwS6ehmB45%2B%2FKSl4I7%2FD3ptE; tracknick=tbn60165318; _cc_=U%2BGCWk%2F7og%3D%3D; enc=vdrXdFKY2xozxiBQPzFgqdULeLbSJ%2FZphd1xAWDpxUmZxatQDDmH%2B34zrqcdQ4rwpZrtlQRAQqyFPuQS%2FDL%2FVg%3D%3D; mt=ci=0_1; thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; cookie2=7769de3eba14a2d9ac704fb765673036; _tb_token_=39e8e34355a48; JSESSIONID=546A2DF4811D091AEB4BF5C0C57FF8DE; tfstk=cTJlBRjtQQ55nU5D1Y6WXdfZuOcAZCEPP1C23KRqeRichtJViAV4_bZ8PZohmW1..; l=eBgyZQlROoK9Q3ooBOfwourza77OSIRAguPzaNbMiOCPOt1p5amAWZ5U_QY9C3GVh6qBR3l63A9LBeYBqCvan5U62j-la_kmn; isg=BAkJZSbcsjYTsE7Cj68GIl6uGDVjVv2IE5HXp6t-hfAv8ikE86YNWPcoNFbEqpXA",
            "user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"
        }
        request = urllib.request.Request(url, headers=head)
        try:
            # The context manager closes the connection even if read() fails.
            with urllib.request.urlopen(request, timeout=timeout) as response:
                return response.read().decode('utf-8', errors='replace')
        except OSError as e:
            # URLError/HTTPError are OSError subclasses; plain socket errors
            # and timeouts are caught here as well.
            reported = False
            if hasattr(e, "code"):
                print(e.code)
                reported = True
            if hasattr(e, "reason"):
                print(e.reason)
                reported = True
            if not reported:
                print(e)
        return ""
    
    def getData(baseurl,keyword,deep):
        """Collect item records for *keyword* from consecutive search requests.

        One request is made for every value of ``range(1, deep)``.  Each page
        is scanned with the five module-level patterns and the matches are
        combined position by position, stopping at the shortest match list.

        Args:
            baseurl: URL prefix that the encoded query string is appended to.
            keyword: Search term, sent as the ``q`` parameter.
            deep: Exclusive upper bound of the request counter.

        Returns:
            A list of ``[title, price, sale, comment, id]`` lists of strings.
        """
        # NOTE(review): the loop counter is sent unchanged as the ``s``
        # parameter; confirm whether the site expects a page number or an
        # item offset there.
        rows = []
        for page in range(1, deep):
            pageurl = baseurl + urlencode({'q': keyword, 's': page})
            print(pageurl)
            html = askURL(pageurl)
            matches = [
                pattern.findall(html)
                for pattern in (FindTitle, FindPrice, FindSale, FindComment, FindNid)
            ]
            # zip() stops at the shortest list, so only complete records remain.
            rows.extend(list(record) for record in zip(*matches))
        return rows
    
    def SaveData(keyword,data):
        """Write *data* to ``<keyword>.csv`` in the current directory.

        Args:
            keyword: Used to build the output file name.
            data: Rows of five values each, in the order
                title, price, sale, comment, id.
        """
        header = ['title', 'price', 'sale', 'comment', 'id']
        target = '%s.csv' % keyword
        frame = pandas.DataFrame(data, columns=header)
        # utf_8_sig writes a leading BOM; the row index is left out.
        frame.to_csv(target, index=False, encoding='utf_8_sig')
    
    def ReadData(key):
        """Load ``<key>.csv`` from the current directory as a DataFrame.

        The file is decoded as ``utf_8_sig`` and parsed with pandas' Python
        engine.
        """
        source = '%s.csv' % key
        return pandas.read_csv(source, engine='python', encoding='utf_8_sig')
    
    def PriceFigure(key):
        """Save a 20-bin histogram of the ``price`` column to ``<key>价格分布.png``.

        The data is loaded through ``ReadData(key)``.  The figure is closed
        after saving so that repeated calls from a long-running process do
        not accumulate open figures.
        """
        data = ReadData(key)
        # SimHei is selected so the Chinese axis labels can be rendered.
        plt.rcParams['font.family'] = ['sans-serif']
        plt.rcParams['font.sans-serif'] = ['SimHei']
        fig = plt.figure(figsize=(5, 5))
        try:
            # Rows without a price would make the histogram range undefined.
            plt.hist(data['price'].dropna(), bins=20, alpha=0.6)
            plt.xlabel('价格')
            plt.ylabel('频数')
            plt.savefig('%s价格分布.png' % key)
        finally:
            plt.close(fig)
    
    def Hottopic(key):
        """Render a word cloud of all item titles to ``<key>.png``.

        The titles loaded through ``ReadData(key)`` are concatenated, segmented
        with jieba in full mode and passed to WordCloud as space-separated
        words.
        """
        data = ReadData(key)
        # Missing titles come back from the CSV as NaN and all-numeric titles
        # as numbers; drop the former and stringify the latter so the join
        # cannot fail on a non-string value.
        titles = data['title'].dropna().astype(str)
        content = ''.join(titles)
        word = " ".join(jieba.cut(content, cut_all=True))
        # NOTE(review): with no titles the text is empty; WordCloud is expected
        # to reject that -- confirm how callers should handle an empty crawl.
        Topic = WordCloud('simhei.ttf', background_color="black", width=1000, height=600).generate(word)
        Topic.to_file('%s.png' % key)
    View Code
  • 相关阅读:
    Z-stack之OSAL初始化流程
    回调函数
    VCC,VDD,VEE,VSS,VPP 表示的意义
    ARM内核全解析,从ARM7,ARM9到Cortex-A7,A8,A9,A12,A15到Cortex-A53,A57
    SQL Server 2005 版本的操作系统兼容性详细列表
    Windows Xp Home Edition 安装IIS组件
    MySql 5.7中添加用户,新建数据库,用户授权,删除用户,修改密码
    AtCoder Beginner Contest 077(ABC)
    Codeforces Round #441 (Div. 2, by Moscow Team Olympiad)(A B C D)
    B. Which floor?
  • 原文地址:https://www.cnblogs.com/Anonytt/p/14035893.html
Copyright © 2011-2022 走看看