  • Second Assignment

    Assignment 1

    1. Weather experiment code

    #!/usr/bin/env python
    # _*_ coding:utf-8 _*_
    from bs4 import BeautifulSoup
    from bs4 import UnicodeDammit
    import urllib.request
    import sqlite3
    
    
    class weatherDB:
        def openDB(self):
            self.con = sqlite3.connect("weather.db")
            self.cursor = self.con.cursor()
            try:
                self.cursor.execute(
                    "create table weathers (wcity varchar(16),wdate varchar(16),wweather varchar(64),wtemp varchar(32),constraint pk_weather primary key(wcity,wdate))")
            except:
                self.cursor.execute("delete from weathers")
    
        def closeDB(self):
            self.con.commit()
            self.con.close()
    
        def insert(self, city, date, weather, temp):
            try:
                self.cursor.execute("insert into weathers (wcity,wdate,wweather,wtemp) values(?,?,?,?)",
                                    (city, date, weather, temp))
            except:
                print("err")
    
        def show(self):
            self.cursor.execute("select * from weathers")
            rows = self.cursor.fetchall()
            print("%-16s%-16s%-32s%-16s" % ("city", "date", "weather", "temp"))
            for row in rows:
                print("%-16s%-16s%-32s%-16s" % (row[0], row[1], row[2], row[3]))
    
    
    class weatherforecast():
        def __init__(self):
            self.headers = {                                       # disguise the request as coming from a browser; the codes for the four cities were looked up in advance
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                              "Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3775.400 QQBrowser/10.6.4209.400"}
            self.citycode = {"北京": "101010100", "上海": "101020100", "广州": "101280101", "深圳": "101280601"}
    
        def forecastcity(self, city):
            if city not in self.citycode.keys():                   # check that the city is in the known list, in case it was mistyped in the input
                print(city + "code not found")
                return
            url = "http://www.weather.com.cn/weather/" + self.citycode[city] + ".shtml"
            try:                                                   # open the weather page for this city and start scraping
                req = urllib.request.Request(url, headers=self.headers)
                data = urllib.request.urlopen(req)
                data = data.read()
                dammit = UnicodeDammit(data, ["utf-8", "gbk"])
                data = dammit.unicode_markup
                soup = BeautifulSoup(data, 'html.parser')
                lis = soup.select("ul[class='t clearfix'] li")     # inspecting the page source shows the forecast is stored inside this ul
                for li in lis:
                    try:
                        date_ = li.select('h1')[0].text            # use select to drill down level by level and pull out the date, weather and temperature
                        weather_ = li.select('p[class="wea"]')[0].text
                        temp_ = li.select('p[class="tem"] span')[0].text + '℃/' + li.select("p[class='tem'] i")[0].text
                        print(city, date_, weather_, temp_)
                        self.db.insert(city, date_, weather_, temp_)
                    except:
                        print('err1')
            except:
                print('err2')
    
        def precess(self, cities):
            self.db = weatherDB()                                   
            self.db.openDB()
            for city in cities:
                self.forecastcity(city)
            self.db.show()
            self.db.closeDB()
    
    
    ws = weatherforecast()
    ws.precess(["北京", '上海', '广州', '深圳'])
    print('completed')
    

    Result:

    2. Reflections

    For the weather forecast code, the part that was a bit hard to understand was the definition of the two classes and how their methods fit together; after puzzling over it myself, the role of each parameter became clear.
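
    To make the selector logic in forecastcity() concrete, here is a minimal sketch run against a made-up HTML fragment (the fragment below only imitates the www.weather.com.cn markup and is not a real page):

    from bs4 import BeautifulSoup

    # made-up stand-in for one day's <li> on the forecast page
    sample = """
    <ul class="t clearfix">
      <li><h1>7日（今天）</h1><p class="wea">多云</p>
          <p class="tem"><span>20</span><i>12℃</i></p></li>
    </ul>
    """
    soup = BeautifulSoup(sample, "html.parser")
    for li in soup.select("ul[class='t clearfix'] li"):
        date_ = li.select('h1')[0].text
        weather_ = li.select('p[class="wea"]')[0].text
        temp_ = li.select('p[class="tem"] span')[0].text + '℃/' + li.select("p[class='tem'] i")[0].text
        print(date_, weather_, temp_)   # -> 7日（今天） 多云 20℃/12℃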

    Assignment 2

    1. Stock scraping

    import requests
    from bs4 import BeautifulSoup
    import re
    
    
    
    def getHtmlText(url):
        head = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0',
                'Cookie': 'qgqp_b_id=54fe349b4e3056799d45a271cb903df3; st_si=24637404931419; st_pvi=32580036674154; st_sp=2019-11-12%2016%3A29%3A38; st_inirUrl=; st_sn=1; st_psi=2019111216485270-113200301321-3411409195; st_asi=delete'
                }
        try:
            r = requests.get(url, timeout=30, headers=head)
            r.raise_for_status()
            r.encoding = 'utf-8'
            return r.text
        except:
            return ""
    
    
    recordfile = 'Data.txt'
    url = 'http://51.push2.eastmoney.com/api/qt/clist/get?cb=jQuery112408349318807687469_1574045112932&pn=1&pz=20&po=1&np=2&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:0+t:6,m:0+t:13,m:0+t:80,m:1+t:2,m:1+t:23&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_=1574045112933'
    head = {
            "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre"
        }
    Codelist=[]
    DealData=[['股票代码', '今开', '最高', '最低', '昨收', '成交量', '成交额', '总市值', '流通市值', '振幅', '换手率', '市净率', '市盈率', ]]
    r = requests.get(url, timeout=30, headers=head)
    r.raise_for_status()
    r.encoding = 'utf-8'
    html=r.text
    # print(html)
    soup = str(BeautifulSoup(html,"html.parser"))
    # print(html)
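    # the push2 response is JSON wrapped in a jQuery callback; pull the six-digit stock codes out of its "f12" fields with a regex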
    regex=re.compile(r'.f12...\d{6}.')
    listpatterns=regex.findall(soup)
    for listpattern in listpatterns:
        numpattern=re.compile(r'\d{6}')
        Codelist.append(numpattern.findall(listpattern)[0])
    # print(Codelist)
    total = len(Codelist)
    CodeList = Codelist[:50]
    finished = int(0)
    for code in CodeList:
        finished = finished + 1
        finishedco = (finished / total) * 100
        print("total : {0}   finished : {1}    completion : {2}%".format(total, finished, finishedco))
        dealDataList = []
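        # build the detail-page URL for this stock on info.stcn.com, download it and parse the sj_r_* divs below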
        dataUrl = 'http://info.stcn.com/dc/stock/index.jsp?stockcode=' + code
        dataHtml = getHtmlText(dataUrl)
        soup = BeautifulSoup(dataHtml, "html.parser")
        dealDataList.append(code)
        for i in range(1, 4):
            classStr = 'sj_r_' + str(i)
            divdata = soup.find_all('div', {'class': classStr})
            if len(divdata) == 0:
                dealDataList.append('该股票暂时没有交易数据!')
                break
            dealData = str(divdata[0])
            dealPattern = re.compile(r'\d+.\d+[\u4e00-\u9fa5]|\d+.+.%|\d+.\d+')
            listdeal = dealPattern.findall(dealData)
            for j in range(0, 4):
                dealDataList.append(listdeal[j])
        DealData.append(dealDataList)
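    # write every collected row to Data.txt as tab-separated text; the first row of DealData is the header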
    file = open(recordfile, 'a+')
    for i in range(len(DealData)):
        if i == 0:
            s = str(DealData[i]).replace('[', '').replace(']', '')
            s = s.replace("'", '').replace(',', ' \t') + '\n'
        else:
            s = str(DealData[i]).replace('[', '').replace(']', '')
            s = s.replace("'", '').replace(',', '\t') + '\n'
        file.write(s)
    file.close()
    print(len(DealData))
    

    Result:

    2. Reflections

    Since this was my first time scraping data that is fetched in real time, it took a lot of effort, more than any other part. For this assignment, most of the code was adapted from related code on CSDN, and the final result came only after my own modifications.
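
    As a side note, the push2 endpoint used above returns JSONP (JSON wrapped in a jQuery callback), so an alternative to the regex is to strip the wrapper and parse the body with the json module. Below is a minimal sketch under that assumption, reusing the url and head defined above; the only field meaning assumed here is f12 = stock code, matching the regex in the code:

    import json
    import requests

    def fetch_codes(url, headers, limit=50):
        # download the JSONP text and keep everything between the first '(' and the last ')'
        text = requests.get(url, timeout=30, headers=headers).text
        body = text[text.index("(") + 1:text.rindex(")")]
        data = json.loads(body)
        rows = (data.get("data") or {}).get("diff", [])
        # guard in case diff comes back as a dict keyed by index instead of a list
        if isinstance(rows, dict):
            rows = list(rows.values())
        return [str(row["f12"]) for row in rows][:limit]

    # e.g. codes = fetch_codes(url, head), which should give the same codes as Codelist[:50] above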

    Assignment 3

    1. Self-selected stock code

    import requests
    Codelist=[]
    List = ["股票代码号","股票名称","今日最高","今日最低","今日开"]
    url = 'http://46.push2his.eastmoney.com/api/qt/stock/kline/get?cb=jQuery112406437068490950477_1602146854442&secid=1.600115&ut=fa5fd1943c7b386f172d6893dbfba10b&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5%2Cf6&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58%2Cf59%2Cf60%2Cf61&klt=101&fqt=0&end=20500101&lmt=120&_=1602146854482'
    head = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre"}
    r = requests.get(url, timeout=30, headers=head)
    r.raise_for_status()
    r.encoding = 'utf-8'
    html=r.text
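    # the JSONP body ends with the latest k-line record; rindex finds that record's opening quote and split breaks it into fields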
    msg=html[html.rindex('"',0,-10):]
    result=msg.split(",")
    print("股票代码号  股票名称  今日开  今日最高  今日最低")
    print("600115    "+"东方航空  "+result[2]+"   "+result[3]+"    "+result[4])
    

    Result:

    2. Reflections

    This one only came together after asking 霖哥 for advice. Using rindex to grab the relevant data straight from the end of the response and then splitting it into a list, the values can be printed directly.
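
    A minimal sketch of that rindex/split idea, run on a made-up tail of the kline response (the record contents below are hypothetical; in a real response the field order follows the fields2 parameter in the URL above):

    html = '...["2020-10-08,5.01,5.05,5.12,4.98,123456"]}});'   # hypothetical response tail
    msg = html[html.rindex('"', 0, -10):]   # last quote before the trailing junk = start of the last record
    result = msg.split(",")                 # ['"2020-10-08', '5.01', '5.05', ...]
    print(result)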
