  • Second Assignment

    Assignment 1

    1. Weather experiment code

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    from bs4 import BeautifulSoup
    from bs4 import UnicodeDammit
    import urllib.request
    import sqlite3
    
    
    class WeatherDB:
        def openDB(self):
            self.con = sqlite3.connect("weather.db")
            self.cursor = self.con.cursor()
            try:
                # create the table on first use; (wcity, wdate) is the primary key
                self.cursor.execute(
                    "create table weathers (wcity varchar(16),wdate varchar(16),wweather varchar(64),wtemp varchar(32),constraint pk_weather primary key(wcity,wdate))")
            except Exception:
                # the table already exists: clear old rows so each run starts fresh
                self.cursor.execute("delete from weathers")
    
        def closeDB(self):
            self.con.commit()
            self.con.close()
    
        def insert(self, city, date, weather, temp):
            try:
                self.cursor.execute("insert into weathers (wcity,wdate,wweather,wtemp) values(?,?,?,?)",
                                    (city, date, weather, temp))
            except Exception as err:
                # a duplicate (wcity, wdate) primary key lands here
                print(err)
    
        def show(self):
            self.cursor.execute("select * from weathers")
            rows = self.cursor.fetchall()
            print("%-16s%-16s%-32s%-16s" % ("city", "date", "weather", "temp"))
            for row in rows:
                print("%-16s%-16s%-32s%-16s" % (row[0], row[1], row[2], row[3]))
    
    
    class WeatherForecast:
        def __init__(self):
            self.headers = {                                       # spoof a browser User-Agent; the four city codes below were looked up in advance
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                              "Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3775.400 QQBrowser/10.6.4209.400"}
            self.citycode = {"北京": "101010100", "上海": "101020100", "广州": "101280101", "深圳": "101280601"}
    
        def forecastcity(self, city):
            if city not in self.citycode.keys():                   # check the city is in our code table, in case the input was mistyped
                print(city + " code not found")
                return
            url = "http://www.weather.com.cn/weather/" + self.citycode[city] + ".shtml"
            try:                                                   # open this city's weather page and start scraping
                req = urllib.request.Request(url, headers=self.headers)
                data = urllib.request.urlopen(req)
                data = data.read()
                dammit = UnicodeDammit(data, ["utf-8", "gbk"])
                data = dammit.unicode_markup
                soup = BeautifulSoup(data, 'html.parser')
                lis = soup.select("ul[class='t clearfix'] li")     # the page source shows the forecast lives in this ul
                for li in lis:
                    try:
                        date_ = li.select('h1')[0].text            # select layer by layer to pull out the date, weather and temperature
                        weather_ = li.select('p[class="wea"]')[0].text
                        temp_ = li.select('p[class="tem"] span')[0].text + '℃/' + li.select("p[class='tem'] i")[0].text
                        print(city, date_, weather_, temp_)
                        self.db.insert(city, date_, weather_, temp_)
                    except Exception as err:
                        print('err1', err)
            except Exception as err:
                print('err2', err)
    
        def process(self, cities):
            self.db = WeatherDB()
            self.db.openDB()
            for city in cities:
                self.forecastcity(city)
            self.db.show()
            self.db.closeDB()
    
    
    ws = WeatherForecast()
    ws.process(["北京", "上海", "广州", "深圳"])
    print('completed')
    

    Result:

    2. Reflections

    In the weather-forecast code, the part I found hard to grasp at first was the definition of the two classes and how their methods fit together; after working through it on my own, the role of each parameter became clear.
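
    As a concrete aside on that two-class split, here is a minimal sketch of the storage class alone, assuming the same weather.db file and weathers schema as above; it swaps the try/except around table creation for "create table if not exists", which is just a hedged variant, not the assignment's required form:

    import sqlite3


    class WeatherDBSketch:
        def openDB(self):
            self.con = sqlite3.connect("weather.db")
            self.cursor = self.con.cursor()
            # create the table only when it is missing, then clear old rows
            self.cursor.execute(
                "create table if not exists weathers (wcity varchar(16),wdate varchar(16),"
                "wweather varchar(64),wtemp varchar(32),"
                "constraint pk_weather primary key(wcity,wdate))")
            self.cursor.execute("delete from weathers")

        def closeDB(self):
            self.con.commit()
            self.con.close()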

    Assignment 2

    1. Stock scraping

    import requests
    from bs4 import BeautifulSoup
    import re
    
    
    
    def getHtmlText(url):
        head = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0',
                'Cookie': 'qgqp_b_id=54fe349b4e3056799d45a271cb903df3; st_si=24637404931419; st_pvi=32580036674154; st_sp=2019-11-12%2016%3A29%3A38; st_inirUrl=; st_sn=1; st_psi=2019111216485270-113200301321-3411409195; st_asi=delete'
                }
        try:
            r = requests.get(url, timeout=30, headers=head)
            r.raise_for_status()
            r.encoding = 'utf-8'
            return r.text
        except Exception:
            return ""
    
    
    recordfile = 'Data.txt'
    url = 'http://51.push2.eastmoney.com/api/qt/clist/get?cb=jQuery112408349318807687469_1574045112932&pn=1&pz=20&po=1&np=2&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:0+t:6,m:0+t:13,m:0+t:80,m:1+t:2,m:1+t:23&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_=1574045112933'
    head = {
            "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre"
        }
    Codelist = []
    DealData = [['股票代码', '今开', '最高', '最低', '昨收', '成交量', '成交额', '总市值', '流通市值', '振幅', '换手率', '市净率', '市盈率']]
    r = requests.get(url, timeout=30, headers=head)
    r.raise_for_status()
    r.encoding = 'utf-8'
    html = r.text
    # print(html)
    soup = str(BeautifulSoup(html, "html.parser"))
    regex = re.compile(r'.f12...\d{6}.')          # match the "f12":"123456" stock-code entries in the JSONP text
    listpatterns = regex.findall(soup)
    for listpattern in listpatterns:
        numpattern = re.compile(r'\d{6}')
        Codelist.append(numpattern.findall(listpattern)[0])
    # print(Codelist)
    total = len(Codelist)
    CodeList = Codelist[:50]
    finished = 0
    for code in CodeList:
        finished = finished + 1
        finishedco = (finished / total) * 100
        print("total : {0}   finished : {1}    completion : {2}%".format(total, finished, finishedco))
        dealDataList = []
        dataUrl = 'http://info.stcn.com/dc/stock/index.jsp?stockcode=' + code
        dataHtml = getHtmlText(dataUrl)
        soup = BeautifulSoup(dataHtml, "html.parser")
        dealDataList.append(code)
        for i in range(1, 4):
            classStr = 'sj_r_' + str(i)
            divdata = soup.find_all('div', {'class': classStr})
            if len(divdata) == 0:
                dealDataList.append('该股票暂时没有交易数据!')
                break
            dealData = str(divdata[0])
            dealPattern = re.compile(r'\d+.\d+[\u4e00-\u9fa5]|\d+.+.%|\d+.\d+')  # decimals followed by a Chinese unit (e.g. 1.2亿), percentages, plain decimals
            listdeal = dealPattern.findall(dealData)
            for j in range(0, 4):
                dealDataList.append(listdeal[j])
        DealData.append(dealDataList)
    file = open(recordfile, 'a+', encoding='utf-8')
    for i in range(len(DealData)):
        if i == 0:
            s = str(DealData[i]).replace('[', '').replace(']', '')
            s = s.replace("'", '').replace(',', ' \t') + '\n'
        else:
            s = str(DealData[i]).replace('[', '').replace(']', '')
            s = s.replace("'", '').replace(',', '\t') + '\n'
        file.write(s)
    file.close()
    print(len(DealData))
    

    Result:

    2. Reflections

    Since this was my first time scraping data that is fetched in real time, it took by far the most time of the three tasks. Much of the code for this assignment was adapted from related examples on CSDN, and the final result came together after my own modifications.
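
    Since the clist API above answers with JSONP, a hedged alternative to the regex extraction is to strip the jQuery...(...) wrapper and parse the body as JSON. A minimal sketch, assuming the data/diff layout implied by the fields= list in the URL above (the exact response shape is an assumption here):

    import json

    def extract_codes(jsonp_text):
        # strip the jQueryXXX(...) callback wrapper around the JSON body
        body = jsonp_text[jsonp_text.index('(') + 1:jsonp_text.rindex(')')]
        payload = json.loads(body)
        diff = (payload.get('data') or {}).get('diff') or []
        # depending on the np= parameter the rows may arrive as a dict keyed by index
        rows = diff.values() if isinstance(diff, dict) else diff
        # f12 is the stock-code field requested in the URL above
        return [str(row['f12']) for row in rows]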

    Assignment 3

    1. Code for a self-chosen stock

    import requests
    url = 'http://46.push2his.eastmoney.com/api/qt/stock/kline/get?cb=jQuery112406437068490950477_1602146854442&secid=1.600115&ut=fa5fd1943c7b386f172d6893dbfba10b&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5%2Cf6&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58%2Cf59%2Cf60%2Cf61&klt=101&fqt=0&end=20500101&lmt=120&_=1602146854482'
    head = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre"}
    r = requests.get(url, timeout=30, headers=head)
    r.raise_for_status()
    r.encoding = 'utf-8'
    html = r.text
    # the reply is JSONP and the newest k-line sits at the tail, so rindex
    # slices off everything after the last '"' before the closing characters
    msg = html[html.rindex('"', 0, -10):]
    result = msg.split(",")
    print("股票代码号  股票名称  今日开  今日最高  今日最低")
    print("600115    " + "东方航空  " + result[2] + "   " + result[3] + "    " + result[4])
    

    Result:

    2. Reflections

    This one came together after asking my classmate Lin for help. rindex pulls the relevant data straight off the tail of the response, and after splitting it into a list the fields can be printed directly.
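
    A minimal sketch of that rindex trick on a made-up JSONP-style string (the payload below is invented for illustration and is not the live API response):

    # made-up payload shaped roughly like the k-line reply above
    fake = 'jQuery123({"klines":["2020-10-08,5.21,5.30,5.10"]});'
    # rindex finds the last '"' before the trailing wrapper characters,
    # i.e. the start of the newest record, and the slice keeps that tail
    tail = fake[fake.rindex('"', 0, -10):]
    fields = tail.strip('"]});').split(',')
    print(fields)  # ['2020-10-08', '5.21', '5.30', '5.10']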
