zoukankan      html  css  js  c++  java
  • 大学排名爬取 + 绘制树状图 + 绘制圆饼图

    Python爬虫与数据图表的实现

    1. 参考教材实例20,编写Python爬虫程序,获取江西省所有高校的大学排名数据记录,并打印输出。

    2. 使用numpy和matplotlib等库分析数据,并绘制南昌大学、华东交通大学、江西理工大学三个高校的总分排名、生源质量(新生高考成绩得分)、培养结果(毕业生就业率)、顶尖成果(高被引论文·篇)等四个指标构成的多指标柱形图。

    3. 对江西各高校的顶尖成果(高被引论文数量)进行分析,使用matplotlib绘制各高校顶尖成果数构成的饼状图,并突出江西理工大学所在的饼状块。

    毕竟不需要登录验证,所以挺好写的。

    一个好玩的爬虫:

      1 # Created by carryon on 18-12-24.
      2 import requests
      3 from bs4 import BeautifulSoup
      4 from prettytable import PrettyTable
      5 from test4.Wtsql import Wtsql
      6 import numpy as np
      7 import matplotlib.mlab as mlab
      8 import matplotlib.pyplot as plt
      9 import matplotlib
     10 
     11 
     def get(timeout=10):
         """Download the 2018 university ranking page and return its HTML.

         Args:
             timeout: Seconds to wait for the server before giving up.
                 Without a timeout a stalled server would block the
                 script indefinitely.

         Returns:
             The response body decoded to ``str``.

         Raises:
             requests.RequestException: On connection failure, timeout, or
                 an HTTP error status (4xx/5xx).
         """
         res = requests.get(
             url='http://zuihaodaxue.cn/zuihaodaxuepaiming2018.html',
             headers={
                 'User-Agent': 'XXX',  # placeholder: fill in your own User-Agent
             },
             timeout=timeout,
         )
         # Fail loudly rather than handing an error page to the HTML parser.
         res.raise_for_status()

         # Decode the body with the encoding detected from its content.
         res.encoding = res.apparent_encoding

         return res.text
     23 
     24 
     def beautiful(text, cur, db, ls):
         """Parse the ranking table and draw the pie chart for Jiangxi schools.

         Each table row is flattened to a list of cell texts. The code reads
         cell 1 as the school name, cell 2 as the province, cells 3/4/5 as the
         values plotted by ``paint`` and cell 9 as the value used for the pie.

         Args:
             text: HTML of the ranking page.
             cur: Database cursor. Not used here; the insert step is disabled.
             db: Database connection. Not used here.
             ls: ``Wtsql`` helper. Not used here.

         Raises:
             AttributeError: If the expected ``<tbody>`` is not in the page.
             ValueError: If a numeric cell cannot be converted to ``float``.
         """
         soup = BeautifulSoup(text, 'lxml')
         rows = soup.find(
             name='tbody',
             attrs={'class': 'hidden_zhpm', 'style': 'text-align: center;'},
         ).find_all('tr')

         compared = ("江西理工大学", "南昌大学", "华东交通大学")
         paints = []   # [name, v3, v4, v5, v9] records for the bar chart
         draw_1 = []   # pie labels: names of schools whose province is 江西
         draw_2 = []   # pie values: cell 9 of those schools
         for row in rows:
             pan = [td.text for td in row.find_all('td')]
             if len(pan) < 10:
                 # Rows without all ten leading cells cannot be indexed
                 # below; skip them instead of raising IndexError.
                 continue
             if pan[1] in compared:
                 paints.append([
                     pan[1],
                     float(pan[3]),
                     float(pan[4]),
                     float(pan[5].strip('%')),  # cell 5 may end with '%'
                     float(pan[9]),
                 ])
             if pan[2] == "江西":
                 draw_1.append(pan[1])
                 draw_2.append(float(pan[9]))

         # NOTE: only the pie chart is shown. `paints` is collected so that
         # paint(paints) can be enabled here to show the bar chart as well.
         drawbing(draw_1, draw_2)
     def paint(line):
         """Draw a grouped bar chart comparing schools on four indicators.

         Args:
             line: Sequence of records shaped ``[name, v1, v2, v3, v4]``, e.g.
                 ``["南昌大学", 33.5, 52.4, 86.50, 108]``. The four values are
                 plotted, in order, above the four tick labels set below.
                 Any number of records is accepted; with the fixed bar width
                 of 0.15, more than six records per group will overlap.
         """
         matplotlib.rcParams['font.sans-serif'] = ['SimHei']
         matplotlib.rcParams['font.family'] = 'sans-serif'
         # Keep the minus sign '-' from being rendered as a box.
         matplotlib.rcParams['axes.unicode_minus'] = False

         n_groups = 4
         index = np.arange(n_groups)
         bar_width = 0.15
         opacity = 0.99
         colors = ('b', 'g', 'r')  # reused cyclically beyond three records

         plt.subplots()
         for pos, record in enumerate(line):
             plt.bar(index + pos * bar_width, tuple(record[1:1 + n_groups]),
                     bar_width,
                     alpha=opacity,
                     color=colors[pos % len(colors)],
                     label=record[0])

         plt.xlabel('江西省部分大学')
         plt.ylabel('总评比')
         plt.title('江西省部分大学总评比')
         # Centre each tick under its group of bars; for three records this
         # is index + bar_width.
         plt.xticks(index + bar_width * (len(line) - 1) / 2,
                    ("总分排名", "生源质量", "就业率", "顶尖成果"))
         plt.legend()

         plt.tight_layout()
         plt.show()
    110 
    111 
    def drawbing(line, ans, highlight="江西理工大学"):
        """Draw a pie chart with one slice pulled further out than the rest.

        Args:
            line: Slice labels.
            ans: Slice values, one per label.
            highlight: Label of the slice to emphasise. Offsets are derived
                from the labels, so the chart works for any number of slices
                and regardless of where the highlighted label sits.
        """
        matplotlib.rcParams['font.sans-serif'] = ['SimHei']
        matplotlib.rcParams['font.family'] = 'sans-serif'
        # Keep the minus sign '-' from being rendered as a box.
        matplotlib.rcParams['axes.unicode_minus'] = False

        # Distance of each slice from the centre; the highlighted one is
        # pushed out further so it stands apart.
        explode = [0.5 if label == highlight else 0.2 for label in line]
        plt.axes(aspect=1)  # equal aspect so the pie is drawn as a circle
        # Percentages are shown with two decimals; no shadow.
        plt.pie(x=ans, labels=line, autopct='%.2f%%', explode=explode,
                shadow=False)
        plt.show()
    126 
    127 
    if __name__ == '__main__':
        # Open the database, fetch the page, then parse and plot it.
        ls = Wtsql()
        cur, db = ls.login()
        try:
            text = get()
            beautiful(text, cur, db, ls)
        finally:
            # Release the cursor and connection even if fetching or
            # parsing fails.
            cur.close()
            db.close()

    还有连接数据库的代码:

     1 # Created by carryon on 18-12-24.
     2 import pymysql
     3 
     4 
     class Wtsql:
         """Minimal helper for storing ranking rows in the ``jiangxi`` table."""

         # Column names of the `jiangxi` table, in the order in which the row
         # values are supplied to insert().
         COLUMNS = (
             'pm', 'xxmc', 'ss', 'zf', 'syzl', 'pyjg', 'shsy',
             'kygm', 'kyzl', 'djcg', 'djrc', 'kjfw', 'cgzh', 'xsgjh',
         )

         def login(self):
             """Open a MySQL connection and return ``(cursor, connection)``.

             The password and database name are empty strings here and must
             be filled in before use. The caller is responsible for closing
             both returned objects.
             """
             db = pymysql.Connect(host="localhost", user="root", password="", db="")
             cur = db.cursor()
             return cur, db

         def insert(self, lists, cur, db):
             """Insert one row into ``jiangxi`` and commit it.

             Values are passed to the driver as query parameters instead of
             being formatted into the SQL text, so quotes in the data cannot
             break or alter the statement.

             Args:
                 lists: Row values; the first 14 are used, in COLUMNS order.
                 cur: Cursor to execute on.
                 db: Connection to commit or roll back.

             Raises:
                 IndexError: If ``lists`` has fewer than 14 items.
             """
             values = tuple(lists[:len(self.COLUMNS)])
             if len(values) != len(self.COLUMNS):
                 raise IndexError(
                     "expected {} values, got {}".format(len(self.COLUMNS), len(values)))

             column_sql = ", ".join("`{}`".format(name) for name in self.COLUMNS)
             placeholder_sql = ", ".join(["%s"] * len(self.COLUMNS))
             sql = "insert into jiangxi({}) values ({})".format(column_sql, placeholder_sql)
             try:
                 # Execute the statement, then commit it.
                 cur.execute(sql, values)
                 db.commit()
             except Exception as e:
                 # Best effort: report the error and roll back.
                 print(e)
                 db.rollback()
  • 相关阅读:
    基本数据类型与其包装类型
    String与常量池(JDK1.8)
    数据库知识归纳(面试向)
    HashMap、ConcurrentHashMap以及HashTable(面试向)
    Java多线程基础(面试向)
    Java的类加载
    Sql语句的一些事(二)
    RUBY惯用方法(转)
    Ruby中区分运行来源的方法(转)
    ruby安装devkit
  • 原文地址:https://www.cnblogs.com/zllwxm123/p/10179976.html
Copyright © 2011-2022 走看看