  • Winter Vacation Self-Study (11)

    May all the gentle and lovely people find happiness in the end ❤

    Today's summary:

    Lines of code: 400
    Blog posts: 1
    Study time: about 6 hours
    Topics covered: scraping epidemic data with Python, the AcWing daily problem

    Tomorrow's plan:

    Morning: getting started with Python data analysis
    Afternoon: getting started with Python data analysis
    Evening: the AcWing daily problem

    Details:
    Collecting the latest day's epidemic data

    import requests
    from bs4 import BeautifulSoup
    import re
    import json

    # Fetch the DXY epidemic overview page
    response = requests.get("http://ncov.dxy.cn/ncovh5/view/pneumonia")
    home_page = response.content.decode()

    # The per-country data sits in a <script> tag with a known id
    soup = BeautifulSoup(home_page, 'lxml')
    script = soup.find(id="getListByCountryTypeService2true")
    text = script.string

    # Extract the JSON array literal embedded in the script text
    json_str = re.findall(r'\[.+\]', text)[0]
    # print(json_str)

    last_day_corona_virus = json.loads(json_str)
    print(last_day_corona_virus)
    with open('last_day_corona_virus.json', 'w', encoding='utf-8') as fp:
        json.dump(last_day_corona_virus, fp, ensure_ascii=False)
    
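    To sanity-check the result, a quick sketch like this can reload the saved file and peek at its shape (the field names are whatever DXY happens to embed):

    import json

    # Reload the file written above and inspect its structure
    with open('last_day_corona_virus.json', encoding='utf-8') as fp:
        data = json.load(fp)

    print(type(data), len(data))   # expect a list with one entry per country
    print(sorted(data[0].keys()))  # field names come straight from DXY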

    Refactored version:

    import requests
    from bs4 import BeautifulSoup
    import re
    import json
    
    
    class CoronaVirusSpider(object):
        def __init__(self):
            self.home_url = "http://ncov.dxy.cn/ncovh5/view/pneumonia"

        def get_content_from_url(self, url):
            """
            Fetch the URL and return the decoded response body as a string.
            :param url: the URL to request
            """
            response = requests.get(url)
            return response.content.decode()

        def parse_home_page(self, home_page):
            """
            Parse the home page and return the embedded data as Python objects.
            :param home_page: home page HTML
            :return: the parsed Python data
            """
            soup = BeautifulSoup(home_page, 'lxml')
            script = soup.find(id="getListByCountryTypeService2true")
            text = script.string

            json_str = re.findall(r'\[.+\]', text)[0]
            # print(json_str)

            data = json.loads(json_str)

            return data

        def save(self, data, path):
            with open(path, 'w', encoding='utf-8') as fp:
                json.dump(data, fp, ensure_ascii=False)

        def crawl_last_day_corona_virus(self):
            """
            Collect the latest day's per-country epidemic data.
            :return:
            """
            home_page = self.get_content_from_url(self.home_url)
            last_day_corona_virus = self.parse_home_page(home_page)
            self.save(last_day_corona_virus, 'last_day_corona_virus.json')

        def run(self):
            self.crawl_last_day_corona_virus()
    
    
    if __name__ == '__main__':
        spider = CoronaVirusSpider()
        spider.run()
    
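    A side benefit of the refactor is that each step can now be hardened or tested on its own. For instance, here is a sketch of a more defensive fetch helper (the timeout and retry values are my own assumptions, not part of the original):

    import requests

    # Hypothetical hardened fetch: timeout, HTTP error check, simple retry
    def get_content_from_url(url, retries=3, timeout=10):
        for attempt in range(retries):
            try:
                response = requests.get(url, timeout=timeout)
                response.raise_for_status()  # raise on 4xx/5xx responses
                return response.content.decode()
            except requests.RequestException:
                if attempt == retries - 1:
                    raise  # give up after the last attempt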

    Collecting every country's epidemic data since January 23

    import requests
    from bs4 import BeautifulSoup
    import re
    import json
    from tqdm import tqdm
    
    class CoronaVirusSpider(object):
        def __init__(self):
            self.home_url = "http://ncov.dxy.cn/ncovh5/view/pneumonia"

        def get_content_from_url(self, url):
            """
            Fetch the URL and return the decoded response body as a string.
            :param url: the URL to request
            """
            response = requests.get(url)
            return response.content.decode()

        def parse_home_page(self, home_page):
            """
            Parse the home page and return the embedded data as Python objects.
            :param home_page: home page HTML
            :return: the parsed Python data
            """
            soup = BeautifulSoup(home_page, 'lxml')
            script = soup.find(id="getListByCountryTypeService2true")
            text = script.string

            json_str = re.findall(r'\[.+\]', text)[0]
            # print(json_str)

            data = json.loads(json_str)

            return data

        def save(self, data, path):
            with open(path, 'w', encoding='utf-8') as fp:
                json.dump(data, fp, ensure_ascii=False)

        def crawl_last_day_corona_virus(self):
            """
            Collect the latest day's per-country epidemic data.
            :return:
            """
            home_page = self.get_content_from_url(self.home_url)
            last_day_corona_virus = self.parse_home_page(home_page)
            self.save(last_day_corona_virus, 'last_day_corona_virus.json')

        def crawl_corona_virus(self):
            """
            Collect every country's epidemic data since January 23.
            :return:
            """
            with open('last_day_corona_virus.json', encoding='utf-8') as fp:
                last_day_corona_virus = json.load(fp)
            # print(last_day_corona_virus)
            corona_virus = []
            for country in tqdm(last_day_corona_virus, 'Collecting per-country data since Jan 23'):
                # Each country entry carries a URL pointing to its full time series
                statistic_data_url = country['statisticsData']
                statistic_data_json_str = self.get_content_from_url(statistic_data_url)

                statistic_data = json.loads(statistic_data_json_str)['data']
                # print(statistic_data)
                for one_day in statistic_data:
                    one_day['provinceName'] = country['provinceName']
                    one_day['countryShortCode'] = country['countryShortCode']
                # print(statistic_data)
                corona_virus.extend(statistic_data)
            self.save(corona_virus, 'corona_virus.json')

        def run(self):
            # self.crawl_last_day_corona_virus()
            self.crawl_corona_virus()
    
    
    if __name__ == '__main__':
        spider = CoronaVirusSpider()
        spider.run()
    
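    Since tomorrow's plan is getting started with Python data analysis, a natural follow-up is to load corona_virus.json into pandas. A minimal sketch, assuming the output file from the run above:

    import pandas as pd

    # Each record is one country-day entry; read the whole JSON array into a DataFrame
    df = pd.read_json('corona_virus.json')
    print(df.shape)             # (rows, columns)
    print(df.columns.tolist())  # field names as collected from DXY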

    The epidemic spider project

    import requests
    from bs4 import BeautifulSoup
    import re
    import json
    from tqdm import tqdm
    
    
    class CoronaVirusSpider(object):
        def __init__(self):
            self.home_url = "http://ncov.dxy.cn/ncovh5/view/pneumonia"

        def get_content_from_url(self, url):
            """
            Fetch the URL and return the decoded response body as a string.
            :param url: the URL to request
            """
            response = requests.get(url)
            return response.content.decode()

        def parse_home_page(self, home_page, tag_id):
            """
            Parse the home page and return the embedded data as Python objects.
            :param home_page: home page HTML
            :param tag_id: id of the <script> tag that holds the JSON
            :return: the parsed Python data
            """
            soup = BeautifulSoup(home_page, 'lxml')
            script = soup.find(id=tag_id)
            text = script.string

            json_str = re.findall(r'\[.+\]', text)[0]
            # print(json_str)

            data = json.loads(json_str)

            return data

        def save(self, data, path):
            with open(path, 'w', encoding='utf-8') as fp:
                json.dump(data, fp, ensure_ascii=False)

        def crawl_last_day_corona_virus(self):
            """
            Collect the latest day's per-country epidemic data.
            :return:
            """
            home_page = self.get_content_from_url(self.home_url)
            last_day_corona_virus = self.parse_home_page(home_page, tag_id="getListByCountryTypeService2true")
            self.save(last_day_corona_virus, 'data/last_day_corona_virus.json')

        def crawl_corona_virus(self):
            """
            Collect every country's epidemic data since January 23.
            :return:
            """
            # Read from the same data/ directory that crawl_last_day_corona_virus writes to
            with open('data/last_day_corona_virus.json', encoding='utf-8') as fp:
                last_day_corona_virus = json.load(fp)
            # print(last_day_corona_virus)
            corona_virus = []
            for country in tqdm(last_day_corona_virus, 'Collecting per-country data since Jan 23'):
                statistic_data_url = country['statisticsData']
                statistic_data_json_str = self.get_content_from_url(statistic_data_url)

                statistic_data = json.loads(statistic_data_json_str)['data']
                # print(statistic_data)
                for one_day in statistic_data:
                    one_day['provinceName'] = country['provinceName']
                    one_day['countryShortCode'] = country['countryShortCode']
                # print(statistic_data)
                corona_virus.extend(statistic_data)
            self.save(corona_virus, 'data/corona_virus.json')

        def crawl_last_day_corona_virus_of_china(self):
            """
            Collect the latest day's per-province data for China.
            :return:
            """
            # 1. Send the request
            home_page = self.get_content_from_url(self.home_url)
            # 2. Parse the home page; the province data sits in the 'getAreaStat' script tag
            data = self.parse_home_page(home_page, tag_id='getAreaStat')
            # 3. Save
            self.save(data, 'data/last_day_corona_virus_of_china.json')

        def run(self):
            self.crawl_last_day_corona_virus()
            # self.crawl_corona_virus()
            self.crawl_last_day_corona_virus_of_china()
    
    
    if __name__ == '__main__':
        spider = CoronaVirusSpider()
        spider.run()
    
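    One practical wrinkle with the project version: save() now writes into a data/ directory, and open() raises FileNotFoundError if that directory does not exist. A small sketch of a directory-safe variant (written as a standalone function for brevity):

    import os
    import json

    def save(data, path):
        # Create the parent directory (e.g. 'data/') if it is missing
        os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
        with open(path, 'w', encoding='utf-8') as fp:
            json.dump(data, fp, ensure_ascii=False)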

    Getting a feel for visualization
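    No visualization code yet today, but as a teaser, a minimal matplotlib sketch could plot one country's curve from the collected corona_virus.json (the field names here are assumptions about the DXY records, not guaranteed):

    import json
    import matplotlib.pyplot as plt

    with open('data/corona_virus.json', encoding='utf-8') as fp:
        records = json.load(fp)

    # Pick whichever country appears first and plot its confirmed-count curve;
    # 'provinceName', 'dateId', 'confirmedCount' are assumed field names
    name = records[0]['provinceName']
    series = sorted((r for r in records if r['provinceName'] == name),
                    key=lambda r: r['dateId'])
    plt.plot([r['dateId'] for r in series], [r['confirmedCount'] for r in series])
    plt.title(name)
    plt.xlabel('dateId')
    plt.ylabel('confirmedCount')
    plt.show()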

    https://www.acwing.com/activity/content/problem/content/3416/1/

    This is a flood-fill counting problem: run bfs() directly from the start tile and count how many tiles are reachable.

    #include <bits/stdc++.h>
    using namespace std;
    int a[25][25];        // a[i][j] == 1 means the tile is walkable ('.')
    int visited[25][25];
    int dx[4] = {1, -1, 0, 0};
    int dy[4] = {0, 0, 1, -1};
    int n, m, sx, sy;     // n rows, m columns, (sx, sy) start tile
    struct node
    {
        int x, y;
    };
    void bfs()
    {
        memset(visited, 0, sizeof visited);
        int res = 0;
        queue<node> q;
        node st = {sx, sy};
        q.push(st);
        visited[st.x][st.y] = 1;
        while (!q.empty())
        {
            node z = q.front();
            q.pop();
            for (int i = 0; i < 4; i++)
            {
                node w;
                w.x = z.x + dx[i];
                w.y = z.y + dy[i];
                // Check bounds first, then walkability and visited state
                if (w.x >= 1 && w.x <= n && w.y >= 1 && w.y <= m
                    && a[w.x][w.y] == 1 && visited[w.x][w.y] == 0)
                {
                    q.push(w);
                    visited[w.x][w.y] = 1;
                    res++;
                }
            }
        }
        // +1 counts the start tile itself
        cout << res + 1 << endl;
    }
    int main()
    {
        char c;
        while (cin >> m >> n)   // input gives width (columns) first, then height (rows)
        {
            if (n + m == 0) break;
            memset(a, 0, sizeof a);
            for (int i = 1; i <= n; i++)
            {
                for (int j = 1; j <= m; j++)
                {
                    cin >> c;
                    if (c == '.') a[i][j] = 1;   // walkable tile
                    if (c == '@')                // start position
                    {
                        sx = i;
                        sy = j;
                    }
                }
            }
            bfs();
        }
    }
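
    As a quick sanity check, here is a hypothetical board (made up for illustration, not from the problem statement). The first line gives width then height, and "0 0" ends the input:

    3 3
    .#.
    .@.
    ...
    0 0

    Seven '.' tiles are reachable from '@', so the program prints 8 (the +1 in bfs() accounts for the start tile itself).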
    