zoukankan      html  css  js  c++  java
  • python_爬虫_爬取7*24小时财经新闻

    import time

    import requests
    from bs4 import BeautifulSoup
    
    def sina():
        """Poll the news feed forever and print each news item once.

        The first successful fetch prints every item returned by ``getNews``
        as the baseline, pausing 0.5 s after each line. Every later fetch
        prints only items not seen before, preceded by a separator line and
        the '新消息' marker. The feed is polled every 5 seconds.

        This function never returns; it runs until the process is interrupted.
        """
        # Items are remembered by (time, text) rather than by time alone, so
        # two different items that carry the same timestamp are both reported.
        # A set keeps the membership test O(1) as the history grows.
        seen = set()
        is_first = True
        while True:
            try:
                data_list = getNews()
            except requests.RequestException as exc:
                # A transient network failure should not kill a long-running
                # monitor; report it and try again on the next cycle.
                print('fetch failed, retrying:', exc)
                time.sleep(5)
                continue

            for data in data_list:
                key = (data['n_time'], data['n_info'])
                if is_first:
                    # Baseline pass: show everything currently on the page.
                    print(data['n_time'], data['n_info'])
                    time.sleep(0.5)
                elif key not in seen:
                    print('-' * 30)
                    print('新消息', data['n_time'], data['n_info'])
                seen.add(key)
            is_first = False

            time.sleep(5)
    def getNews():
        """Fetch the Sina finance live page and return its news items.

        Returns:
            A list of dicts with keys ``'n_time'`` (text of the item's time
            paragraph) and ``'n_info'`` (text of the item's body paragraph).
            The list is in reverse page order, so that printing it in sequence
            shows older items first (this assumes the page lists the newest
            item first).

        Raises:
            requests.RequestException: if the page cannot be fetched,
                including when the request times out.
        """
        base_url = 'http://live.sina.com.cn/zt/f/v/finance/globalnews1'
        # Without a timeout a stalled connection would block the caller's
        # polling loop indefinitely.
        response = requests.get(base_url, timeout=10)
        # Let requests guess the charset from the body instead of the headers.
        response.encoding = response.apparent_encoding

        html_bs4 = BeautifulSoup(response.text, 'lxml')
        info_list = html_bs4.find_all('div', {'data-nick': 'fin_图文直播'})

        news_list = []
        for info in info_list:
            time_tag = info.select_one('p[class="bd_i_time_c"]')
            text_tag = info.select_one('p[class="bd_i_txt_c"]')
            if time_tag is None or text_tag is None:
                # Skip entries that lack either paragraph rather than
                # aborting the whole fetch with an IndexError.
                continue
            news_list.append({
                'n_time': time_tag.get_text(),
                'n_info': text_tag.get_text(),
            })
        # Reverse so callers print older items before newer ones.
        return news_list[::-1]
    # Script entry point: start the endless polling loop only when this file
    # is executed directly, not when it is imported as a module.
    if __name__ == "__main__":
        sina()
    
    
    '''
    1 先得到页面的15条新闻
    2 15条新闻放到列表并传递
    3 每隔5秒请求一次页面,界面中时间与列表中的时间对照,不相同则读取
    '''
  • 相关阅读:
    random模块的随机变换
    re模块与正则表达式进阶
    面向对象整体细化
    __new__内部工作方式
    前端之CSS
    前端之HTML
    数据库
    同步异步阻塞非阻塞
    进程间的通信
    day 36(多进程)
  • 原文地址:https://www.cnblogs.com/hejianlong/p/9291384.html
Copyright © 2011-2022 走看看