zoukankan      html  css  js  c++  java
  • 新练习

    import re
    import requests
    from bs4 import BeautifulSoup
    from datetime import datetime

    def getClickCount(newsUrl):
        """Return the click (hit) count of a news article as an int.

        The article id is the text between the first underscore in
        ``newsUrl`` and its ``.html`` suffix, taking the segment that follows
        the first ``/`` in that text. The id is sent to the counter endpoint
        on oa.gzcc.cn and the number inside the last ``.html('N')`` call of
        the response is returned.

        Args:
            newsUrl: URL of the article page.

        Returns:
            The click count parsed from the counter response.

        Raises:
            ValueError: if no article id can be extracted from ``newsUrl`` or
                the counter response contains no ``.html('<digits>')`` call.
        """
        # Raw string with an escaped dot, so ".html" is matched literally and
        # no invalid "\_" escape sequence is involved.
        idMatch = re.search(r'_(.*)\.html', newsUrl)
        idParts = idMatch.group(1).split('/') if idMatch else []
        if len(idParts) < 2:
            raise ValueError('cannot extract news id from {!r}'.format(newsUrl))
        newId = idParts[1]

        clickUrl = "http://oa.gzcc.cn/api.php?op=count&id={}&modelid=80".format(newId)
        # Without a timeout, requests may block indefinitely on a stalled server.
        text = requests.get(clickUrl, timeout=10).text

        # Pull the digits out explicitly; lstrip/rstrip strip character *sets*
        # and would not reliably remove the surrounding "('" / "');" affixes.
        counts = re.findall(r"\.html\('(\d+)'\)", text)
        if not counts:
            raise ValueError('unexpected click-count response: {!r}'.format(text))
        return int(counts[-1])

    def getNewsDetail(newsUrl):
        """Fetch one news article page and print a one-line summary of it.

        Prints, in order: the click count, the article URL, the publication
        time (a ``datetime``) and the author name.

        Args:
            newsUrl: URL of the article page.

        Raises:
            IndexError: if the page has no ``.show-info`` element.
            ValueError: if the info text contains no
                ``YYYY-MM-DD HH:MM:SS`` timestamp.
        """
        # Without a timeout, requests may block indefinitely on a stalled server.
        resd = requests.get(newsUrl, timeout=10)
        resd.encoding = 'utf-8'
        soupd = BeautifulSoup(resd.text, 'html.parser')
        info = soupd.select('.show-info')[0].text

        # Find the timestamp by its shape instead of stripping a label with
        # lstrip(): lstrip() removes a *set of characters*, so any label or
        # separator that differs from the expected one left junk in front of
        # the date and made strptime() fail.
        stamp = re.search(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', info)
        if stamp is None:
            raise ValueError('no publication time found in {!r}'.format(info))
        dt = datetime.strptime(stamp.group(0), '%Y-%m-%d %H:%M:%S')

        # Capture the non-whitespace run after the author label. Both the
        # ASCII and the full-width colon are accepted. A missing label yields
        # an empty author instead of a stray character from the end of the
        # text, and lstrip() can no longer eat leading characters of the name.
        authorMatch = re.search(r'作者[::](\S*)', info)
        au = authorMatch.group(1) if authorMatch else ''

        clickCount = getClickCount(newsUrl)
        print(clickCount, newsUrl, dt, au)

    def getNewsList(pageUrl):
        """Fetch a news list page and print the detail of its first news item.

        Walks the ``<li>`` elements of the page. The first one containing a
        ``.news-list-title`` element is treated as a news entry: the ``href``
        of its first ``<a>`` is passed to ``getNewsDetail`` and the walk stops.

        Args:
            pageUrl: URL of the list page.
        """
        # Without a timeout, requests may block indefinitely on a stalled server.
        res = requests.get(pageUrl, timeout=10)
        res.encoding = 'utf-8'
        soup = BeautifulSoup(res.text, 'html.parser')
        for news in soup.select('li'):
            # Skip list items that are not news entries (menus, pagers, ...).
            if not news.select('.news-list-title'):
                continue
            newsUrl = news.select('a')[0].attrs['href']
            getNewsDetail(newsUrl)
            # NOTE(review): only the first news entry per page is processed;
            # remove this break to handle every entry on the page.
            break
    # Entry point: process the index page first, then the numbered pages.
    pageUrl = 'http://news.gzcc.cn/html/xiaoyuanxinwen'
    getNewsList(pageUrl)
    # range(2, 233) walks pages 2..232. The previous `for i in (2,233)` looped
    # over a two-element tuple and re-fetched the same index page each time
    # because `i` was never used.
    # NOTE(review): the per-page URL pattern "<index>/<n>.html" and the upper
    # bound are assumptions -- confirm against the site's pager.
    for i in range(2, 233):
        getNewsList('{}/{}.html'.format(pageUrl, i))


  • 相关阅读:
    iOS 单例(Singleton)总结 和第三库引用单例
    iOS OpenURL用法简介
    CGContextRef学习笔记
    iOS 绘图(虚线、椭圆)
    iPhone4s 7.0.3-4 TableView 数据越界 解决方案
    Android Media应用开发
    RTMP & HLS
    Debug tool 学习笔记
    video codec 学习笔记
    matplotlib和numpy 学习笔记
  • 原文地址:https://www.cnblogs.com/lg916843/p/8806683.html
Copyright © 2011-2022 走看看