zoukankan      html  css  js  c++  java
  • 爬虫学习---美丽汤

    #coding:utf-8
    #version: 0.1
    #note:实现了查找0daydown最新发布的10页资源。
    import urllib.request

    from bs4 import BeautifulSoup

    # Walk listing pages 1 through 10 and print a short summary of every
    # resource found on each page.
    for i in range(1, 11):
        # A listing page's URL is the base path followed by the page number.
        url = "http://www.0daydown.com/page/" + str(i)
        # The context manager closes the HTTP response even if parsing raises.
        with urllib.request.urlopen(url) as page:
            # Name the parser explicitly so the resulting tree does not depend
            # on which optional parsers happen to be installed.
            soup_packtpage = BeautifulSoup(page, "html.parser")
        num = " The Page of: " + str(i)  # label which page the entries below belong to
        print(num)
        print("#" * 40)
        # Every resource on the page is an <article class="excerpt"> element.
        for article in soup_packtpage.find_all('article', class_="excerpt"):
            # Each label is padded to a fixed width and followed directly by
            # its value on the same line (sep='' keeps them adjacent).
            print("Category:".ljust(20), article.header.a.next_element, sep='')  # category
            print("Title:".ljust(20), article.h2.string, sep='')  # title
            print("Pulished_time:".ljust(19),
                  article.p.find('i', class_="icon-time icon12").next_element,
                  sep='')  # published time
            print("Note:", article.p.find_next_sibling().string, sep='')  # note
            print('-' * 50)

    input()  # wait for Enter so the console window does not close immediately

  • 相关阅读:
    7.12.2
    7.12.1
    7.11.8
    循环测试条件前缀和后缀的区别
    7.11.7 两个版本
    7.11.5
    7.12 vowels.c 程序
    7.11 animals.c 程序
    7.6.2 break 语句
    7.10 break.c 程序
  • 原文地址:https://www.cnblogs.com/my-time/p/4507699.html
Copyright © 2011-2022 走看看