zoukankan      html  css  js  c++  java
  • 爬虫学习---美丽汤

    #coding:utf-8
    #version: 0.1
    #note:实现了查找0daydown最新发布的10页资源。
    import urllib.request

    from bs4 import BeautifulSoup

    # Crawl the first 10 listing pages of 0daydown and print, for every
    # resource on each page, its category, title, publish time and note.
    for page_no in range(1, 11):
        # Each listing page's URL is the base URL followed by the page number.
        url = "http://www.0daydown.com/page/" + str(page_no)

        # The context manager closes the HTTP response even if parsing raises.
        with urllib.request.urlopen(url) as page:
            # Name the parser explicitly so the resulting tree does not depend
            # on which optional parsers happen to be installed.
            soup_packtpage = BeautifulSoup(page, "html.parser")

        # Banner marking which page the following resources belong to.
        print(" The Page of: " + str(page_no))
        print("#" * 40)

        # Every resource on the page is an <article class="excerpt"> element.
        for article in soup_packtpage.find_all('article', class_="excerpt"):
            # Category: the node right after the <a> tag inside the header.
            print("Category:".ljust(20), article.header.a.next_element, sep='')
            # Title: the string content of the article's <h2>.
            print("Title:".ljust(20), article.h2.string, sep='')
            # Publish time: the node right after the clock icon in the first <p>.
            # NOTE(review): the label's spelling ("Pulished") is kept as-is so
            # the program's output is unchanged.
            time_icon = article.p.find('i', class_="icon-time icon12")
            print("Pulished_time:".ljust(19), time_icon.next_element, sep='')
            # Note: the string content of the element following the first <p>.
            print("Note:", article.p.find_next_sibling().string, sep='')
            print('-' * 50)

    input()  # Wait for Enter so the console window does not close immediately.

  • 相关阅读:
    真正的e时代
    在线手册
    UVA 10616 Divisible Group Sums
    UVA 10721 Bar Codes
    UVA 10205 Stack 'em Up
    UVA 10247 Complete Tree Labeling
    UVA 10081 Tight Words
    UVA 11125 Arrange Some Marbles
    UVA 10128 Queue
    UVA 10912 Simple Minded Hashing
  • 原文地址:https://www.cnblogs.com/my-time/p/4507699.html
Copyright © 2011-2022 走看看