zoukankan      html  css  js  c++  java
  • 网络爬虫基础练习

    import requests
    import re
    from bs4 import BeautifulSoup
    
    
    # Basic web-scraping exercise: download one listing page, parse it with
    # BeautifulSoup, and print a few pieces of the document.

    # Request headers; a desktop-browser User-Agent is sent with the request.
    head = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
    }

    # A timeout keeps the script from hanging forever on a stalled connection,
    # and raise_for_status() stops us from parsing an HTTP error page as if it
    # were the real listing.
    r = requests.get(
        "http://news.gzcc.cn/html/xiaoyuanxinwen/",
        headers=head,
        timeout=10,
    )
    r.raise_for_status()
    # Decode the body as UTF-8 regardless of what requests guessed.
    r.encoding = 'utf-8'
    soup = BeautifulSoup(r.text, 'html.parser')

    # First <h1> element of the page (the label previously said "a" by mistake).
    print("h1标签的内容:")
    print(soup.h1)
    print()
    # href attribute of the first <a> element of the page.
    print("a标签的链接:")
    print(soup.a.attrs['href'])
    print()
    # Every <li> element, printed as markup, one per line.
    print('所有li标签的所有内容:')
    for each in soup.find_all('li'):
        print(each)

    print()
    print()
    # First <li> inside the first element carrying the "news-list" class.
    a1 = soup.select_one(".news-list").select_one("li")
    print(a1.select_one(".news-list-title").text)
    print(a1.a.attrs.get('href'))
    # Look the info container up once and print the text of its first two
    # children. NOTE(review): presumably these are the date and source
    # fields; confirm against the live page markup.
    info = a1.select_one(".news-list-info")
    print(info.contents[0].text)
    print(info.contents[1].text)

  • 相关阅读:
    noip模拟赛#38
    noip模拟赛#45
    noip模拟赛#15
    noip模拟赛#14
    noip模拟赛
    rp++
    bzoj2127: happiness
    bzoj3209:3209: 花神的数论题
    10.1 plan
    FFT
  • 原文地址:https://www.cnblogs.com/wban48/p/8670119.html
Copyright © 2011-2022 走看看