zoukankan      html  css  js  c++  java
  • 网络爬虫基础练习

    import requests
    from bs4 import BeautifulSoup

    # Basic web-scraping exercise: fetch one HTML page, parse it, and print a
    # handful of elements from it.
    #
    # The target is a page on localhost; the `_ijt` query parameter looks like
    # a session token generated by the local server — TODO confirm it is still
    # valid before running.
    url = 'http://localhost:63343/Demo/cehsi.html?_ijt=ah8jh9pfl0r7j3qoqt6r8egqk5'

    # Without a timeout requests waits forever on an unresponsive server.
    res = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were
    # the expected document.
    res.raise_for_status()
    # Force UTF-8 decoding rather than relying on the encoding guessed from
    # the response headers.
    res.encoding = 'utf-8'
    print(res.text)

    soup = BeautifulSoup(res.text, "html.parser")

    # Extract the text of the first h1 tag.
    print(soup.h1.text)

    # Extract the link (href attribute) of the first a tag.
    print(soup.a.attrs['href'])

    # Print the full child list of every li tag.
    for item in soup.select('li'):
        print(item.contents)

    # Extract one news item's title, link, publish time, and source.
    print(soup.select('.news-list-title')[0].text)
    print(soup.li.a.attrs['href'])
    # Look the info element up once and reuse it for both fields.
    # NOTE(review): presumably the first two children are the publish-time and
    # source elements; if the markup has whitespace text nodes between them the
    # indices will be off — verify against the page.
    news_info = soup.select('.news-list-info')[0]
    print(news_info.contents[0].text)
    print(news_info.contents[1].text)
    

      

  • 相关阅读:
    Python学习笔记(三)
    Python学习笔记(二)
    GDUFE ACM1159
    GDUFE ACM1003 练手
    GDUFE ACM1033
    GDUFE ACM1128
    GDUFE ACM1002
    GDUFE ACM1050
    GDUFE ACM1007
    GDUFE ACM1003
  • 原文地址:https://www.cnblogs.com/18128319239F/p/8672362.html
Copyright © 2011-2022 走看看