zoukankan      html  css  js  c++  java
  • 网络爬虫基础练习

    import requests
    from bs4 import BeautifulSoup
    
    # Practice: download the article at `url` and parse it, so the element
    # lookups further down have a document to work on.
    url = 'http://news.gzcc.cn/html/2018/xiaoyuanxinwen_0329/9129.html'
    # A timeout keeps the script from hanging forever on an unresponsive host.
    response = requests.get(url, timeout=10)
    # Fail loudly on a 4xx/5xx answer instead of parsing an error page.
    response.raise_for_status()
    # Set the encoding explicitly so response.text is decoded as UTF-8.
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    # Lookups tried while exploring the parsed page (uncomment one to run it):
    # print(response)
    # print(response.text)
    # print(soup.p)
    # print(soup.head)
    # print(soup.head.name)
    # print(soup.a.attrs)
    # print(soup.li.contents)
    # print(soup.li.text)
    # Print the string inside the selected tag.
    # print(soup.li.a.string)
    # print(soup.select('div'))
    # print(soup.select('div[class="news-list-description"]'))
    # HomeWork: find a site that has an <h1> and pull a few elements out of it.

    # Every <h1> element in the document (select() returns a list).
    h = soup.select('h1')
    print(h)

    # Attribute dict of the first <a>; soup.a is None when the page has no
    # anchor at all, so fall back to an empty dict instead of crashing.
    first_anchor = soup.a
    a_href = first_anchor.attrs if first_anchor is not None else {}
    print(a_href)

    # select() yields an empty list when nothing matches, so guard the [0].
    title = soup.select('.show-title')
    if title:
        print(title[0].text)
    else:
        print('no .show-title element found')

    # All elements carrying the "show-info" class.
    t = soup.select('.show-info')
    print(t)
    

      

  • 相关阅读:
    密码等级
    ie兼容透明
    分割线
    支付宝银行判断接口
    date只能选择今天之后的时间js
    离开页面之前提示,关闭,刷新等
    使用 Linux 系统的常用命令
    C#窗体简单增删改查
    1
    二维数组
  • 原文地址:https://www.cnblogs.com/miranda-76/p/8672466.html
Copyright © 2011-2022 走看看