zoukankan      html  css  js  c++  java
  • Python3爬虫04(其他例子,如处理获取网页的内容)

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-

    import os
    import re
    import requests
    from bs4 import NavigableString
    from bs4 import BeautifulSoup

    # Fetch the qiushibaike homepage and print the text of every joke ("duanzi").
    res = requests.get("https://www.qiushibaike.com/")
    qiushi = res.content
    soup = BeautifulSoup(qiushi, "html.parser")
    duanzis = soup.find_all(class_="content")
    for i in duanzis:
        # First child of the <span> is the joke text; equivalent to
        # i.span.string only when the span has a single child node.
        duanzi = i.span.contents[0]
        print(duanzi)


    # Fetch a 699pic stock-photo search page and download every lazily-loaded
    # image into a "jpg" subdirectory of the current working directory.
    res = requests.get("http://699pic.com/sousuo-218808-13-1-0-0-0.html")
    image = res.content
    soup = BeautifulSoup(image, "html.parser")
    images = soup.find_all(class_="lazy")

    for i in images:
        original = i["data-original"]  # real image URL held in the lazy-load attribute
        title = i["title"]
        try:
            # os.path.join is portable; the original '...+"\jpg\"+...' literal
            # was a syntax error (the trailing backslash escaped the quote).
            path = os.path.join(os.getcwd(), "jpg", title + ".jpg")
            with open(path, "wb") as file:
                file.write(requests.get(original).content)
        except (OSError, requests.RequestException):
            # Best-effort download: skip images whose title is not a valid
            # filename or whose fetch fails, instead of a bare except: pass.
            pass

    # Same 699pic search: list each image's title and URL without downloading.
    r = requests.get("http://699pic.com/sousuo-218808-13-1.html")
    fengjing = r.content
    soup = BeautifulSoup(fengjing, "html.parser")
    # Every lazily-loaded <img> tag; find_all returns a list of Tags.
    images = soup.find_all(class_="lazy")

    for i in images:
        jpg_rl = i["data-original"]  # image URL
        title = i["title"]           # image title
        print(title)
        print(jpg_rl)
        print("")

    # Fetch a cnblogs blog page and explore BeautifulSoup navigation
    # (contents / children / descendants) on a few located tags.
    # The original made an extra request to qiushibaike and built an
    # html.parser soup that were both immediately overwritten - removed.
    r = requests.get("http://www.cnblogs.com/nicetime/")
    blog = r.content
    # NOTE: features="lxml" requires the third-party lxml parser to be
    # installed; "html.parser" would work with the stdlib alone.
    soup = BeautifulSoup(blog, features="lxml")
    print(soup.contents[0].contents)

    # Three ways of locating a tag; only the last lookup (id="menu") is kept.
    tag = soup.find('div')
    tag = soup.find(class_="menu-bar menu clearfix")
    tag = soup.find(id="menu")
    print(list(tag))

    tag01 = soup.find(class_="c_b_p_desc")

    # .contents is a list, .children iterates the same direct children,
    # .descendants walks the whole subtree, so its count can be larger.
    print(len(list(tag01.contents)))
    print(len(list(tag01.children)))
    print(len(list(tag01.descendants)))

    print(tag01.contents)
    print(tag01.children)  # prints the iterator object, not the items
    for child in tag01.children:
        print(child)

    print(len(tag01.contents))

    # Iterating a Tag directly is the same as iterating its children.
    for child in tag01:
        print(child)

    print(tag01.contents[0].string)
    print(tag01.contents[1])
    print(tag01.contents[1].string)


    # The dygod page is actually gbk-encoded but requests decodes it as
    # iso-8859-1 (no charset header), so re-encode/decode every string to
    # recover the Chinese text and the ftp download links.
    url = "http://www.dygod.net/html/tv/oumeitv/109673.html"
    s = requests.get(url)
    print(s.text.encode("iso-8859-1").decode('gbk'))
    res = re.findall('href="(.*?)">ftp', s.text)
    for resi in res:
        a = resi.encode("iso-8859-1").decode('gbk')
        print(a)

  • 相关阅读:
    Meten Special Activities II
    Meten Special Activities II
    Meten Special Activities II
    Meten Special Activities II
    Meten Special Activities
    Meten Special Activities
    Meten Special Activities
    Meten Special Activities
    Meten Special Activities
    冒泡排序和选择排序
  • 原文地址:https://www.cnblogs.com/NiceTime/p/10125289.html
Copyright © 2011-2022 走看看