zoukankan      html  css  js  c++  java
  • Python BeautifulSoup抓取表情包并保存

    完美解决斗图表情包不够的状况:

    # -*- coding: UTF-8 -*-
    # Imports: standard-library modules plus the third-party requests and bs4 packages
    import os
    import random
    import time
    import urllib
    from urllib.parse import urljoin

    import requests
    from bs4 import BeautifulSoup

    # Download every emoticon image referenced on one listing page of
    # qq.yh31.com and save the files into a local folder.

    # Folder the images are saved to. Forward slashes are used on purpose:
    # a trailing backslash would escape the closing quote of the literal.
    local = 'E:/python/biaoqing/'
    # Create the folder, including any missing parent folders; do nothing if
    # it already exists.
    os.makedirs(local, exist_ok=True)

    get_url = 'http://qq.yh31.com'
    url = get_url + '/zjbq/0551964_25.html'

    # Pool of browser User-Agent strings; one is chosen at random for this run.
    Agent = [
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us)AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0',
        'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36',
    ]
    User_Agent = random.choice(Agent)
    headers = {'User-Agent': User_Agent}

    # Fetch the listing page. The timeout keeps a stalled connection from
    # hanging the script; raise_for_status() stops early on an HTTP error
    # instead of parsing an error page.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    html_doc = response.text
    soup = BeautifulSoup(html_doc, 'lxml')  # parse html_doc

    # Select all <img> tags whose alt attribute is the empty string.
    pss = soup.find_all('img', attrs={'alt': ''})
    for ims in pss:
        src = ims.get('src')
        if not src:
            # An <img> without a src has nothing to download.
            continue

        # Resolve src against the page URL so root-relative, page-relative
        # and absolute image paths all produce a valid address.
        img_url = urljoin(url, src)

        # The last path segment is the file name; keeping it preserves the
        # image's own extension (gif, jpg, ...). Any query string is dropped.
        mingzi = src.split('?')[0].rstrip('/').split('/')[-1]
        if not mingzi:
            continue

        try:
            # Reuse the same headers so the image request carries the
            # browser User-Agent as well.
            image = requests.get(img_url, headers=headers, timeout=30)
            image.raise_for_status()
        except requests.RequestException as err:
            # One broken image should not abort the whole run.
            print('skipped %s: %s' % (img_url, err))
        else:
            with open(os.path.join(local, mingzi), 'wb') as image_file:
                image_file.write(image.content)

        time.sleep(2)  # pause between requests; frequent access can get blocked


    这类分页网址的命名基本相似,只需用字符串拼接生成每一页的地址,再用 for 循环逐页抓取网页内容,即可获得想要的表情包!
     
  • 相关阅读:
    E-Eating Together(POJ 3670)
    F-Dining Cows(POJ 3671)
    I-MooFest(POJ 1990)
    A-Apple Catching(POJ 2385)
    H-The Cow Lineup(POJ 1989)
    LOOPS(HDU 3853)
    Card Collector(HDU 4336)
    Help Me Escape (ZOJ 3640)
    Collecting Bugs(POJ 2096)
    c#大圣之路笔记——c# 从DataGrid中导出数据 Session
  • 原文地址:https://www.cnblogs.com/ling-yu/p/9318636.html
Copyright © 2011-2022 走看看