zoukankan      html  css  js  c++  java
  • 使用代理刷阅读量

    #coding=utf-8

    import random
    import re
    import time
    import requests
    from bs4 import BeautifulSoup

    class ProxyHandler(object):
        """Collect proxy addresses from a listing page and poll an article page.

        ``get_proxy_list`` downloads ``self.proxy_get_url`` and extracts
        ``scheme://ip:port`` strings from its table cells.  ``readnum`` fetches
        an article several times and prints the text of its view-counter
        element.
        """

        def __init__(self):
            # Browser User-Agent strings; one is chosen at random for the
            # proxy-listing request.
            self.user_agent_list = [
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
                "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
                "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
                "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1"
            ]
            # URL of the page that lists proxies.
            self.proxy_get_url = 'http://www.xicidaili.com'
            # Target address (not read by any method in this class).
            self.visit_url = 'https://blog.csdn.net/t1623183652/article/details/73292150'
            # Proxies collected so far, as 'scheme://ip:port' strings.
            self.proxy_list = []
            # Request timeout in seconds.
            self.timeout = 100

        @staticmethod
        def _parse_proxy_cells(html):
            """Return 'scheme://ip:port' strings parsed from <td> cells in *html*.

            Cells are consumed in groups of six: offset 0 is used as the IP,
            offset 1 as the port and offset 3 as the protocol.  Groups whose
            protocol cell contains neither HTTP nor HTTPS are skipped, and a
            trailing incomplete group is ignored.
            NOTE(review): the six-cells-per-row layout is an assumption about
            the listing page's markup -- confirm against the live page.
            """
            cells = re.findall(r'<td>(.*?)</td>', html)
            proxies = []
            # Integer division: only complete groups of six cells are used.
            for row in range(len(cells) // 6):
                base = row * 6
                ip, port, scheme = cells[base], cells[base + 1], cells[base + 3]
                # Filter out non-HTTP entries (e.g. socket proxies).
                if re.search(r'(HTTP|HTTPS)', scheme) is None:
                    continue
                proxies.append('%s://%s:%s' % (scheme.lower(), ip, port))
            return proxies

        def get_proxy_list(self):
            """Download the proxy listing and add its entries to ``self.proxy_list``.

            A random User-Agent from ``self.user_agent_list`` is sent with the
            request.  Entries already present in ``self.proxy_list`` are not
            added again, so calling this method repeatedly does not create
            duplicates.

            :return: ``self.proxy_list`` (the same list object, updated in place).
            """
            # Pick a random User-Agent string for this request.
            UA = random.choice(self.user_agent_list)
            print('随机产生的UA是====%s' % UA)
            headers = {
                'User-Agent': UA
            }
            response = requests.get(url=self.proxy_get_url, headers=headers, timeout=self.timeout)
            for proxy in self._parse_proxy_cells(response.text):
                if proxy not in self.proxy_list:
                    self.proxy_list.append(proxy)
            return self.proxy_list

        def readnum(self, url, header, proxy_ip):
            """Fetch *url* five times and print the view-counter text each time.

            :param url: article URL to request.
            :param header: dict of HTTP headers passed to ``requests.get``.
            :param proxy_ip: proxies mapping passed to ``requests.get``.

            Each attempt is preceded by a 60 second sleep.  Failures of a
            single attempt (connection error, timeout, counter element not
            found) are reported and the loop moves on to the next attempt.
            """
            for i in range(5):
                # Per the original author's note, a shorter interval (10 was
                # tried) did not give accurate results.
                time.sleep(60)
                try:
                    # Bound the request so a dead proxy cannot hang the loop.
                    req = requests.get(url, headers=header, proxies=proxy_ip, timeout=self.timeout)
                except requests.exceptions.ConnectionError:
                    print ("ConnectionError")
                    continue
                except requests.exceptions.Timeout:
                    print ("Timeout")
                    continue
                soup = BeautifulSoup(req.text, 'lxml')
                matches = soup.select('#mainBox > main > div.blog-content-box > div.article-info-box > div > div > span')
                if not matches:
                    # The selector matched nothing (different page layout or an
                    # error page); report it instead of raising IndexError.
                    print ("view counter element not found")
                    continue
                view = matches[0].getText()
                print("第%d次:阅读量%s" % (i + 1, view))

    if __name__ == '__main__':
        # Script entry point: scrape the proxy listing, pick one proxy at
        # random, and poll a fixed article URL through ProxyHandler.readnum.
        proxy_handler = ProxyHandler()
        proxy_list = proxy_handler.get_proxy_list()
        print (proxy_list)
        if not proxy_list:
            # random.choice() raises IndexError on an empty sequence; stop with
            # a clear message when the listing page yielded no entries.
            raise SystemExit('no proxies were parsed from the listing page')

        # Entries look like 'scheme://ip:port'; keep only the 'ip:port' part.
        # NOTE(review): only the 'http' key is set while `url` below is https;
        # requests selects proxies by URL scheme -- confirm this is intended.
        proxy_ip = {
            'http': random.choice(proxy_list).split(r"//")[1]
        }

        url = 'https://blog.csdn.net/relocy/article/details/51533302'
        header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'}
        proxy_handler.readnum(url, header, proxy_ip)
    ---------------------
    作者:青霄
    来源:CSDN
    原文:https://blog.csdn.net/leiwuhen92/article/details/80370449
    版权声明:本文为博主原创文章,转载请附上博文链接!

  • 相关阅读:
    C#利用System.Net发送邮件(带 抄送、密送、附件、html格式的邮件)
    ASP.NET跨平台实践:无需安装Mono的Jexus“独立版”
    在.NET Core之前,实现.Net跨平台之Mono+CentOS+Jexus初体验
    初识Docker和Windows Server容器
    windows 7 docker oralce安装和使用
    javaweb学习总结(三十)——EL函数库
    javaweb学习总结(二十九)——EL表达式
    javaweb学习总结(二十八)——JSTL标签库之核心标签
    javaweb学习总结(二十七)——jsp简单标签开发案例和打包
    在Servlet使用getServletContext()获取ServletContext对象出现java.lang.NullPointerException(空指针)异常的解决办法
  • 原文地址:https://www.cnblogs.com/zhaobobo10/p/11079699.html
Copyright © 2011-2022 走看看