zoukankan      html  css  js  c++  java
  • <爬虫>反反爬虫的各种知识

    1.代理服务器(中间人)

    # -*- coding: utf8 -*-
    import requests
    
    if __name__ == '__main__':
    	url = "https://www.baidu.com"
    
    	# 1.设置代理地址
    	proxy = '163.204.246.126:9999'
    	# 需要验证
    	# proxy = '账号:密码@47.112.24.110:8226'
    	# 2.设置proxies
    	proxies = {
    		'http': "http://" + proxy,
    		# 'https': "https://" + proxy,
    	}
    	try:
    		response = requests.get(url, proxies=proxies, timeout=10)
    		print(response.content.decode())
    	except requests.exceptions.ConnectionError as e:
    		print("Error", e.args)
    

    2.Cookie和Session

    1.1:手动复制,最笨的方法

    import requests
    from urllib.parse import urlencode
    
    headers = {
    	'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36',
    	'cookie':'xxxx自己的cookie'
    }
    
    params = {
    	# 	传入一些参数
    }
    # 将参数拼接在网址上
    url = 'http://www.renren.com/972196941' + urlencode(params)
    
    s = requests.Session()
    
    response = s.get(url,headers=headers,verify=False).text
    
    print(response)
    

    1.2 账号密码自动获取cookie并登录---崔大大的-记得多学习

    # coding: utf-8
    import requests
    from scrapy import Selector
    from http.cookiejar import LWPCookieJar
    
    session = requests.session()
    headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
        }
    session.headers = headers
    session.cookies = LWPCookieJar(filename='DouBanCookies.txt')  # 实例化一个LWPCookieJar对象
    
    def login():
        name = input("输入账户:")
        password = input("输入密码:")
        url = "https://accounts.douban.com/j/mobile/login/basic"
        data = {
            "ck": "",
            "name": name,
            "password": password,
            "remember": "True",
            "ticket": "",
        }
        response = session.post(url, data=data)
        print(response.text)
        session.cookies.save()
        verify_login()
    
    def verify_login():
        mine_url = "https://www.douban.com/mine/"
        mine_response = session.get(mine_url)
        selector = Selector(text=mine_response.text)
        user_name = selector.css(".info h1 ::text").extract_first("")
        print(f"豆瓣用户名:{user_name.strip()}")
    
    def cookie_login():
        try:
            # 从文件中加载cookies(LWP格式)
            session.cookies.load(ignore_discard=True)
            print(session.cookies)
        except Exception:
            print("Cookies未能加载,使用密码登录")
            login()
    
        else:
            verify_login()
    
    if __name__ == "__main__":
        cookie_login()
    

     

    使用最常用的!!!!! 

    # 最常用的cookie使用
    
    import requests
    from urllib.parse import urlencode
    
    # 实例化session
    session = requests.session()
    
    # 先发送post请求,获取cookie,在带上cookie
    post_url = "http://www.renren.com/PLogin.do"
    
    headers = {
    	'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36',
    }
    # name="email",name="password"
    post_data = {
    	"email": "xxxx",
    	"password": "xxx",
    }
    session.post(post_url, headers=headers, data=post_data)
    
    params = {
    	# 	传入一些参数
    }
    # 将参数拼接在网址上
    url = 'http://www.renren.com/972196941' + urlencode(params)
    
    response = session.get(url, headers=headers, verify=False)
    with open("renren.html", 'w', encoding='utf-8') as f:
    	f.write(response.content.decode())
    print(response)
    

    1.3 Fiddler抓取电脑代理

    1.翻墙安装SwitchyOmega一个代理设置工具 ----Chrome的扩展

    2.设置代理为Fiddler的地址和端口

     

    3.启用代理

     1.4 Fiddler抓取手机代理

    1.查询本机的IP地址

  • 相关阅读:
    免费的mail server
    opensuse 11.2/11.3安装vmware server 1.0.10笔记
    cisco IOS 免费下载的地方
    自动打开最快镜像站
    [ZT]MSSQL清空或者压缩日志的方法
    Cisco路由器的安全配置方案[zt]
    CISCO2821系列路由器恢复密码
    RTorrent User Guide
    Asp.Net发送邮件详解
    C#在线生成网页缩略图
  • 原文地址:https://www.cnblogs.com/shuimohei/p/11470290.html
Copyright © 2011-2022 走看看