zoukankan      html  css  js  c++  java
  • 爬虫模板

    # -*- coding: utf-8 -*-
    """
    Created on Tue Apr 24 12:16:18 2018
    
    @author: 13769
    """
    
    import requests 
    import http.cookiejar as cookielib
    
    # Default request headers that imitate a desktop Firefox browser.
    # Content-Length and Cookie are deliberately omitted: requests computes the
    # former and the shared session manages the latter.
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        # The header name is hyphenated; the previous 'Content_Type' spelling
        # is not the standard header and is dropped by many servers/proxies.
        'Content-Type': 'application/x-www-form-urlencoded',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    # Shared session so cookies persist across requests. Session() is the
    # supported constructor; requests.session() is only a legacy alias.
    session = requests.Session()
    def login(username, password, url, cookie_file="cookies.txt"):
        """Log in to ``url`` through the shared session and persist its cookies.

        Cookies previously saved in ``cookie_file`` are loaded into the
        module-level ``session`` (best effort), the login form is POSTed with
        the module-level ``headers``, and the session's cookies are then
        written back to ``cookie_file`` in LWP format.

        Args:
            username: Value sent in the ``username`` form field.
            password: Value sent in the ``password`` form field.
            url: Address the login form is POSTed to.
            cookie_file: Path of the LWP cookie file to read and rewrite.

        Returns:
            The response object returned by ``session.post`` so the caller
            can inspect the outcome of the login request.
        """
        saved_jar = cookielib.LWPCookieJar()
        try:
            saved_jar.load(cookie_file, ignore_discard=True, ignore_expires=True)
        except (OSError, cookielib.LoadError):
            # Missing, unreadable or malformed cookie file: carry on without
            # saved cookies; the login below produces a fresh set.
            print("Cookie 未能加载")
        else:
            saved_cookies = requests.utils.dict_from_cookiejar(saved_jar)
            session.cookies = requests.utils.cookiejar_from_dict(saved_cookies)

        form = {
            "username": username,
            "password": password,
            "cksave": "0",
            "login": "Login",
        }
        response = session.post(url, data=form, headers=headers)

        # Copy the session's current cookies (name/value pairs) into a
        # file-backed jar and write it out for the next run.
        new_jar = cookielib.LWPCookieJar(cookie_file)
        requests.utils.cookiejar_from_dict(
            {cookie.name: cookie.value for cookie in session.cookies}, new_jar
        )
        new_jar.save(cookie_file, ignore_discard=True, ignore_expires=True)
        return response
    
    
        
    if __name__ == "__main__":
        # Log in once; afterwards, make further requests through `session`.
        login(
            username="xpower",
            password="q.123456",
            url="http://115.159.40.116/ajax/login.php",
        )
        
  • 相关阅读:
    Struts2升级注意事项
    使用HttpClient获取网页源码
    The method getJspApplicationContext(ServletContext) is undefined for the type JspFactory解决方案
    IBatis常见错误集锦
    JPush极光推送Java服务器端API
    JS去空trim
    Jquery常用操作
    适配器模式(Adapter)
    常见数据库设计(3)——历史数据问题之多记录变更
    VS2008 工具箱都是textbox(报表设计时)
  • 原文地址:https://www.cnblogs.com/A-FM/p/6926920.html
Copyright © 2011-2022 走看看