zoukankan      html  css  js  c++  java
  • 1.6学习进度

    今天学习1.5小时

    继续昨天的爬虫学习

    from bs4 import BeautifulSoup
    from bs4 import *
    import re
    import requests
    from fake_useragent import UserAgent
    
    # Page to scrape.
    url='https://www.qiushibaike.com/text/'
    # Send a desktop-browser User-Agent instead of the default python-requests one.
    headers={
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62"
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    resp=requests.get(url,headers=headers,timeout=10)
    # print(resp.text)
    # Create a BeautifulSoup object from the response text, parsed with lxml
    soup=BeautifulSoup(resp.text,'lxml')
    # Get a tag (attribute access returns the first matching tag)
    # print(soup.span)
    # Get attributes: the whole attribute dict, one attribute via .get(), one via indexing
    print(soup.div.attrs)
    print(soup.div.get('id'))
    print(soup.a['href'])
    # Get the text content of a tag
    print(soup.title.string)
    print(soup.title.text)
    
    # print(type(soup.div.string))
    # #find_all()
    # m=soup.find_all('div')
    # Every tag whose class attribute contains "author"
    print(soup.find_all(class_='author'))
    bs4的使用
    from  urllib.request import *
    from urllib.parse import urlencode
    from fake_useragent import UserAgent
    from random import *
    from http.cookiejar import MozillaCookieJar
    def get_cookie():
        """Submit the login form and save the resulting cookies to ``cookie.txt``.

        The form fields are URL-encoded and passed as the request body, which
        makes urllib send a POST. Cookies set while handling the response are
        collected in a ``MozillaCookieJar`` and written to ``cookie.txt``,
        including session-only (discard) and expired cookies.

        Raises:
            urllib.error.URLError: if the login request fails.
        """
        login_url="http://www.sxt.cn/index/login/login"
        # NOTE(review): credentials are hard-coded for this tutorial script.
        form_data={
            "user":"17703181473",
            "password":"123456"
        }
        headers={
            "User-Agent":UserAgent().random
        }
        # Supplying ``data`` is what actually submits the form; without it the
        # request is a plain GET and the credentials are never sent.
        req=Request(login_url,data=urlencode(form_data).encode(),headers=headers)
    
        cookie_jar=MozillaCookieJar()
        handler=HTTPCookieProcessor(cookie_jar)
        opener=build_opener(handler)
        # Only the cookies matter here; close the response right away.
        with opener.open(req):
            pass
        cookie_jar.save('cookie.txt',ignore_discard=True,ignore_expires=True)
    
    def use_cookie():
        """Fetch the user page with the cookies stored in ``cookie.txt`` and print it.

        Loads the cookie file into a ``MozillaCookieJar``, attaches it to an
        opener, requests the user page and prints the decoded response body.

        Raises:
            OSError: if ``cookie.txt`` cannot be read.
            urllib.error.URLError: if the request fails.
        """
        info_url="http://www.sxt.cn/index/user.html"
        headers = {
            "User-Agent": UserAgent().random
        }
        req=Request(info_url,headers=headers)
        cookie_jar=MozillaCookieJar()
        cookie_jar.load("cookie.txt",ignore_expires=True,ignore_discard=True)
        handler=HTTPCookieProcessor(cookie_jar)
        opener=build_opener(handler)
        # Close the response once the body has been read.
        with opener.open(req) as resp:
            # decode() must be called; printing the bare attribute only shows
            # the bound method object, not the page text.
            print(resp.read().decode())
    # Script entry point: log in first so cookie.txt exists, then reuse it.
    if __name__ == "__main__":
        get_cookie()
        use_cookie()
    cookie的使用
  • 相关阅读:
    DNS部署与安全
    DHCP部署与安全
    jenkins漏洞复现
    Apache Axis2 漏洞复现
    制作war包
    JBoss 漏洞复现
    Tomcat漏洞复现
    编写登陆接口(2)
    学习使用新工具Pycharm
    while练习99乘法表
  • 原文地址:https://www.cnblogs.com/feng747/p/15587498.html
Copyright © 2011-2022 走看看