zoukankan      html  css  js  c++  java
  • 1.6学习进度

    今天学习1.5小时

    继续昨天的爬虫学习

    from bs4 import BeautifulSoup
    from bs4 import *
    import re
    import requests
    from fake_useragent import UserAgent
    
    # Demo script: fetch one page with requests and inspect it with BeautifulSoup.
    url='https://www.qiushibaike.com/text/'
    # Send a fixed desktop-browser User-Agent header with the request.
    headers={
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62"
    }
    resp=requests.get(url,headers=headers)
    # print(resp.text)
    # Create a BeautifulSoup object from the response text, using the lxml parser
    soup=BeautifulSoup(resp.text,'lxml')
    # Get a tag
    # print(soup.span)
    # Get attributes (soup.div / soup.a refer to the first such tag in the document)
    print(soup.div.attrs)
    print(soup.div.get('id'))
    print(soup.a['href'])
    # Get content: the <title> tag's text, via .string and via .text
    print(soup.title.string)
    print(soup.title.text)
    
    # print(type(soup.div.string))
    # #findall()
    # m=soup.find_all('div')
    # Print every tag whose class attribute matches 'author'
    print(soup.find_all(class_='author'))
    bs4的使用
    from  urllib.request import *
    from urllib.parse import urlencode
    from fake_useragent import UserAgent
    from random import *
    from http.cookiejar import MozillaCookieJar
    def get_cookie():
        """Log in to the site and save the resulting cookies to ``cookie.txt``.

        The credentials in ``form_data`` are URL-encoded and sent as the body
        of the login request. Cookies set by the response are collected in a
        ``MozillaCookieJar`` and written to ``cookie.txt`` in the current
        working directory.

        Raises:
            urllib.error.URLError: if the login request fails.
        """
        login_url="http://www.sxt.cn/index/login/login"
        form_data={
            "user":"17703181473",
            "password":"123456"
        }
        headers={
            "User-Agent":UserAgent().random
        }
        # Supplying ``data`` makes this a POST that carries the credentials;
        # without it the form data was never sent and no login took place.
        req=Request(login_url,headers=headers,data=urlencode(form_data).encode())

        cookie_jar=MozillaCookieJar()
        handler=HTTPCookieProcessor(cookie_jar)
        opener=build_opener(handler)
        # Only the cookies matter here; close the response instead of leaking it.
        opener.open(req).close()
        # Keep session (discardable) and expired cookies too, so the saved file
        # contains everything the server set.
        cookie_jar.save('cookie.txt',ignore_discard=True,ignore_expires=True)
    
    def use_cookie():
        """Request the user page with the cookies stored in ``cookie.txt``.

        Loads the cookie file from the current working directory, attaches the
        cookies to the request through an ``HTTPCookieProcessor`` opener, and
        prints the decoded response body.

        Raises:
            OSError: if ``cookie.txt`` cannot be read.
            urllib.error.URLError: if the request fails.
        """
        info_url="http://www.sxt.cn/index/user.html"
        headers = {
            "User-Agent": UserAgent().random
        }
        req=Request(info_url,headers=headers)
        cookie_jar=MozillaCookieJar()
        cookie_jar.load("cookie.txt",ignore_expires=True,ignore_discard=True)
        handler=HTTPCookieProcessor(cookie_jar)
        opener=build_opener(handler)
        # Close the response once the body has been read.
        with opener.open(req) as resp:
            # decode() must be called; printing ``.decode`` without parentheses
            # only showed the bound-method object, not the page text.
            print(resp.read().decode())
    if __name__ == '__main__':
        # Write cookie.txt first, then reload it to request the user page.
        get_cookie()
        use_cookie()
    cookie的使用
  • 相关阅读:
    面向对象之继承
    面向对象之封装
    进程相关(一)
    面向对象之反射,元类
    实现效果从中间变大
    如何扒一个网站
    java例程练习(引用类型数据的排序和查找)[外篇]
    java例程练习(Iterator)
    java例程练习(增强的for循环)
    java例程练习(Map接口及自动打包、解包)
  • 原文地址:https://www.cnblogs.com/feng747/p/15587498.html
Copyright © 2011-2022 走看看