zoukankan      html  css  js  c++  java
  • pyppeteer屏蔽图片

    # -*- coding: UTF-8 -*-
    """
    @time:2021/11/13
    """
    import asyncio
    import json
    from pyppeteer import launcher
    from pyppeteer import launch
    from pyppeteer.network_manager import Request, Response
    
    async def main():
        """Browse two pages in a non-headless browser with some resources blocked.

        Launches a browser, opens one page, turns on request interception and
        aborts every request whose resource type is 'stylesheet', 'image' or
        'font'; all other requests are continued. The two target URLs are then
        visited in sequence with a two second pause in between. The browser is
        not closed by this coroutine.
        """
        launch_options = {
            # "executablePath": r"C:\Users\yq\AppData\Local\pyppeteer\pyppeteer\local-chromium\722234\chrome-win\chrome.exe",
            "headless": False,
            'autoClose': False,
            "args": [
                '--disable-infobars',  # turn off the automation info bar
                # '--no-sandbox',
                # '--start-maximized',
                # Custom user agent string.
                '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
            ],
        }
        browser = await launch(**launch_options)
        page = await browser.newPage()
        # await page.setJavaScriptEnabled(enabled=True)

        # Author's note: enabling the interceptor like this had no effect:
        #   await page.setRequestInterception(True)
        #   page.on('request', intercept_request)
        #   page.on('response', intercept_response)
        # The working variant is the one below.

        # Resource types to drop. Other types noted by the author:
        # "image", "media", "eventsource", "websocket", "stylesheet", "font"
        blocked_types = ('stylesheet', 'image', 'font')

        async def intercept(request):
            """Abort requests of a blocked resource type, continue the rest."""
            if request.resourceType in blocked_types:
                await request.abort()
                return
            await request.continue_()

        def schedule_intercept(req):
            """Run the async interceptor as a task from the event callback."""
            return asyncio.ensure_future(intercept(req))

        await page.setRequestInterception(True)
        page.on('request', schedule_intercept)

        # Script kept for optional injection; it is not installed unless the
        # evaluateOnNewDocument call below is uncommented.
        stealth_script = """
        () =>{ 
            alert('aaa');
            Object.defineProperties(navigator,{ webdriver:{ get: () => false } });
            window.navigator.chrome = { runtime: {},  };
            Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
            Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5,6], });
         }
            """
        # await page.evaluateOnNewDocument(stealth_script)

        # Author's note: this page loads without images.
        await page.goto('https://hxxxng.com/')
        await asyncio.sleep(2)

        # Author's note: this page still loads without images, so the
        # interception stays active across navigations.
        await page.goto('https://hf.xxxng.com/housing/')
        # await browser.close()
    
    # Create and install the event loop explicitly: calling
    # asyncio.get_event_loop() with no running loop is deprecated on recent
    # Python versions and raises RuntimeError on the newest ones.
    # The loop is intentionally left open after main() finishes, as before.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(main())
  • 相关阅读:
    登录认证,全选,反选
    jQuery基础知识
    jQuery
    js练习
    BOM DOM
    mysql视图
    用CrwalSpider爬取boss直聘
    设置piplines.py数据管道
    在middlewares.py文件里添加代理ip
    爬取豆瓣电影
  • 原文地址:https://www.cnblogs.com/yansc/p/15547198.html
Copyright © 2011-2022 走看看