zoukankan      html  css  js  c++  java
  • Python 通过js控制滚动条拉取全文,通过psutil获取pid窗口句柄,通过win32gui使程序窗口前置,通过autopy实现右键菜单和另存为操作

    1.参考

    利用 Python + Selenium 自动化快速截图

    利用 Python + Selenium 实现对页面的指定元素截图(可截长图元素)

    使用python获取系统所有进程PID以及进程名称

     python锁定焦点到指定进程窗口的参考方法

    2.改进js代码,下拉和上拉,精确判断加载是否结束

    #!/usr/bin/env python
    # -*- coding: UTF-8 -*
    import time

    from selenium import webdriver
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.common.exceptions import NoSuchElementException, TimeoutException
    from selenium.webdriver.common.action_chains import ActionChains

    def scroll_page(url, browser='chrome'):
        """Open ``url`` in a browser, scroll until all content is loaded, return the driver.

        The page is scrolled by injected JavaScript.  Once the bottom is reached
        the script keeps bouncing one step up and down until the document title
        contains "scroll-done"; a second injected script adds that marker after
        the end-of-list element has appeared.

        The end-of-list XPath, the expected text of the last item and the
        expected final title are hard-coded for http://codingpy.com.

        :param url: address of the page to load.
        :param browser: ``'chrome'`` starts Chrome; any other value starts Firefox.
        :returns: the live WebDriver instance; the caller owns it from then on.
        :raises TimeoutException: if the end-of-list element does not appear
            within 500 seconds, or the title marker within 10 seconds.
        :raises RuntimeError: if the last loaded item is not the expected one.

        On any failure the browser is shut down before the exception propagates,
        because the caller never receives a handle it could use to close it.
        """
        if browser == 'chrome':
            driver = webdriver.Chrome()
        else:
            # Author's note: ActionChains(driver).context_click(e).perform()
            # raises an error when driving Firefox.
            driver = webdriver.Firefox()
        # Author's note: webdriver.PhantomJS() can capture full-length
        # screenshots, but loading a long page with it took very long.

        # Scrolls down in 1000px steps every 100ms.  After reaching the bottom
        # it steps back up and down again until the title carries "scroll-done".
        scroll_js = """
        (function () {
          var y = 0;
          var step = 1000; //100
          window.scroll(0, 0);

          function f() {
            if (y < document.body.scrollHeight) {
              y += step;
              window.scroll(0, y);
              setTimeout(f, 100); //100 递归循环调用
            } else {
                if(document.title.indexOf("scroll-done") < 0){ //-1 找不到,还没执行下文的driver.execute_script
                    y -= step;
                    window.scroll(0, y);
                    setTimeout(f, 100); //100
                    //window.scroll(0, 0);
                    //document.title += "scroll-done";
                }
                //else{
                    //window.scroll(0, 0);
                //}
            }
          }

          setTimeout(f, 1000); //1000
        })();
        """

        # Appends the marker that makes the scrolling script above stop.
        stop_js = """
        (function () {
          function f() {
            document.title += " scroll-done";
          }
          setTimeout(f, 1000);
        })();
        """

        finished = False
        try:
            driver.set_window_size(1200, 900)
            driver.get(url)  # Load page
            start = time.time()
            print(driver.title)

            driver.execute_script(scroll_js)

            # Scrolled to the very end once the "no more items" element exists;
            # otherwise until() raises TimeoutException.
            WebDriverWait(driver, 500).until(
                lambda x: x.find_element_by_xpath('//div[@style="text-align:center"]/em'))

            # Stop the scroll-to-bottom-then-back-up loop started above.
            driver.execute_script(stop_js)

            # Page structure being queried:
            #   <div class="js-infinite-layout">
            #     <div class="js-infinite-item">
            #       Jetbrains公司正式发布Pycharm 5
            # text() cannot be written inside the XPath itself, so .text is used.
            rst = driver.find_element_by_xpath(
                '//div[@class="js-infinite-item"][last()]//div[@class="header"]').text
            print(rst)
            if rst != u'Jetbrains公司正式发布Pycharm 5':
                raise RuntimeError('wrong!!!')

            print(time.time() - start)
            print(driver.title)

            # The injected script needs time to run, so wait for the title
            # instead of sleeping; raises TimeoutException after 10 seconds.
            WebDriverWait(driver, 10).until(
                lambda x: x.title == u'编程派 | Coding Python scroll-done')
            print(driver.title)

            finished = True
            return driver
        finally:
            if not finished:
                # Do not leave an orphaned browser / driver process behind.
                driver.quit()

    3.通过进程名找到pid,再结合窗口标题定位窗口并前置

    import win32con
    import win32gui
    import win32process
    import psutil

    def get_hwnds_for_pid(pid):
        """Return the handles of visible, enabled windows that belong to ``pid``.

        :param pid: process id to look for.
        :returns: list of window handles reported by ``win32gui.EnumWindows``
            whose owning process id equals ``pid``; empty if there are none.
        """
        def callback(hwnd, hwnds):
            if win32gui.IsWindowVisible(hwnd) and win32gui.IsWindowEnabled(hwnd):
                _, found_pid = win32process.GetWindowThreadProcessId(hwnd)
                if found_pid == pid:
                    hwnds.append(hwnd)
            # Always report success so the remaining windows are visited too.
            return True

        hwnds = []
        win32gui.EnumWindows(callback, hwnds)
        return hwnds


    def set_process_foreground(pid_part_name, pid_window_text):
        """Bring the first matching window of a matching process to the foreground.

        A process matches when ``pid_part_name`` occurs in its name, compared
        case-insensitively (e.g. ``'chrome'`` matches ``chrome.exe`` and
        ``'EXCEL'`` matches ``EXCEL.EXE``).  A window matches when its title
        contains ``pid_window_text``.

        :param pid_part_name: fragment of the process name.
        :param pid_window_text: unicode fragment of the window title.
        :raises RuntimeError: if no window of any matching process matches.
        """
        wanted_name = pid_part_name.lower()
        # NOTE(review): comparing GBK bytes presumably relies on Python 2 on a
        # GBK-locale Windows, where GetWindowText returns a byte string - confirm
        # before running under another locale or interpreter.
        wanted_text = pid_window_text.encode('gbk')

        candidates = []
        for pid in psutil.pids():
            try:
                name = psutil.Process(pid).name()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # The process exited after pids() was taken, or it may not be
                # inspected; neither should abort the whole search.
                continue
            if wanted_name in name.lower():
                candidates.append((pid, name))

        for pid, name in candidates:
            for hwnd in get_hwnds_for_pid(pid):
                # Sample output lines:
                #   92292 chrome.exe 137328 编程派 | Coding Python - Google Chrome
                #   EXCEL.EXE 857830 Microsoft Excel - Book1.xlsx
                #   90644 firefox.exe 595556 编程派 | Coding Python scroll-done - Mozilla Firefox
                window_text = win32gui.GetWindowText(hwnd)
                if wanted_text in window_text:
                    print('%s %s %s %s' % (pid, name, hwnd, window_text))
                    win32gui.SetForegroundWindow(hwnd)
                    return
        raise RuntimeError('process not found')

     

    4.通过autopy实现右键操作,以及网页另存为

    from autopy import key, mouse
    def save_result(driver):
        """Store the page currently shown by ``driver`` under a timestamped name.

        Three things are produced, all named after the current time (HHMMSS):

        * ``<time>.html`` - the page source, written as UTF-8;
        * whatever the browser's own Ctrl+S dialog saves, with the timestamp
          typed in as the file name and confirmed with Enter;
        * ``<time>.png`` - a screenshot taken through the driver.

        :param driver: WebDriver instance whose page should be saved.
        """
        stamp = time.strftime('%H%M%S')
        html_name = '%s.html' % stamp
        png_name = '%s.png' % stamp

        with open(html_name, 'wb') as html_file:
            source = driver.page_source
            html_file.write(source.encode('utf-8'))

        # Earlier experiments, kept as a note: right-clicking the logo image
        # via ActionChains and closing the menu with Escape, or moving the
        # window to (0, 0) and doing a right click followed by a left click
        # with autopy (right first, so a link is not opened by accident).

        # Keystrokes go to the foreground window, so the browser has to be
        # brought to the front before they are sent.
        set_process_foreground(driver.name, driver.title)

        key.tap('s', key.MOD_CONTROL)
        time.sleep(1.5)
        key.type_string(stamp)
        time.sleep(0.5)
        key.tap(key.K_RETURN)

        driver.save_screenshot(png_name)
        # The browser is intentionally left open (driver.close() is not called).
    
    if __name__ == "__main__":
        # Page to capture and the browser to drive; use 'firefox' for Firefox.
        url = "http://codingpy.com"
        browser = 'chrome'

        # Load and fully scroll the page, then save it to disk.
        driver = scroll_page(url, browser)
        save_result(driver)
        print('All DONE')
  • 相关阅读:
    Java学习笔记day01
    对有序数组进行二分查找(折半查找)
    对数组进行冒泡排序
    LeetCode #344. Reverse String
    LeetCode #292. Nim Game
    LeetCode #258. Add Digits
    Android DiskLruCache完全解析,硬盘缓存的最佳方案
    Android源码解析——LruCache
    Messenger与AIDL的异同
    Android应用层View绘制流程与源码分析
  • 原文地址:https://www.cnblogs.com/my8100/p/7233444.html
Copyright © 2011-2022 走看看