zoukankan      html  css  js  c++  java
  • 自动网页截图,按指定元素位置裁剪图片并保存到 Excel 表格

    # coding=utf-8
    import os
    import time
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait
    from PIL import Image
    import xlsxwriter, xlrd
    import pandas as pd
    
    
    def login():
        """Start headless Chrome, sign in, and return the authenticated driver.

        The flow is: open the login page, fill in the account and password
        fields, submit, then prompt on stdin for the two-step verification
        code and submit that as well.

        Returns:
            The ``webdriver.Chrome`` instance, left open for the caller to use
            and eventually ``quit()``.

        Raises:
            Any exception raised while driving the browser. The browser is shut
            down before the exception propagates so that a failed login does
            not leave a headless Chrome process behind.
        """
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        driver = webdriver.Chrome(executable_path='./chromedriver', chrome_options=chrome_options)
        try:
            driver.set_window_size(1200, 741)
            driver.implicitly_wait(2)
            print('初始化中...')
            driver.get("http://xxxve")
            print('填写登录信息中...')
            acc = driver.find_element_by_id('login-email')
            pwd = driver.find_element_by_id('login-pass')
            btn = driver.find_element_by_tag_name('button')
            # NOTE(review): credentials are hard-coded in the source; consider
            # reading them from the environment or a prompt instead.
            acc.send_keys('zhxxxm')
            pwd.send_keys('LONxxxxx$')
            btn.click()
            print('跳转到验证码页面中...')
            # Give the verification page time to load before looking up fields.
            time.sleep(2)
            capta = driver.find_element_by_id('code')
            capta_input = input('请输入两步验证码:')
            capta.send_keys(capta_input)
            btn1 = driver.find_element_by_tag_name('button')
            btn1.click()
            time.sleep(2)
            print('跳转到创意编辑页面中...')
        except BaseException:
            # Includes KeyboardInterrupt at the input() prompt: never leak the
            # browser process when login does not complete.
            driver.quit()
            raise
        return driver
    
    
    # URLs that could not be processed; parse_img() and get_screen() append to
    # this list and the __main__ block prints it once all URLs were attempted.
    faild_url = []
    
    
    def parse_img(driver, url):
        """Screenshot the current page and crop it to the ``AdvertViewer-item`` element.

        The screenshot is written to ``./screen_shot/<cid>.png`` where ``<cid>``
        is the second-to-last ``/``-separated segment of ``url``; the file is
        then overwritten with the region covered by the first element whose
        class name is ``AdvertViewer-item``.

        Args:
            driver: Selenium driver that already has the target page loaded.
            url: Address of the loaded page; used to derive the file name and
                recorded in ``faild_url`` on failure.

        Any exception raised while capturing or cropping is printed together
        with the URL and the URL is appended to ``faild_url``; nothing is
        re-raised.
        """
        cid = url.split('/')[-2]
        shot_path = "./screen_shot/{}.png".format(cid)
        try:
            driver.get_screenshot_as_file(shot_path)
            element = driver.find_element_by_class_name("AdvertViewer-item")

            # Read position and size once so all four edges come from the same
            # snapshot of the element.
            location = element.location
            size = element.size
            left = location['x']
            top = location['y']
            right = left + size['width']
            bottom = top + size['height']

            # Close the source file before overwriting it with the cropped copy.
            with Image.open(shot_path) as im:
                cropped = im.crop((left, top, right, bottom))
                # Force pixel data to be read while the source file is open.
                cropped.load()
            cropped.save(shot_path)
            print("创意-->{}.png 已经保存".format(cid))
        except Exception as e:
            # Report the cause instead of failing silently, matching get_screen().
            print(e, url)
            faild_url.append(url)
    
    
    def get_screen(driver, url, tem):
        """Load ``url`` in ``driver``, wait for it to render, then capture it.

        Args:
            driver: Selenium driver used to open the page.
            url: Page address to open and hand on to ``parse_img``.
            tem: Accepted for the caller's convenience; not used here.

        Any exception is printed with the URL and the URL is appended to
        ``faild_url``; nothing is re-raised.
        """
        try:
            driver.get(url)
            # Fixed pause so the page can finish rendering before the capture.
            time.sleep(10)
            parse_img(driver, url)
        except Exception as err:
            print(err, url)
            faild_url.append(url)
    
    
    def new_xlsx():
        """Copy the source workbook into a new one, embedding cropped screenshots.

        Every sheet of ``全xxx意.xlsx`` is recreated under the same name in
        ``全行xxx意-fina.xlsx``. Non-empty cells are copied as-is. For a body
        row whose 7th column (index 6) holds an integer-like id with a matching
        ``./screen_shot/<id>.png`` file, column H is widened, the row height is
        set from the image height, and the image is inserted into column H in
        place of that cell's value (only when that cell is non-empty).

        Rows whose id column is missing, blank, or non-numeric are copied as
        plain values instead of aborting the export, and a missing screenshot
        directory is treated as "no screenshots available".
        """
        # NOTE(review): xlrd 2.x no longer reads .xlsx files — confirm the
        # installed xlrd version supports this input.
        data = xlrd.open_workbook('全xxx意.xlsx')
        book = xlsxwriter.Workbook('全行xxx意-fina.xlsx')

        shot_dir = './screen_shot/'
        # List the directory once instead of once per cell.
        try:
            available = set(os.listdir(shot_dir))
        except FileNotFoundError:
            available = set()

        for table in data.sheets():
            worksheet = book.add_worksheet(table.name)
            nrows = table.nrows
            if nrows == 0:
                # Empty sheet: keep the (empty) worksheet, nothing to copy.
                continue

            # Header row
            for i, v in enumerate(table.row_values(0)):
                if v != '':
                    worksheet.write(0, i, v)

            # Body rows
            # NOTE(review): the upper bound ``nrows - 1`` skips the sheet's last
            # row. Kept as in the original — confirm whether the last row is a
            # footer that should be dropped or this is an off-by-one.
            for k in range(1, nrows - 1):
                rows = table.row_values(k)

                try:
                    image_name = str(int(rows[6])) + '.png'
                except (IndexError, TypeError, ValueError, OverflowError):
                    image_name = None
                has_image = image_name in available

                if has_image:
                    image_path = shot_dir + image_name
                    # Open once per row and release the file handle right away.
                    with Image.open(image_path) as shot:
                        image_width, image_height = shot.size
                    worksheet.set_column('H:H', width=58)
                    worksheet.set_row(k, height=image_height * 0.8)

                for i, v in enumerate(rows):
                    if v == '':
                        continue
                    if has_image and i == 7:
                        worksheet.insert_image('H' + str(k + 1), image_path,
                                               {'x_offset': 6, 'y_offset': 3})
                    else:
                        worksheet.write(k, i, v)

        book.close()
    
    
    if __name__ == '__main__':
        # Script entry point: read the URL list, capture every page, then build
        # the output workbook with the screenshots embedded.
        df = pd.read_excel('./全行xxxx.xlsx')
        # Read the columns before logging in so a missing column fails fast
        # without leaving a browser running.
        link_list = df['link'].tolist()
        tem_list = df['样式'].tolist()
        total = len(link_list)

        driver1 = login()
        try:
            # ``done`` counts processed URLs across the whole loop; the original
            # reset its counter on every iteration, so the remaining count
            # never changed.
            for done, (url, tem) in enumerate(zip(link_list, tem_list), start=1):
                get_screen(driver1, url, tem)
                print('还剩 %s 个' % str(total - done))
        finally:
            # Always shut the browser down, even if the loop is interrupted.
            driver1.quit()

        print('失败的url:', faild_url)
        print('所有抓取结束')
        new_xlsx()
        print('插入表格结束')
  • 相关阅读:
    数据缓存/NSURLSession
    NSURLConnection基本使用/多线程断点下载/文件的上传
    HTTP协议/数据安全
    block的概念及基本使用 /block访问外部变量
    NSOperation简单介绍/NSOperation基本操作/自定义NSOperation
    GCD介绍/GCD的基本使用/GCD的常见用法
    NSThread方式创建线程/线程安全/线程间的通信
    面试常见知识点
    新课堂练习题
    线程概述
  • 原文地址:https://www.cnblogs.com/Erick-L/p/9390238.html
Copyright © 2011-2022 走看看