zoukankan      html  css  js  c++  java
  • 常用的小脚本

    1、检测表格中的网站是否能v6访问,是否有AAAA记录

     1 #coding:utf8
     2 import pycurl,xlrd,socket,xlwt
     3 from io import BytesIO
     4 from xlutils.copy import copy
     5 import threadpool
     6 import threading
     7 
     8 
     9 look = threading.Lock()
    10 #检测站点是否可以使用v6访问
    11 def web_can_ipv6(url):
    12     buffer = BytesIO()
    13     c = pycurl.Curl()
    14     c.setopt(pycurl.OPT_CERTINFO, 1)
    15     c.setopt(pycurl.URL, "http://"+url)
    16     #设置只使用v6访问
    17     c.setopt(pycurl.IPRESOLVE,pycurl.IPRESOLVE_V6)
    18     c.setopt(c.WRITEDATA, buffer)
    19     try:
    20         c.perform()
    21         return c.getinfo(pycurl.HTTP_CODE)
    22     except:
    23         return None
    24 
    25 #读取表格内容
    26 def read_excel():
    27     web_list = []
    28     # 打开文件
    29     workbook = xlrd.open_workbook(r'webs.xls')
    30     # 获取所有sheet
    31     sheet1 = workbook.sheet_by_name('Sheet1')
    32     for row_n in range(1,sheet1.nrows):
    33         row = sheet1.row_values(row_n)
    34         web_list.append(row)
    35     return web_list
    36 
    37 def write_excel(row,col,text):
    38     rb = xlrd.open_workbook(r'webs.xls','wb')  # 打开weng.xls文件
    39     wb = copy(rb)  # 利用xlutils.copy下的copy函数复制
    40     ws = wb.get_sheet(0)  # 获取表单0
    41     ws.write(row, col, text)
    42     wb.save(r'webs.xls')  # 保存文件
    43 
    44 #返回站点的AAAA记录
    45 def get_dns(url):
    46     try:
    47         results = socket.getaddrinfo(url, None)
    48         for item in results:
    49             if ':' in item[4][0]:
    50                 return item[4][0]
    51     except:
    52         print('%s-error'%url)
    53 
    54 
    55 def main(web):
    56     print('正在检测第%s个网站' % web[0])
    57     write_excel(web[0],3,get_dns(web[2]))
    58     if web_can_ipv6(web[2]):
    59         write_excel(web[0],4, web_can_ipv6(web[2]))
    60 
    61 
    62 
    63 
    64 
    65 if __name__ == '__main__':
    66     for web in read_excel():
    67         main(web)
    View Code

     2、检测表格中的网站的返回码,归属运营商,多线程

    总共一万多条数据的表格,采用分段处理的方式,大大提升处理速度

      1 ##coding:utf8
      2 import threadpool
      3 import threading
      4 import xlrd,xlwt,requests,socket
      5 from xlutils.copy import copy
      6 from IPy import IP
      7 from contextlib import closing
      8 from concurrent.futures import ThreadPoolExecutor,wait
      9 
     10 lock = threading.Lock()
     11 pool = threadpool.ThreadPool(20)
     12 
     13 #获取网站返回码
     14 def get_web_code(ws,web):
     15     try:
     16         url = str(web[2]).strip()
     17         url = 'http://'+url
     18         headers = {
     19                 'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) ',
     20             }
     21         r = requests.head(url=url,timeout=1,headers=headers)
     22 
     23         print('第%s行-->%s' % (web[0], str(r.status_code)))
     24 
     25         if is_url_in_cernet(get_dns(url)):
     26             write_excel(ws,int(web[0]), 4, 'CERNET')
     27         elif is_url_in_cmcc(get_dns(url)):
     28             write_excel(ws,int(web[0]), 4, 'CMCC')
     29         elif is_url_in_cucc(get_dns(url)):
     30             write_excel(ws,int(web[0]), 4, 'CUCC')
     31         elif is_url_in_ctcc(get_dns(url)):
     32             write_excel(ws,int(web[0]), 4, 'CTCC')
     33         else:
     34             write_excel(ws,int(web[0]), 4, 'UNKnow')
     35         write_excel(ws,int(web[0]),3,r.status_code)
     36         return r.status_code
     37     except Exception as e:
     38         write_excel(ws,int(web[0]),3,'ERROR')
     39         print('第%s行-->%s'%(web[0],e))
     40 
     41 
     42 #读取表格数据
     43 def read_excel():
     44     count=0
     45     web_list = []
     46     # 打开文件
     47     workbook = xlrd.open_workbook(r'webs.xls')
     48     # 获取所有sheet
     49     sheet1 = workbook.sheet_by_name('Sheet1')
     50     for row_n in range(1,sheet1.nrows):
     51         row = sheet1.row_values(row_n)
     52         web_list.append(row)
     53     n=100
     54     new_web_list = [web_list[i:i+n] for i in range(0,len(web_list),n)]
     55     return new_web_list
     56 
     57 
     58 # #表格写入数据
     59 # def write_excel(row,col,text):
     60 #     lock.acquire()
     61 #     try:
     62 #         rb = xlrd.open_workbook(r'webs.xls','wb')
     63 #         wb = copy(rb)  # 利用xlutils.copy下的copy函数复制
     64 #         ws = wb.get_sheet(0)  # 获取表单0
     65 #         ws.write(row, col, text)
     66 #         wb.save(r'webs.xls')  # 保存文件
     67 #     except:
     68 #         pass
     69 #     finally:
     70 #         lock.release()
     71 
     72 
     73 #表格写入数据
     74 def write_excel(ws,row,col,text):
     75     lock.acquire()
     76     try:
     77         ws.write(row, col, text)
     78     except:
     79         pass
     80     finally:
     81         lock.release()
     82 
     83 
     84 #返回站点的A记录
     85 def get_dns(url):
     86     lock.acquire()
     87     if 'http://' or 'https://' in  url.lower():
     88         url = url.strip()
     89         url = url.lower()
     90         url = url.replace('http://','')
     91         url = url.replace('https://', '')
     92     if ':' in url:
     93         url = url.split(':')[0]
     94     try:
     95         results = socket.getaddrinfo(url, None)
     96         return results[0][4][0]
     97     except:
     98         return url
     99     finally:
    100         lock.release()
    101 
    102 def is_url_in_cernet(url):
    103     lock.acquire()
    104     try:
    105         with open('cernet_ip.txt','r') as rf:
    106             for line in rf.readlines():
    107                 if ';' in line:
    108                     line = line.replace(';', '')
    109                 if ':' in line:
    110                     line = line.split(':')[0]
    111                 ip_line = IP(line)
    112                 if url in ip_line:
    113                     return True
    114     except:
    115         pass
    116     finally:
    117         lock.release()
    118 
    119 
    120 def is_url_in_cmcc(url):
    121     lock.acquire()
    122     try:
    123         with open('CMCC_ip.txt','r') as rf:
    124             for line in rf.readlines():
    125                 if ';' in line:
    126                     line = line.replace(';','')
    127                 if ':' in line:
    128                     line = line.split(':')[0]
    129                 ip_line = IP(line)
    130                 if url in ip_line:
    131                     return True
    132     except:
    133         pass
    134     finally:
    135         lock.release()
    136 
    137 def is_url_in_cucc(url):
    138     lock.acquire()
    139     try:
    140         with open('CUCC_ip.txt','r') as rf:
    141             for line in rf.readlines():
    142                 if ';' in line:
    143                     line = line.replace(';', '')
    144                 if ':' in line:
    145                     line = line.split(':')[0]
    146                 ip_line = IP(line)
    147                 if url in ip_line:
    148                     return True
    149     except:
    150         pass
    151     finally:
    152         lock.release()
    153 
    154 def is_url_in_ctcc(url):
    155     lock.acquire()
    156     try:
    157         with open('CTCC_ip.txt','r') as rf:
    158             for line in rf.readlines():
    159                 if ';' in line:
    160                     line = line.replace(';', '')
    161                 if ':' in line:
    162                     line = line.split(':')[0]
    163                 ip_line = IP(line)
    164                 if url in ip_line:
    165                     return True
    166     except:
    167         pass
    168     finally:
    169         lock.release()
    170 
    171 
    172 
    173 
    174 if __name__ == '__main__':
    175     new_web_list = read_excel()
    176     for web_list in new_web_list:
    177         rb = xlrd.open_workbook(r'webs.xls','wb')
    178         wb = copy(rb)  # 利用xlutils.copy下的copy函数复制
    179         ws = wb.get_sheet(0)  # 获取表单0
    180         t_list = []
    181         for web in web_list:
    182             work = threading.Thread(target=get_web_code , args=(ws, web,))
    183             work.start()
    184             t_list.append(work)
    185         for t in t_list:
    186             t.join()
    187         wb.save(r'webs.xls')
    188     print('-----end-----')
    189     # fs = []
    190     # for web in web_list:
    191     #     f = pool.submit(get_web_code, web)
    192     #     fs.append(f)
    193     # # 相遇与join,阻塞主线程
    194     # wait(fs)
    View Code

     3、将n个表格中的m个子表的数据全部汇总到一个总表中

    测试是将30+个表格汇总,耗时不到2分钟

     1 # -*- coding: utf-8 -*- 
     2 import os,xlrd
     3 from xlutils.copy import copy
     4 import time,re
     5 
     6 #获取需要的表格
     7 def get_file():
     8     file_list=[]
     9     for root,dirs,files in os.walk(os.getcwd()):
    10         for file in files:
    11             if 'excel_run.' in file:
    12                 pass
    13             elif file=='total.xls':
    14                 pass
    15             elif file=='~$total.xls':
    16                 pass
    17             else:
    18                 if re.search(r'/*.xls/*',file):
    19                     file_list.append(file)
    20     return file_list
    21 
    22 
    23 #读取表格数据
    24 def read_excel(filename,sheetindex):
    25     web_list = []
    26     # 打开文件
    27     workbook = xlrd.open_workbook(r'%s'%filename)
    28     # 获取所有sheet
    29     sheet1 = workbook.sheet_by_index(sheetindex)
    30     for row_n in range(2,sheet1.nrows):
    31         row = sheet1.row_values(row_n)
    32         web_list.append(row)
    33     return web_list
    34 
    35 
    36 #表格写入数据
    37 def write_excel(sheetindex,file,count):
    38     rb = xlrd.open_workbook(r'total.xls','wb')
    39     wb = copy(rb)  # 利用xlutils.copy下的copy函数复制
    40     ws = wb.get_sheet(sheetindex)  # 获取表单0
    41     nrow = rb.sheet_by_index(sheetindex).nrows
    42     for row in read_excel(file,sheetindex):
    43         count+=1
    44         for index_col,col in enumerate(row):
    45             ws.write(nrow+count, index_col, col)
    46     wb.save(r'total.xls')  # 保存文件
    47 
    48 
    49 if __name__ == '__main__':
    50     t1 = time.time()
    51     for file in get_file():
    52         for sheetindex in [1,2,4,5,6]:
    53             count=-1
    54             print('正在录入%s的第%s张表的数据' % (file, sheetindex))
    55             write_excel(sheetindex,file,count)
    56 
    57     t2=time.time()
    58     print('共耗时%s秒'%(t2-t1))
    59     print('---end---')
    View Code
  • 相关阅读:
    【javascript基础】cc_on的详解
    【css】怪异模式下的盒模型
    【javascript基础】javascript对象的expando属性
    【javascript基础】之深度克隆(深度拷贝)一个对象
    【javascript基础】toString、valueOf、转换 【译】
    【c++】C++ system函数用法
    【javascript基础】toString valueOf toLocaleString的具体区别
    【javascript基础】Nicholas C. Zakas如何面试前端工程师
    【浏览器兼容性】script的type为text/tpl,写入html IE6,7,8下报错
    从写组件说Xml——背景篇(零)
  • 原文地址:https://www.cnblogs.com/cx59244405/p/10150235.html
Copyright © 2011-2022 走看看