zoukankan      html  css  js  c++  java
  • 工作中用到的小脚本2

    import os
    import shutil
    from urllib.parse import *

    import openpyxl
    import xlrd
    import xlwt
    # NOTE(review): `pq` is called below but never imported — the code needs
    # `from pyquery import PyQuery as pq` (third-party) to run at all.
    def eq(l):
        """Print every (url, ip) row of data.xlsx whose url is NOT in `l`.

        Reads the first sheet of 'data.xlsx' in the working directory;
        column 1 is treated as the url, column 0 as the ip.

        :param l: container of known urls; rows whose url is absent are printed.
        """
        workbook = xlrd.open_workbook('data.xlsx')
        table = workbook.sheet_by_index(0)
        # Removed unused locals (all_data / url / test) from the original.
        for row in range(table.nrows):
            u = table.cell(row, 1).value
            ip = table.cell(row, 0).value
            if u not in l:
                print(u, "\t", ip)
    # Helper: move a file, creating the destination directory on demand.
    def moveFile(srcfile,dstfile):
        """Move `srcfile` to `dstfile`, creating dstfile's directory if missing.

        If `srcfile` does not exist, a warning is printed and nothing is moved
        (deliberately does not raise).
        """
        if not os.path.isfile(srcfile):
            print ("%s 该文件不存在!请检查您的输入"%(srcfile))
            return
        # Split off the target directory and create it when absent.
        fpath, _fname = os.path.split(dstfile)
        if not os.path.exists(fpath):
            os.makedirs(fpath)
        shutil.move(srcfile, dstfile)
    
    def searchdata(l, dir,
                   info_xls=r'C:\Users\yxb\Downloads\汇总高危\网站基本信息20200424(1).xls',
                   inventory_xlsx=r'C:\Users\yxb\Downloads\汇总高危\网站群网站清单_20200312入库(1).xlsx',
                   dest_root=r'F:\scrapy\819'):
        """Resolve each url/ip in `l` to its owning unit and sort report files.

        Builds url->unit and ip->unit lookups from two spreadsheets, then for
        every html report file in `dir` extracts the third <h1> text, matches
        it (raw, by path, by netloc, or by scheme) against the resolved set,
        and moves the file into dest_root/<unit>/ via moveFile.

        Fixes vs. the original:
        - path literals were SyntaxErrors ('\\U...' escape, trailing '\\"');
          now raw strings / os.path.join, and parameterized with defaults.
        - the ip branch appended all_data[ip.index(i)] while printing
          un[ip.index(i)]; the moved-to unit now matches what is printed.
        - no longer shadows builtins `list` / `sum`.

        :param l: urls/ips to resolve (e.g. the result of chooseInfo).
        :param dir: directory of html report files to sort.
        """
        workbook = xlrd.open_workbook(info_xls)
        workbook2 = xlrd.open_workbook(inventory_xlsx)
        table1 = workbook.sheet_by_index(0)
        table2 = workbook2.sheet_by_index(0)
        all_data = []
        un = []
        ip = []
        url = []
        start = []
        end = []
        for i in range(table1.nrows):
            u = table1.cell(i, 4).value
            # -1 selects the last column; relies on xlrd's row lists accepting
            # negative indices — NOTE(review): confirm this is intentional.
            unit = table1.cell(i, -1).value
            ip.append(table1.cell(i, 5).value)
            un.append(table1.cell(i, 16).value)
            if u == '*' or u == '无':
                # Placeholder url: fall back to the ip column, recorded twice
                # (once as the fallback, once below) as in the original.
                u = table1.cell(i, 5).value
                all_data.append(unit)
                url.append(u)
            url.append(u)
            all_data.append(unit)
        for i in range(table2.nrows):
            url.append(table2.cell(i, 0).value)
            all_data.append(table2.cell(i, 3).value)
        for item in l:
            if item in url:
                print(item, "\t", all_data[url.index(item)])
                start.append(item)
                end.append(all_data[url.index(item)])
            elif item in ip:
                print(item, "\t", un[ip.index(item)])
                start.append(item)
                # BUG FIX: original stored all_data[ip.index(item)] here,
                # which indexes an unrelated list; use the printed unit.
                end.append(un[ip.index(item)])
        for fname in os.listdir(dir):
            path = os.path.join(dir, fname)
            if not os.path.isfile(path):
                continue
            with open(path, encoding="utf-8") as f:
                content = f.read()
            doc = pq(content)              # parse the html report
            s = doc("h1").eq(2).text()     # third <h1> holds the target id
            res = urlparse(s)
            # Decide which key (raw text / path / netloc / scheme) matches.
            key = None
            if s in start:
                key = s
            elif res.scheme in ('http', 'https', '') or res.scheme is None:
                candidate = res.path if res.netloc == '' else res.netloc
                if candidate in start:
                    key = candidate
            elif res.scheme in start:
                # Bare 'host:port' strings parse with the host as the scheme.
                key = res.scheme
            if key is not None:
                # Trailing separator so shutil.move targets the directory.
                moveFile(path, os.path.join(dest_root, end[start.index(key)]) + os.sep)
        print('操作完成')
    
    
    
    
    
    
    
    
    
    def chooseInfo(dir):
        """Extract the target identifier from every html report file in `dir`.

        For each file, the third <h1> element's text is read and run through
        urlparse; the returned list contains, per file, whichever of
        path / netloc / scheme identifies the target host.

        BUG FIX: the original did `list.append(res.netloc)` — appending to the
        os.listdir() result (which also shadowed the builtin) instead of the
        result list, so every url that parsed with a netloc was silently lost.

        :param dir: directory containing the html report files.
        :return: list of extracted host/url identifiers.
        """
        headings = []
        listUrl = []
        for fname in os.listdir(dir):
            path = os.path.join(dir, fname)
            if not os.path.isfile(path):
                continue
            with open(path, encoding="utf-8") as f:
                content = f.read()
            doc = pq(content)              # parse the html report
            headings.append(doc("h1").eq(2).text())
        for u in headings:
            res = urlparse(u)
            if res.scheme in ('http', 'https', '') or res.scheme is None:
                if res.netloc == '':
                    listUrl.append(res.path)
                else:
                    listUrl.append(res.netloc)
            else:
                # Bare 'host:port' strings parse with the host as the scheme.
                listUrl.append(res.scheme)
        return listUrl
    
    
    
    # Entry point: collect target identifiers from the report files, then sort
    # the files into per-unit folders. Original path literals were SyntaxErrors
    # ('\U...' escape and a trailing backslash escaping the closing quote).
    if __name__ == '__main__':
        target_dir = r'C:\Users\yxb\Downloads\汇总高危\总' + '\\'
        a = chooseInfo(target_dir)
        searchdata(a, target_dir)
        # eq(a)
    

      

  • 相关阅读:
    DGA域名可以是色情网站域名
    使用cloudflare加速你的网站隐藏你的网站IP
    167. Two Sum II
    leetcode 563. Binary Tree Tilt
    python 多线程
    leetcode 404. Sum of Left Leaves
    leetcode 100. Same Tree
    leetcode 383. Ransom Note
    leetcode 122. Best Time to Buy and Sell Stock II
    天津Uber优步司机奖励政策(12月28日到12月29日)
  • 原文地址:https://www.cnblogs.com/kk328/p/13532163.html
Copyright © 2011-2022 走看看