zoukankan      html  css  js  c++  java
  • 工作中用到的小脚本2

    import os
    import shutil
    from urllib.parse import *

    import openpyxl
    import xlrd
    import xlwt
    # NOTE(review): the functions below call `pq`, which is never imported in
    # this file. It looks like pyquery's PyQuery
    # (`from pyquery import PyQuery as pq`) - confirm and add the import.
    def eq(l):
        """Print the rows of ``data.xlsx`` whose column-1 value is not in *l*.

        Opens the first sheet of ``data.xlsx`` (resolved against the current
        working directory) and walks every row.  When the value in column 1 is
        absent from *l*, that value and the row's column-0 value are printed,
        separated by a tab.

        Args:
            l: Container of values that column 1 is compared against.

        Returns:
            None.  The result is only printed.
        """
        workbook = xlrd.open_workbook('data.xlsx')
        table = workbook.sheet_by_index(0)
        for row in range(table.nrows):
            u = table.cell(row, 1).value
            # presumably column 0 holds an IP address (the original local was
            # named `ip`) - verify against the workbook.
            ip = table.cell(row, 0).value
            if u not in l:
                print(u, "\t", ip)
    # File-move helper.
    def moveFile(srcfile,dstfile):
        """Move *srcfile* to *dstfile*, creating the destination directory first.

        If *srcfile* is not an existing regular file, a message is printed and
        nothing else happens (no directory is created, no exception is raised).

        Args:
            srcfile: Path of the file to move.
            dstfile: Destination path.  Its directory part is created when it
                does not exist yet.  When the path ends with a separator the
                whole path is treated as the directory and the file keeps its
                name (behaviour of ``shutil.move``).

        Returns:
            None.
        """
        if not os.path.isfile(srcfile):
            print ("%s 该文件不存在!请检查您的输入"%(srcfile))
            return
        # Split the destination into its directory part and file name.
        fpath, _fname = os.path.split(dstfile)
        # A bare file name has an empty directory part; os.makedirs('') would
        # raise, so only create the directory when there is one to create.
        if fpath and not os.path.exists(fpath):
            os.makedirs(fpath, exist_ok=True)
        shutil.move(srcfile, dstfile)
    
    def _match_key(text):
        """Return the part of *text* that searchdata uses as its lookup key."""
        res = urlparse(text)
        if res.scheme in ('', 'http', 'https'):
            # Without a network location the whole value ends up in `path`.
            return res.netloc if res.netloc else res.path
        # Any other "scheme" is used as the key itself; urlparse reports the
        # text before the first colon as the scheme for inputs like
        # "example.com:8080".
        return res.scheme

    def searchdata(l,dir,
                   info_book='C:\\Users\\yxb\\Downloads\\汇总高危\\网站基本信息20200424(1).xls',
                   sites_book='C:\\Users\\yxb\\Downloads\\汇总高危\\网站群网站清单_20200312入库(1).xlsx',
                   dest_root="F:\\scrapy\\819\\"):
        """Sort the files in *dir* into per-unit folders below *dest_root*.

        Two workbooks are read to build a key -> unit mapping:

        * first sheet of *info_book*: column 4 is the key and the last column
          the unit; when column 4 is ``*`` or ``无`` the value of column 5 is
          used as the key instead.  Column 5 and column 16 are also recorded
          for the fallback lookup described below.
        * first sheet of *sites_book*: column 0 is the key, column 3 the unit.

        When a key occurs more than once, the first occurrence wins.

        Every entry of *l* found among those keys is printed with its unit.
        Entries that only match a column-5 value of *info_book* are printed
        with that row's column-16 value.  Then each regular file directly in
        *dir* is read as UTF-8, the text of its third ``h1`` element is taken,
        and the file is moved to ``dest_root + unit`` (plus a trailing
        backslash) when that text, or the key derived from it by URL parsing,
        is one of the resolved entries.

        Args:
            l: Iterable of hashable keys to resolve, e.g. the return value of
                ``chooseInfo``.
            dir: Directory holding the files to sort.
            info_book: Path of the first workbook.
            sites_book: Path of the second workbook.
            dest_root: Destination prefix; the unit name and a backslash are
                appended to it.

        Returns:
            None.
        """
        table1 = xlrd.open_workbook(info_book).sheet_by_index(0)
        table2 = xlrd.open_workbook(sites_book).sheet_by_index(0)

        unit_by_url = {}
        by_ip = {}
        for row in range(table1.nrows):
            u = table1.cell(row, 4).value
            unit = table1.cell(row, -1).value
            addr = table1.cell(row, 5).value
            # Keep both per-row values keyed by the column-5 value so that the
            # fallback lookup always reads the row it matched.
            by_ip.setdefault(addr, (table1.cell(row, 16).value, unit))
            if u == '*' or u == '无':
                u = addr
            unit_by_url.setdefault(u, unit)
        for row in range(table2.nrows):
            unit_by_url.setdefault(table2.cell(row, 0).value,
                                   table2.cell(row, 3).value)

        # Map every resolvable entry of `l` to its destination unit.
        target = {}
        for key in l:
            if key in unit_by_url:
                print(key, "\t", unit_by_url[key])
                target[key] = unit_by_url[key]
            elif key in by_ip:
                owner, unit = by_ip[key]
                # NOTE(review): the column-16 value is printed but the
                # last-column value names the folder, as in the original -
                # confirm both columns hold the same unit name.
                print(key, "\t", owner)
                target[key] = unit

        for name in os.listdir(dir):
            path = os.path.join(dir, name)
            if not os.path.isfile(path):
                continue
            with open(path, encoding="utf-8") as f:
                content = f.read()
            # Parse the HTML text.
            # NOTE(review): `pq` is presumably pyquery's PyQuery - confirm it
            # is imported.
            doc = pq(content)
            s = doc("h1").eq(2).text()
            # Prefer an exact match on the heading text, otherwise fall back
            # to the key derived from parsing it as a URL.
            key = s if s in target else _match_key(s)
            if key in target:
                moveFile(path, dest_root + target[key] + "\\")
        print('操作完成')
    
    
    
    
    
    
    
    
    
    def chooseInfo(dir):
        """Collect one lookup key per file found directly in *dir*.

        Each regular file is read as UTF-8 and parsed as HTML, and the text of
        its third ``h1`` element is run through ``urlparse``:

        * scheme empty, ``http`` or ``https``: the network location is used,
          or the path when there is no network location;
        * any other scheme: the scheme itself is used (urlparse reports the
          text before the first colon as the scheme for inputs like
          ``example.com:8080``).

        Args:
            dir: Directory to scan.  Sub-directories are skipped.

        Returns:
            list: The collected keys, one per file, in ``os.listdir`` order.
        """
        listUrl = []
        for name in os.listdir(dir):
            path = os.path.join(dir, name)
            if not os.path.isfile(path):
                continue
            with open(path, encoding="utf-8") as f:
                content = f.read()
            # Parse the HTML text.
            # NOTE(review): `pq` is presumably pyquery's PyQuery - confirm it
            # is imported.
            doc = pq(content)
            s = doc("h1").eq(2).text()
            res = urlparse(s)
            if res.scheme in ('', 'http', 'https'):
                # The host goes into the result list; the original appended
                # it to the directory listing, which silently dropped it.
                listUrl.append(res.netloc if res.netloc else res.path)
            else:
                listUrl.append(res.scheme)
        return listUrl
    
    
    
    # Directory holding the HTML files to sort.  Backslashes are doubled so
    # that "\U" is not read as a unicode escape and the trailing backslash does
    # not escape the closing quote.
    report_dir="C:\\Users\\yxb\\Downloads\\汇总高危\\总\\"
    # Collect the lookup keys from the files, then move each file into its
    # unit's folder.
    a=chooseInfo(report_dir)
    searchdata(a,report_dir)
    # eq(a) can be called here to list the data.xlsx rows whose column-1 value
    # is not among the collected keys.
    #eq(a)
    

      

  • 相关阅读:
    【HDOJ】1243 反恐训练营
    Eclipse 点击 Run 自动生成 out 文件的错误
    经纬度转凯立德 K 码
    Android开发环境建立
    Android 学习过程中遇到的知识点
    Android
    Android
    素数距离问题
    取石子(一)
    素数求和问题
  • 原文地址:https://www.cnblogs.com/kk328/p/13532163.html
Copyright © 2011-2022 走看看