zoukankan      html  css  js  c++  java
  • 重命名文件及html

    import os
    import nltk
    from bs4 import BeautifulSoup as bs
    
    def get_txt_name_from_bak_name(bak_name):
        """Derive a ``.txt`` file name from the dot-separated parts of *bak_name*.

        The result is ``<second-to-last part>.<first part>.txt``.  An empty
        string is returned when *bak_name* does not contain ``'.txt'``.
        """
        if bak_name.find('.txt') < 0:
            return ''
        parts = bak_name.split('.')
        # Joining with '.' and a bare 'txt' yields the trailing '.txt'.
        return '.'.join((parts[-2], parts[0], 'txt'))
    
    def get_txt_name_no_time(file_name):
        """Return *file_name* with its leading dot-separated component removed.

        For a name shaped like ``<prefix>.<name>.<ext>`` the result is
        ``<name>.<ext>``, i.e. the second and third dot-separated parts.
        (The function name suggests the prefix is a timestamp.)

        Returns an empty string when *file_name* does not contain ``'.txt'``
        or has fewer than three dot-separated parts (e.g. a plain
        ``name.txt``), so callers can treat ``''`` as "nothing to rename".
        """
        if '.txt' not in file_name:
            return ''

        parts = file_name.split('.')
        # A name such as 'name.txt' has no leading component to strip;
        # indexing parts[2] would raise IndexError, so report "skip" instead.
        if len(parts) < 3:
            return ''
        return parts[1] + '.' + parts[2]
    
    def track_files_under_folder(folder_name):
        """Walk *folder_name* recursively and rename matching files in place.

        For every file found, ``get_txt_name_no_time`` computes the new base
        name; files for which it returns ``''`` are left untouched.  The file
        is renamed within its own directory.  Nothing happens when
        *folder_name* does not exist.

        Raises:
            OSError: propagated from ``os.rename`` if a rename fails.
        """
        if not os.path.exists(folder_name):
            return

        for root, _dirs, files in os.walk(folder_name):
            for filename in files:
                # Compute the target name once; '' means "not a file to rename".
                new_name = get_txt_name_no_time(filename)
                if not new_name:
                    continue

                # os.path.join replaces the hand-written '\' separator, which
                # was an unterminated string literal and not portable anyway.
                full_file_name = os.path.join(root, filename)
                new_file_name = os.path.join(root, new_name)
                os.rename(full_file_name, new_file_name)
    
    
                    
    def html_to_txt(file_name):
        """Extract the text of the HTML file *file_name* into ``<file_name>.txt``.

        The input is parsed with BeautifulSoup using the ``lxml`` parser and
        the result of ``get_text()`` is written to the output file.
        """
        with open(file_name, 'r') as html_file:
            with open(file_name + '.txt', 'w') as txt_file:
                soup = bs(html_file, 'lxml')
                plain_text = soup.get_text()
                txt_file.write(plain_text)
    
            
    
    if __name__ == '__main__':
        # Script entry point: rename the files under the working folder.
        # Raw string so the backslashes stay literal; in a normal literal
        # '\t' is a TAB character and '\F' is an invalid escape sequence.
        track_files_under_folder(r'\Files\tmp')
  • 相关阅读:
    磁盘管理之磁盘组成
    用户管理
    定时任务
    虚拟机安装centos6.9
    linux的文件属性与文件权限
    linux磁盘容量不足
    正则表达式与特殊符号
    linux三剑客与正则案例
    借用父构造函数继承属性
    myeclipse常用快捷键
  • 原文地址:https://www.cnblogs.com/shaivas/p/8658375.html
Copyright © 2011-2022 走看看