zoukankan      html  css  js  c++  java
  • 遍历文件,查找文件下的汉字,并将汉字生成csv文件

    package com.shine.eiuop.utils;

    import java.io.BufferedReader;
    import java.io.BufferedWriter;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.OutputStreamWriter;
    import java.io.UnsupportedEncodingException;
    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.List;
    import java.util.Map;
    import java.util.UUID;
    import java.util.regex.MatchResult;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    import com.itextpdf.text.pdf.PdfStructTreeController.returnType;
    import com.shine.framework.commutil.typewrap.EDto;

    /**
     * Extracts Chinese text from the files of a directory tree and exports it as a CSV file.
     *
     * <p>Every regular file is read as UTF-8. Each run of consecutive Chinese characters
     * (code points U+4E00 to U+9FA5) becomes one record, and the records are written to the
     * output file one per line.
     */
    public class FileCopyChineseUtils {

        /** Default root directory to scan when none is supplied on the command line. */
        public static String rootDir = "C:\\Users\\14423\\Desktop\\亚强\\msp中文翻译\\msp2\\msp\\WebRoot";

        /** Location of the generated CSV file used by {@link #dofind(String)}. */
        private static final String OUTPUT_FILE_PATH = "D:\\SHINE_ROOT\\mspChinese.csv";

        /** File name passed along with {@link #OUTPUT_FILE_PATH}. */
        private static final String OUTPUT_FILE_NAME = "mspChinese.csv";

        /**
         * Terminator appended after every extracted run. {@link #dofind(String)} splits on the
         * same value, so producer and consumer must agree on it.
         */
        private static final String RECORD_SEPARATOR = "\r\n";

        /** Matches one or more consecutive characters in the range U+4E00 to U+9FA5. */
        private static final Pattern CHINESE_RUN = Pattern.compile("[\\u4e00-\\u9fa5]+");

        /**
         * Command-line entry point.
         *
         * @param args optional; {@code args[0]} overrides {@link #rootDir} as the directory to scan
         * @throws Exception if scanning or writing the output fails
         */
        public static void main(String args[]) throws Exception {
            boolean hasRootArgument = args != null && args.length > 0;
            dofind(hasRootArgument ? args[0] : rootDir);
        }

        /**
         * Scans {@code rootDir}, prints everything that was found and writes it to the CSV file.
         *
         * @param rootDir file or directory to scan
         * @throws Exception if a file cannot be opened or the output cannot be written
         */
        public static void dofind(String rootDir) throws Exception {
            String extracted = deepDir(rootDir);

            System.out.println(extracted);
            String[] rows = extracted.split(RECORD_SEPARATOR);
            writeDataToCsvFile1(OUTPUT_FILE_PATH, OUTPUT_FILE_NAME, rows);
        }

        /**
         * Recursively collects the Chinese text of every file below {@code rootDir}.
         *
         * <p>Hidden directories are skipped. If {@code rootDir} names a regular file, only that
         * file is scanned. A path that is neither a file nor a directory yields an empty string.
         *
         * @param rootDir file or directory to scan
         * @return all extracted runs, each followed by {@code "\r\n"}
         * @throws Exception if a file cannot be opened
         */
        public static String deepDir(String rootDir) throws Exception {
            File root = new File(rootDir);
            StringBuilder collected = new StringBuilder();

            if (root.isDirectory()) {
                File[] children = root.listFiles();
                if (children == null) {
                    // listFiles() returns null when the directory cannot be read.
                    return "";
                }
                for (File child : children) {
                    if (child.isDirectory()) {
                        if (!child.isHidden()) {
                            collected.append(deepDir(child.getPath()));
                        }
                    } else if (child.isFile()) {
                        collected.append(writeComment(child.getPath()));
                    }
                }
            } else if (root.isFile()) {
                collected.append(writeComment(root.getPath()));
            }
            return collected.toString();
        }

        /**
         * Reads one file as UTF-8 and returns every run of Chinese characters it contains.
         *
         * <p>Runs never span a line break. If reading fails part-way through, the failure is
         * reported on {@code System.err} and the runs found so far are returned.
         *
         * @param filePathAndName path of the file to scan
         * @return the runs in order of appearance, each followed by {@code "\r\n"}; an empty
         *         string when the file contains no Chinese characters
         * @throws FileNotFoundException if the file cannot be opened
         * @throws UnsupportedEncodingException if UTF-8 is not supported by the runtime
         */
        public static String writeComment(String filePathAndName)
                throws FileNotFoundException, UnsupportedEncodingException {
            StringBuilder found = new StringBuilder();
            try (InputStream in = new FileInputStream(filePathAndName);
                    BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    appendChineseRuns(line, found);
                }
            } catch (FileNotFoundException | UnsupportedEncodingException e) {
                // Both are declared by this method; callers must keep seeing them.
                throw e;
            } catch (IOException e) {
                // Best effort: stop reading this file but keep what was already extracted.
                System.err.println("Failed to read " + filePathAndName + ": " + e);
            }
            return found.toString();
        }

        /** Appends every Chinese run of {@code text} to {@code out}, each followed by the separator. */
        private static void appendChineseRuns(String text, StringBuilder out) {
            Matcher matcher = CHINESE_RUN.matcher(text);
            while (matcher.find()) {
                out.append(matcher.group()).append(RECORD_SEPARATOR);
            }
        }

        /**
         * Writes the given rows to a file, one row per line terminated by {@code "\r\n"}.
         *
         * <p>{@code null} and empty rows are skipped. Missing parent directories are created and
         * an existing file is overwritten. Text is encoded with the platform default charset; if
         * the generated file looks garbled, GBK is usually what is needed on Windows and UTF-8 on
         * Linux.
         *
         * @param filePath path of the file to write
         * @param fileName unused; kept so existing callers continue to compile
         * @param datas rows to write
         * @throws Exception if the directory cannot be created or the file cannot be written
         */
        public static void writeDataToCsvFile1(String filePath, String fileName, String[] datas) throws Exception {
            File csvFile = new File(filePath);

            // FileOutputStream creates the file itself but not the directories leading to it.
            File parent = csvFile.getAbsoluteFile().getParentFile();
            if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
                throw new IOException("Cannot create directory " + parent);
            }

            try (BufferedWriter writer =
                    new BufferedWriter(new OutputStreamWriter(new FileOutputStream(csvFile)))) {
                for (String row : datas) {
                    if (row != null && !row.isEmpty()) {
                        writer.write(row);
                        writer.write(RECORD_SEPARATOR);
                    }
                }
            }
        }
    }

  • 相关阅读:
    @topcoder
    @uoj
    Vue-路由跳转的几种方式和路由重定向
    Vue-设置默认路由选中
    Vue-使用webpack+vue-cli搭建项目
    Vue-状态管理Vuex的使用
    Vue-全局变量和方法
    JS-apply、call、bind
    CSS-禁止文本被选中
    Vue-路由模式 hash 和 history
  • 原文地址:https://www.cnblogs.com/lwh-12345/p/13358294.html
Copyright © 2011-2022 走看看