zoukankan      html  css  js  c++  java
  • java在文本处理中的相关辅助工具类

    1,java分词

    package com.bobo.util;
    
    import ICTCLAS.I3S.AC.ICTCLAS50;
    
    /**
     * Thin wrapper around the ICTCLAS50 word segmenter.
     */
    public class Cutwords {
        /**
         * Segments a piece of text with ICTCLAS50.
         *
         * <p>A new ICTCLAS50 instance is created and initialised with "." on every
         * call (presumably the directory holding the ICTCLAS configuration and
         * data files - confirm against the ICTCLAS50 documentation). Text is
         * exchanged with the native library as GB2312 bytes.
         *
         * @param microblog
         *            the text to segment
         * @return the text returned by ICTCLAS50, or an empty string when the
         *         input is null or segmentation fails for any reason
         */
        public static String Segment(String microblog) {
            if (microblog == null) {
                return "";
            }
            try {
                ICTCLAS50 segmenter = new ICTCLAS50();
                segmenter.ICTCLAS_Init(".".getBytes("GB2312"));

                // NOTE(review): the two trailing 0 arguments look like an
                // encoding code and a POS-tagging switch - confirm.
                byte[] nativeBytes = segmenter.ICTCLAS_ParagraphProcess(
                        microblog.getBytes("GB2312"), 0, 0);
                return new String(nativeBytes, 0, nativeBytes.length, "GB2312");
            } catch (Exception ex) {
                // Best effort: callers still get "", but the failure is no
                // longer invisible.
                System.err.println("Cutwords.Segment failed: " + ex);
                return "";
            }
        }
    }
    CutWords

    2,java文件读写

    package com.bobo.util;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.util.ArrayList;
    import java.util.List;
    
    /**
     * File helpers: recursive directory listing and whole-file reading.
     *
     * Tip: BufferedReader is convenient for reading a file line by line, and
     * PrintStream is convenient for writing one line by line.
     */
    public class FileUtil {

        /**
         * Absolute paths collected by {@link #readDirs(String)}. The list is
         * shared and never cleared by this class, so the results of successive
         * calls accumulate; clear it first when a fresh listing is needed.
         */
        public static ArrayList<String> FileList = new ArrayList<String>();

        /**
         * Lists all files under a directory and its sub-directories.
         *
         * @param filepath
         *            directory path
         * @return the shared {@link #FileList}, with the absolute path of every
         *         regular file found under the directory appended to it
         * @throws FileNotFoundException
         *             kept for interface compatibility
         * @throws IOException
         *             kept for interface compatibility
         */
        public static List<String> readDirs(String filepath)
                throws FileNotFoundException, IOException {
            File file = new File(filepath);
            if (!file.isDirectory()) {
                System.out.println("输入的不是目錄名称;");
                System.out.println("filepath:" + file.getAbsolutePath());
                return FileList;
            }

            String[] names = file.list();
            if (names == null) {
                // File.list() returns null when the directory cannot be read.
                return FileList;
            }

            for (String name : names) {
                String childPath = filepath + "/" + name;
                File child = new File(childPath);
                if (child.isDirectory()) {
                    readDirs(childPath);
                } else {
                    FileList.add(child.getAbsolutePath());
                }
            }
            return FileList;
        }

        /**
         * Reads a whole file, decoded as GBK, and returns it as one string.
         *
         * @param file
         *            name of the file to read
         * @return the file content; every line is terminated with "\r\n"
         * @throws FileNotFoundException
         *             if the file cannot be opened
         * @throws IOException
         *             if reading fails
         */
        public static String readFile(String file) throws FileNotFoundException,
                IOException {
            StringBuilder content = new StringBuilder();
            FileInputStream in = new FileInputStream(file);
            try {
                // Decode the byte stream as GBK and read it line by line.
                BufferedReader br = new BufferedReader(new InputStreamReader(in,
                        "gbk"));
                String line;
                while ((line = br.readLine()) != null) {
                    content.append(line).append("\r\n");
                }
            } finally {
                // Closing the underlying stream releases the file handle even
                // when decoding or reading fails.
                in.close();
            }
            return content.toString();
        }

    }
    FileUtil

    3,字符串工具类

    package com.bobo.util;
    
    import java.util.Stack;
    import java.util.regex.Pattern;
    
    /**
     * Null-tolerant string helpers.
     */
    public class StringUtil {
        /** One or more ASCII digits; compiled once instead of per call. */
        private static final Pattern DIGITS = Pattern.compile("[0-9]+");

        /** One or more ASCII letters of either case; compiled once. */
        private static final Pattern LETTERS = Pattern.compile("[a-zA-Z]+");

        /**
         * Finds the position of the right symbol that closes the first left
         * symbol, honouring nesting.
         *
         * <p>Symbols are compared one character at a time, so only
         * single-character symbols can match. Right symbols that appear before
         * the first left symbol are ignored.
         *
         * @param str
         *            the string to search
         * @param cLeft
         *            left (opening) symbol
         * @param cRight
         *            right (closing) symbol
         * @return index of the right symbol matching the first left symbol, or
         *         -1 if there is none
         */
        public static int findRightMatchChar(String str, String cLeft, String cRight) {
            if (str == null) {
                return -1;
            }
            int depth = 0;
            boolean openedAtLeastOnce = false;
            for (int i = 0; i < str.length(); i++) {
                String current = String.valueOf(str.charAt(i));

                if (current.equals(cLeft)) {
                    depth++;
                    openedAtLeastOnce = true;
                }
                // Deliberately not "else if": when both symbols are the same
                // character, it opens and closes at the same index.
                if (current.equals(cRight) && depth > 0) {
                    depth--;
                }

                if (openedAtLeastOnce && depth == 0) {
                    return i;
                }
            }
            return -1;
        }

        /**
         * Tells whether a string is null, empty or whitespace only.
         *
         * @param str
         *            the string to check
         * @return true if str is null or trims to the empty string
         */
        public static boolean isNullOrEmpty(String str) {
            return str == null || str.trim().length() == 0;
        }

        /**
         * Tells whether str1 and str2 are equal; two nulls are equal.
         *
         * @param str1
         *            first string
         * @param str2
         *            second string
         * @return true or false
         */
        public static boolean equals(String str1, String str2) {
            return str1 == str2 || (str1 != null && str1.equals(str2));
        }

        /**
         * Tells whether str1 and str2 are equal, ignoring case.
         *
         * <p>Unlike {@link #equals(String, String)}, this returns false when
         * str1 is null, even if str2 is null too.
         *
         * @param str1
         *            first string
         * @param str2
         *            second string
         * @return true or false
         */
        public static boolean equalsIgnoreCase(String str1, String str2) {
            return str1 != null && str1.equalsIgnoreCase(str2);
        }

        /**
         * Tells whether str1 contains str2.
         *
         * @param str1
         *            source string
         * @param str2
         *            string to look for
         * @return true if the source string contains the given string; false
         *         otherwise, including when either argument is null
         */
        public static boolean contains(String str1, String str2) {
            return str1 != null && str2 != null && str1.contains(str2);
        }

        /**
         * Replaces null with the empty string.
         *
         * @param str
         *            the string to check
         * @return "" if str is null, otherwise str itself
         */
        public static String getString(String str) {
            return str == null ? "" : str;
        }

        /**
         * Tells whether the value's string form consists only of ASCII digits.
         *
         * @param str
         *            the value to check; its toString() is tested
         * @return true for one or more digits; false for null, the empty
         *         string, or anything else
         */
        public static boolean isNumeric(Object str) {
            return str != null && DIGITS.matcher(str.toString()).matches();
        }

        /**
         * Tells whether the value's string form consists only of English
         * letters (a-z, A-Z).
         *
         * @param str
         *            the value to check; its toString() is tested
         * @return true for one or more letters; false for null, the empty
         *         string, or anything else
         */
        public static boolean isEnglish(Object str) {
            return str != null && LETTERS.matcher(str.toString()).matches();
        }
    }
    StringUtil

     4,在java中运行shell命令的相关工具类

    package com.bobo.util;
    
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    
    /**
     * Runs an external command and waits for it with a polling timeout.
     *
     * <p>The timeout state lives in static fields, so concurrent calls to
     * {@link #exec(String)} share (and overwrite) {@link #START}.
     */
    public class CommandHelper {
        /**
         * Timeout in milliseconds. Callers may overwrite it before calling
         * {@link #exec(String)}; it defaults to 10 seconds so that exec does
         * not report an immediate timeout when nobody has set it.
         */
        public static int DEFAULT_TIMEOUT = 10000;

        /** Polling interval in milliseconds while the process is running. */
        public static final int DEFAULT_INTERVAL = 1000;

        /** Start time (epoch milliseconds) of the most recent wait. */
        public static long START;

        public static void main(String[] args) {
            DEFAULT_TIMEOUT = 10000;
            try {
                System.out.println(CommandHelper.exec("wc -l *.*").toString());
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                // Restore the interrupt flag before reporting.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            }
        }

        /**
         * Executes a command and returns its exit value and captured output.
         *
         * <p>The command string is passed to {@link Runtime#exec(String)}
         * unchanged. The process is always destroyed before this method
         * returns, including on timeout or when waiting fails.
         *
         * @param command
         *            the command line to run
         * @return the result; on timeout the exit value is
         *         CommandResult.EXIT_VALUE_TIMEOUT and the output is
         *         "Command process timeout"
         * @throws IOException
         *             if the process cannot be started or its output read
         * @throws InterruptedException
         *             if the calling thread is interrupted while polling
         */
        public static CommandResult exec(String command) throws IOException,
                InterruptedException {
            Process process = Runtime.getRuntime().exec(command);
            try {
                return wait(process);
            } finally {
                process.destroy();
            }
        }

        /** Tells whether DEFAULT_TIMEOUT milliseconds have passed since START. */
        private static boolean isOverTime() {
            return System.currentTimeMillis() - START >= DEFAULT_TIMEOUT;
        }

        /**
         * Polls the process until it exits or the timeout elapses, then
         * collects stderr and stdout.
         *
         * NOTE(review): output is only read after the process has exited, so a
         * command that fills the OS pipe buffer may block until the timeout.
         */
        private static CommandResult wait(Process process)
                throws InterruptedException, IOException {
            BufferedReader errorReader = null;
            BufferedReader outputReader = null;
            try {
                errorReader = new BufferedReader(new InputStreamReader(
                        process.getErrorStream()));
                outputReader = new BufferedReader(new InputStreamReader(
                        process.getInputStream()));

                // timeout control
                START = System.currentTimeMillis();

                for (;;) {
                    int exitValue;
                    try {
                        exitValue = process.exitValue();
                    } catch (IllegalThreadStateException stillRunning) {
                        // Process hasn't finished yet: give up on timeout,
                        // otherwise sleep and poll again.
                        if (isOverTime()) {
                            CommandResult timedOut = new CommandResult();
                            timedOut.setExitValue(CommandResult.EXIT_VALUE_TIMEOUT);
                            timedOut.setOutput("Command process timeout");
                            return timedOut;
                        }
                        Thread.sleep(DEFAULT_INTERVAL);
                        continue;
                    }

                    // A finished process is reported as finished even if the
                    // deadline passed during the last sleep.
                    CommandResult result = new CommandResult();
                    result.setExitValue(exitValue);
                    if (errorReader.ready()) {
                        result.setError(readLines(errorReader));
                    }
                    if (outputReader.ready()) {
                        result.setOutput(readLines(outputReader));
                    }
                    return result;
                }
            } finally {
                closeQuietly(errorReader);
                closeQuietly(outputReader);
            }
        }

        /**
         * Reads the reader to the end, concatenating its lines with no
         * separator between them.
         */
        private static String readLines(BufferedReader reader) throws IOException {
            StringBuilder buffer = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                buffer.append(line);
            }
            return buffer.toString();
        }

        /** Closes the reader if it is non-null, ignoring close failures. */
        private static void closeQuietly(BufferedReader reader) {
            if (reader == null) {
                return;
            }
            try {
                reader.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if close fails during cleanup.
            }
        }
    }
    CommandHelper
    package com.bobo.util;
    
    /**
     * Holds the outcome of an executed command: standard output, error output
     * and exit value.
     */
    public class CommandResult {
        /** Exit value that marks a command which did not finish in time. */
        public static final int EXIT_VALUE_TIMEOUT = -1;

        private String output;
        private String error;
        int exitValue;

        /**
         * @return the captured output
         */
        public String getOutput() {
            return this.output;
        }

        /**
         * @param text
         *            the output to store
         */
        void setOutput(String text) {
            this.output = text;
        }

        /**
         * @return the captured error text
         */
        public String getError() {
            return this.error;
        }

        /**
         * @param error
         *            the error text to store
         */
        public void setError(String error) {
            this.error = error;
        }

        /**
         * @return the exit value
         */
        int getExitValue() {
            return this.exitValue;
        }

        /**
         * @param code
         *            the exit value to store
         */
        void setExitValue(int code) {
            this.exitValue = code;
        }

        /**
         * Renders all three fields as
         * {@code output:...;error:...;exitValue:...}.
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append("output:").append(this.output);
            text.append(";error:").append(this.error);
            text.append(";exitValue:").append(this.exitValue);
            return text.toString();
        }
    }
    CommandResult

     5,过滤某个目录下以特定后缀结尾的文件

    package com.bobo.myinterface;
    
    import java.io.File;
    import java.io.FileFilter;
    
    /**
     * Accepts directories and files whose absolute path ends with a given
     * suffix.
     */
    public class MyFileFilter implements FileFilter {
        private String suffix;

        /**
         * @param suffix
         *            the ending a file's absolute path must have to be accepted
         */
        public MyFileFilter(String suffix) {
            this.suffix = suffix;
        }

        /**
         * @param arg0
         *            the candidate file or directory
         * @return true for any directory, and for any other entry whose
         *         absolute path ends with the suffix
         */
        @Override
        public boolean accept(File arg0) {
            // Directories always pass, so a recursive walk can descend into them.
            if (arg0.isDirectory()) {
                return true;
            }
            String absolutePath = arg0.getAbsolutePath();
            return absolutePath.endsWith(suffix);
        }

    }
    文件过滤器

    在FileUtil中添加showAllFiles方法

        /**
         * Recursively collects the files under a directory that pass a filter.
         *
         * <p>Each collected file's absolute path is printed to standard output
         * and the file is appended to fileList. The filter must also accept
         * directories, otherwise sub-directories are not descended into.
         *
         * @param dir
         *            the directory to start from
         * @param filter
         *            the filter applied to every directory entry
         * @param fileList
         *            the list that receives the matching files
         */
        public static void showAllFiles(File dir,FileFilter filter,ArrayList<File> fileList) {
            File[] entries = dir.listFiles(filter);
            if (entries == null) {
                // listFiles returns null when dir is not a directory or
                // cannot be read; there is nothing to collect in that case.
                return;
            }
            for (File entry : entries) {
                if (entry.isDirectory()) {
                    showAllFiles(entry, filter, fileList);
                } else {
                    System.out.println(entry.getAbsolutePath());
                    fileList.add(entry);
                }
            }
        }
    showAllFiles方法

    最终调用

        // Collect every annotated data file (path ending in ".dealed") under
        // the data directory, then print how many were found.
        ArrayList<File> fileList = new ArrayList<File>();
        MyFileFilter dealedFilter = new MyFileFilter(".dealed");
        File dataDir = new File(Constants.DataDir);
        FileUtil.showAllFiles(dataDir, dealedFilter, fileList);
        System.out.println(fileList.size());
            
    列举特定后缀文件的调用方法
  • 相关阅读:
    大厂Redis高并发场景设计,面试问的都在这!
    POJ1006——中国剩余定理
    HDU3501——欧拉函数裸题
    堆-动态的排序(洛谷1801-黑匣子)
    图中欧拉回路数量
    ip地址
    网络通信概述
    网络通信概述
    软件安装与卸载
    软件安装与卸载
  • 原文地址:https://www.cnblogs.com/bobodeboke/p/3506698.html
Copyright © 2011-2022 走看看