zoukankan      html  css  js  c++  java
  • Java字符串中文检测转换

    /**
     * Heuristics for detecting and repairing "mojibake": text whose UTF-8 bytes
     * were mistakenly decoded as ISO-8859-1 (a common fate of uploaded file names).
     *
     * <p>JDK types are referenced by their fully qualified names so that this
     * class compiles without relying on any import lines.
     */
    public class ChineseUtils {

        /** Charset the garbled text was wrongly decoded with. */
        private static final java.nio.charset.Charset LATIN_1 =
                java.nio.charset.StandardCharsets.ISO_8859_1;

        /** Charset the text was originally encoded in. */
        private static final java.nio.charset.Charset UTF_8 =
                java.nio.charset.StandardCharsets.UTF_8;

        /** Runs of whitespace (space, tab, CR, LF, ...); compiled once and reused. */
        private static final java.util.regex.Pattern WHITESPACE =
                java.util.regex.Pattern.compile("\\s+");

        /** Any character in a Unicode punctuation category; compiled once and reused. */
        private static final java.util.regex.Pattern PUNCTUATION =
                java.util.regex.Pattern.compile("\\p{P}");

        /**
         * Small demo: garbles a Chinese file name the way a mis-configured
         * server would, then detects and repairs it.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            // U+4E2D U+56FD ("China") written as Unicode escapes so the demo does
            // not depend on the encoding the source file is compiled with.
            String str = "\u4e2d\u56fd (1).jpg";
            // Encode as UTF-8 but decode as ISO-8859-1: this is what produces the
            // garbled text. Encoding to ISO-8859-1 directly would replace every
            // ideograph with '?' and lose the data for good.
            String str2 = new String(str.getBytes(UTF_8), LATIN_1);
            System.out.println(str2);
            System.out.println(isMessyCode(str2));
            System.out.println(toChinese(str2));
        }

        /**
         * Tells whether {@code c} belongs to one of the Unicode blocks this class
         * treats as legitimate Chinese text: CJK ideographs plus the general, CJK
         * and full-width punctuation/symbol blocks.
         *
         * @param c the character to classify
         * @return {@code true} if {@code c} lies in one of those blocks
         */
        private static boolean isChinese(char c) {
            Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
            return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                    || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                    || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                    || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
                    || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                    || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
        }

        /**
         * Heuristically decides whether {@code strName} looks garbled.
         *
         * <p>Whitespace and punctuation are removed first. Among the remaining
         * characters only those that are neither letters nor digits are inspected;
         * the string is reported as messy when strictly more than 40% of the
         * inspected characters fall outside the blocks accepted by
         * {@link #isChinese(char)}. Ordinary symbols such as {@code '+'} therefore
         * also count as suspicious, so a {@code true} result is only a hint.
         *
         * @param strName the text to examine; may be {@code null}
         * @return {@code true} if the text looks garbled; {@code false} otherwise,
         *         including for {@code null}, empty, or purely alphanumeric input
         */
        public static boolean isMessyCode(String strName) {
            if (strName == null) {
                return false;
            }
            String withoutSpaces = WHITESPACE.matcher(strName).replaceAll("");
            String withoutPunctuation = PUNCTUATION.matcher(withoutSpaces).replaceAll("");
            // trim() additionally drops control characters (<= U+0020) at both ends.
            char[] ch = withoutPunctuation.trim().toCharArray();

            long inspected = 0;
            long suspicious = 0;
            for (char c : ch) {
                if (Character.isLetterOrDigit(c)) {
                    continue;
                }
                inspected++;
                if (!isChinese(c)) {
                    suspicious++;
                }
            }
            if (inspected == 0) {
                // Nothing to judge by; avoids a 0/0 division.
                return false;
            }
            // suspicious / inspected > 0.4, evaluated exactly in integers. A float
            // ratio compared against the double literal 0.4 would wrongly report
            // exactly 40% (e.g. 2 of 5) as being above the threshold.
            return suspicious * 10 > inspected * 4;
        }

        /**
         * Repairs text whose UTF-8 bytes were decoded as ISO-8859-1.
         *
         * <p>The conversion is attempted only when {@link #isMessyCode(String)}
         * flags the text, every character can be encoded losslessly in
         * ISO-8859-1, and the resulting bytes are well-formed UTF-8. In every other
         * case the input is returned unchanged, so text that is already correct is
         * never corrupted by a false positive of the heuristic.
         *
         * @param msg the possibly garbled text; may be {@code null}
         * @return the repaired text, or {@code msg} itself when no safe repair exists
         */
        public static String toChinese(String msg) {
            if (msg == null || !isMessyCode(msg)) {
                return msg;
            }
            // Characters above U+00FF cannot come from a Latin-1 mis-decoding;
            // encoding them would silently turn them into '?'.
            if (!LATIN_1.newEncoder().canEncode(msg)) {
                return msg;
            }
            byte[] rawBytes = msg.getBytes(LATIN_1);
            try {
                return UTF_8.newDecoder()
                        .onMalformedInput(java.nio.charset.CodingErrorAction.REPORT)
                        .onUnmappableCharacter(java.nio.charset.CodingErrorAction.REPORT)
                        .decode(java.nio.ByteBuffer.wrap(rawBytes))
                        .toString();
            } catch (java.nio.charset.CharacterCodingException notUtf8) {
                // The bytes are not valid UTF-8, so the text was not mis-decoded
                // UTF-8 after all; keep the original rather than inject U+FFFD.
                return msg;
            }
        }
    }
  • 相关阅读:
    杭电2048--神、上帝以及老天爷
    杭电1012--u Calculate e
    杭电2049--不容易系列之(4)——考新郎
    杭电2045--不容易系列之(3)—— LELE的RPG难题
    Truncate Table user
    Sql server统计查询语句消耗时间
    C/C++:Unions 联合
    NYOJ 27 水池数目
    OpenRisc-39-ORPSoC,or1200的memory hierarchy整体分析
    RCP打包出来 运行 出现 JVM terminated.exit code = 13
  • 原文地址:https://www.cnblogs.com/sagech/p/5671501.html
Copyright © 2011-2022 走看看