zoukankan      html  css  js  c++  java
  • Java字符串中文检测转换

    /**
     * Heuristics for spotting and repairing "mojibake": UTF-8 encoded text
     * (typically Chinese) that was mistakenly decoded as ISO-8859-1.
     *
     * <p>JDK types are referenced by their fully qualified names so that this
     * class compiles without depending on any import lines.
     */
    public class ChineseUtils {

        /** Matches a run of whitespace; compiled once instead of on every call. */
        private static final java.util.regex.Pattern WHITESPACE =
                java.util.regex.Pattern.compile("\\s+");

        /** Matches a single Unicode punctuation character (general category P*). */
        private static final java.util.regex.Pattern PUNCTUATION =
                java.util.regex.Pattern.compile("\\p{P}");

        /**
         * A string is reported as garbled when the share of suspicious symbols
         * is greater than this value. Kept as a double literal compared against
         * a float ratio, so boundary results are classified as before.
         */
        private static final double MESSY_THRESHOLD = 0.4;

        /**
         * Small demonstration: garbles a sample file name, then prints the
         * garbled text, the detection result and the repaired text.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            // Two CJK characters (U+4E2D U+56FD) followed by " (1).jpg". Written
            // with Unicode escapes so the value does not depend on the encoding
            // used to compile this source file.
            String str = "\u4e2d\u56fd (1).jpg";
            // Simulate the usual corruption: UTF-8 bytes read back as ISO-8859-1.
            // Encoding with ISO-8859-1 here would replace the CJK characters by
            // '?' and nothing would be left to detect or repair.
            String str2 = new String(
                    str.getBytes(java.nio.charset.StandardCharsets.UTF_8),
                    java.nio.charset.StandardCharsets.ISO_8859_1);
            System.out.println(str2);
            System.out.println(isMessyCode(str2));
            System.out.println(toChinese(str2));
        }

        /**
         * Tells whether {@code c} belongs to one of the Unicode blocks this
         * class accepts as legitimate Chinese text: CJK ideographs (unified,
         * compatibility, extension A), general punctuation, CJK symbols and
         * punctuation, and halfwidth/fullwidth forms.
         *
         * @param c the character to classify
         * @return {@code true} if {@code c} is in one of those blocks
         */
        private static boolean isChinese(char c) {
            Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
            return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                    || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                    || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                    || block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                    || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                    || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
        }

        /**
         * Guesses whether a string is garbled.
         *
         * <p>Whitespace and punctuation are removed first. Among the remaining
         * characters that are neither letters nor digits, the method computes
         * the fraction that is not accepted by {@link #isChinese(char)}; the
         * string is reported as garbled when that fraction exceeds 0.4.
         *
         * @param strName the text to inspect; may be {@code null}
         * @return {@code true} if the text looks garbled; {@code false}
         *         otherwise, including for {@code null} and for text that has
         *         no symbol characters at all
         */
        public static boolean isMessyCode(String strName) {
            if (strName == null) {
                return false;
            }
            String withoutWhitespace = WHITESPACE.matcher(strName).replaceAll("");
            String stripped = PUNCTUATION.matcher(withoutWhitespace).replaceAll("").trim();

            int symbols = 0;     // characters that are neither letter nor digit
            int suspicious = 0;  // ... of which not accepted by isChinese
            for (int i = 0; i < stripped.length(); i++) {
                char c = stripped.charAt(i);
                if (Character.isLetterOrDigit(c)) {
                    continue;
                }
                symbols++;
                if (!isChinese(c)) {
                    suspicious++;
                }
            }
            if (symbols == 0) {
                // Nothing to judge by; avoid dividing zero by zero.
                return false;
            }
            float ratio = (float) suspicious / (float) symbols;
            return ratio > MESSY_THRESHOLD;
        }

        /**
         * Repairs text that {@link #isMessyCode(String)} reports as garbled by
         * re-encoding it as ISO-8859-1 and decoding the bytes as UTF-8.
         *
         * <p>The conversion is applied only when it is lossless: if the text
         * contains characters that ISO-8859-1 cannot encode, or the resulting
         * bytes are not well-formed UTF-8, the input is returned unchanged
         * rather than a string with {@code '?'} or U+FFFD replacements.
         *
         * @param msg the text to repair; may be {@code null}
         * @return the repaired text, or {@code msg} itself when it does not
         *         look garbled or cannot be converted without loss
         */
        public static String toChinese(String msg) {
            if (msg == null || !isMessyCode(msg)) {
                return msg;
            }
            if (!java.nio.charset.StandardCharsets.ISO_8859_1.newEncoder().canEncode(msg)) {
                return msg;
            }
            byte[] raw = msg.getBytes(java.nio.charset.StandardCharsets.ISO_8859_1);
            try {
                // A fresh decoder reports malformed input instead of replacing it.
                return java.nio.charset.StandardCharsets.UTF_8.newDecoder()
                        .decode(java.nio.ByteBuffer.wrap(raw))
                        .toString();
            } catch (java.nio.charset.CharacterCodingException notUtf8) {
                // The bytes are not UTF-8, so this was not the expected kind of
                // corruption; keep the caller's text as it is.
                return msg;
            }
        }
    }
  • 相关阅读:
    一步一步写平衡二叉树(AVL树)
    sql关键字
    Remoting技术的应用
    算法:最大公约数
    算法冒泡排序
    C#编码好习惯
    利用VB.Net编程实现PC与掌上电脑PPC间的双向通信
    .Net Remoting与Server 对象详解
    算法迭代和递归
    SQL关键字系列之:minus与intersect
  • 原文地址:https://www.cnblogs.com/sagech/p/5671501.html
Copyright © 2011-2022 走看看