zoukankan      html  css  js  c++  java
  • 中文转Punycode

    package cn.cnnic.ops.udf;
    
    /**
     * Converts internationalized (for example Chinese) domain names into their
     * ASCII-compatible "xn--" form using the Punycode encoding of RFC 3492.
     *
     * <p>The class keeps a public no-argument constructor and instance methods so
     * that it can still be instantiated and called through {@link #evaluate(String)}
     * exactly as before.
     */
    public class GetPunycodeFromChinese {
        // Punycode bootstring parameters (RFC 3492, section 5).
        static final int TMIN = 1;
        static final int TMAX = 26;
        static final int BASE = 36;
        static final int INITIAL_N = 128;
        static final int INITIAL_BIAS = 72;
        static final int DAMP = 700;
        static final int SKEW = 38;
        static final char DELIMITER = '-';
        static final String PUNY_PREFIX = "xn--";
        static final char DOT = '.';
        // Regular expression matching a literal dot; the backslash must itself be
        // escaped inside a Java string literal.
        static final String SPLIT_DOT = "\\.";

        /**
         * Small demo: prints the encoded form of a sample Chinese domain name.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            // Same sample domain as before, spelled with Unicode escapes so the
            // file compiles regardless of the source encoding used by javac.
            String str = "\u4e92\u8054\u7f51\u7edc\u4fe1\u606f\u4e2d\u5fc3.\u4e2d\u56fd";
            GetPunycodeFromChinese gpfc = new GetPunycodeFromChinese();
            System.out.println(gpfc.evaluate(str));
        }

        /**
         * Best-effort conversion of a domain name to its Punycode form.
         *
         * @param txt the domain name; surrounding whitespace is trimmed
         * @return the converted name, {@code null} when {@code txt} is {@code null},
         *         or {@code txt} unchanged when the conversion fails
         */
        public String evaluate(String txt) {
            if (txt == null) {
                // Nothing to convert; avoid relying on a caught NullPointerException.
                return null;
            }
            try {
                return fromChineseToPunycode(txt.trim());
            } catch (Exception e) {
                // Deliberate best effort: report the problem and hand back the input.
                e.printStackTrace();
                return txt;
            }
        }

        /**
         * Converts a (possibly dotted) domain name label by label.
         *
         * <p>Labels that contain at least one non-ASCII character are replaced by
         * {@code "xn--"} followed by their Punycode encoding. Labels that are
         * already pure ASCII (including empty labels produced by leading, trailing
         * or doubled dots) are copied unchanged, so the dot structure of the input
         * is preserved.
         *
         * @param input the domain name, may be {@code null}
         * @return the converted name, or {@code ""} for {@code null}/empty input
         * @throws Exception if a label cannot be encoded (arithmetic overflow)
         */
        public String fromChineseToPunycode(String input) throws Exception {
            if (input == null || input.length() == 0) {
                return "";
            }
            // Limit -1 keeps trailing empty labels, e.g. the root dot of an FQDN.
            String[] labels = input.split(SPLIT_DOT, -1);
            StringBuilder result = new StringBuilder(input.length() + labels.length * PUNY_PREFIX.length());
            for (int index = 0; index < labels.length; index++) {
                if (index > 0) {
                    result.append(DOT);
                }
                String label = labels[index];
                if (isAllBasic(label)) {
                    result.append(label);
                } else {
                    result.append(PUNY_PREFIX).append(fromChineseToPunycodeUnit(label));
                }
            }
            return result.toString();
        }

        /**
         * Punycode-encodes a single label (RFC 3492, section 6.3) without adding
         * the {@code "xn--"} prefix.
         *
         * <p>The input is processed as Unicode code points, so a supplementary
         * character stored as a surrogate pair is encoded as one code point.
         * A label made only of ASCII characters is returned followed by the
         * delimiter, as the raw algorithm prescribes.
         *
         * @param input the label to encode; must not be {@code null}
         * @return the Punycode encoding of {@code input}
         * @throws Exception with message {@code "OVERFLOW"} if the delta computation
         *         would exceed the {@code int} range
         */
        public String fromChineseToPunycodeUnit(String input) throws Exception {
            int[] codePoints = toCodePoints(input);
            int length = codePoints.length;
            int n = INITIAL_N;
            int delta = 0;
            int bias = INITIAL_BIAS;
            StringBuilder output = new StringBuilder();

            // Copy the basic (ASCII) code points first, in their original order.
            int b = 0;
            for (int cp : codePoints) {
                if (isBasicCodePoint(cp)) {
                    output.append((char) cp);
                    b++;
                }
            }
            if (b > 0) {
                output.append(DELIMITER);
            }

            // h counts the code points that have been handled so far.
            int h = b;
            while (h < length) {
                // Find the smallest code point that is still unhandled (>= n).
                int m = Integer.MAX_VALUE;
                for (int cp : codePoints) {
                    if (cp >= n && cp < m) {
                        m = cp;
                    }
                }
                if (m - n > (Integer.MAX_VALUE - delta) / (h + 1)) {
                    throw new ArithmeticException("OVERFLOW");
                }
                delta = delta + (m - n) * (h + 1);
                n = m;
                for (int cp : codePoints) {
                    if (cp < n) {
                        // Check before incrementing: an int wraps to a negative
                        // value, never to zero, so a post-increment test is useless.
                        if (delta == Integer.MAX_VALUE) {
                            throw new ArithmeticException("OVERFLOW");
                        }
                        delta++;
                    }
                    if (cp == n) {
                        // Emit delta as a generalized variable-length integer.
                        int q = delta;
                        for (int k = BASE;; k += BASE) {
                            int t = threshold(k, bias);
                            if (q < t) {
                                break;
                            }
                            output.append((char) digit2codepoint(t + (q - t) % (BASE - t)));
                            q = (q - t) / (BASE - t);
                        }
                        output.append((char) digit2codepoint(q));
                        bias = adapt(delta, h + 1, h == b);
                        delta = 0;
                        h++;
                    }
                }
                delta++;
                n++;
            }
            return output.toString();
        }

        /**
         * Bias adaptation function of RFC 3492, section 6.1.
         *
         * @param delta the delta that was just encoded
         * @param numpoints number of code points handled so far, including the current one
         * @param first {@code true} when this is the first delta of the label
         * @return the new bias
         */
        public int adapt(int delta, int numpoints, boolean first) {
            if (first) {
                delta = delta / DAMP;
            } else {
                delta = delta / 2;
            }
            delta = delta + (delta / numpoints);
            int k = 0;
            while (delta > ((BASE - TMIN) * TMAX) / 2) {
                delta = delta / (BASE - TMIN);
                k = k + BASE;
            }
            return k + ((BASE - TMIN + 1) * delta) / (delta + SKEW);
        }

        /**
         * Tells whether a character is a basic (ASCII) code point.
         *
         * @param c the character to test
         * @return {@code true} if {@code c} is below {@code 0x80}
         */
        public boolean isBasic(char c) {
            return c < 0x80;
        }

        /**
         * Maps a Punycode digit value to the character that represents it.
         *
         * @param d digit value in the range 0..35
         * @return {@code 'a'..'z'} for 0..25 and {@code '0'..'9'} for 26..35
         * @throws Exception with message {@code "BAD_INPUT"} if {@code d} is outside 0..35
         */
        public int digit2codepoint(int d) throws Exception {
            if (d < 0 || d >= BASE) {
                throw new IllegalArgumentException("BAD_INPUT");
            }
            if (d < 26) {
                // 0..25 : 'a'..'z'
                return d + 'a';
            }
            // 26..35 : '0'..'9'
            return d - 26 + '0';
        }

        /**
         * Maps a Punycode digit character back to its digit value. Letters are
         * accepted in both cases, as RFC 3492 requires of decoders.
         *
         * @param c the character code to convert
         * @return 26..35 for {@code '0'..'9'}, 0..25 for {@code 'a'..'z'} or {@code 'A'..'Z'}
         * @throws Exception with message {@code "BAD_INPUT"} if {@code c} is not a Punycode digit
         */
        public int codepoint2digit(int c) throws Exception {
            if (c >= '0' && c <= '9') {
                // '0'..'9' : 26..35
                return c - '0' + 26;
            }
            if (c >= 'a' && c <= 'z') {
                // 'a'..'z' : 0..25
                return c - 'a';
            }
            if (c >= 'A' && c <= 'Z') {
                // 'A'..'Z' : 0..25
                return c - 'A';
            }
            throw new IllegalArgumentException("BAD_INPUT");
        }

        /** Returns the threshold t for digit position k under the given bias, clamped to TMIN..TMAX. */
        private static int threshold(int k, int bias) {
            if (k <= bias) {
                return TMIN;
            }
            if (k >= bias + TMAX) {
                return TMAX;
            }
            return k - bias;
        }

        /** Tells whether a code point is basic (ASCII). */
        private static boolean isBasicCodePoint(int codePoint) {
            return codePoint < INITIAL_N;
        }

        /** Tells whether every character of the label is ASCII (true for an empty label). */
        private static boolean isAllBasic(String label) {
            for (int i = 0; i < label.length(); i++) {
                if (label.charAt(i) >= INITIAL_N) {
                    return false;
                }
            }
            return true;
        }

        /** Splits a string into Unicode code points, joining valid surrogate pairs. */
        private static int[] toCodePoints(String text) {
            int[] codePoints = new int[text.codePointCount(0, text.length())];
            int count = 0;
            for (int i = 0; i < text.length(); ) {
                int cp = Character.codePointAt(text, i);
                codePoints[count++] = cp;
                i += Character.charCount(cp);
            }
            return codePoints;
        }
    }

    【参考】http://blog.csdn.net/a19881029/article/details/18262671

  • 相关阅读:
    国庆·生日
    国足
    Eason's concert
    今天的斩获
    The 4400
    闷热
    24
    一直登录不了。。。原来是因为。。。
    黄色暴雨警告
    绝密飞行
  • 原文地址:https://www.cnblogs.com/zhzhang/p/5751439.html
Copyright © 2011-2022 走看看