zoukankan      html  css  js  c++  java
  • Java 中文转换成 Unicode 编码和 Unicode 编码转换成中文

    转自:一叶飘舟
    http://blog.csdn.net/jdsjlzx/article/details/7058823
    package lia.meetlucene;
    
    import java.io.IOException;
    import org.apache.lucene.index.CorruptIndexException;
    
    /**
     * Converts text to and from a textual escape form in which every UTF-16 code
     * unit is written as a backslash, the letter 'u', and four lowercase
     * hexadecimal digits.
     */
    public class Unicode {
        /** Marker that introduces each escaped code unit: a backslash followed by 'u'. */
        private static final String ESCAPE_PREFIX = "\\u";

        /** Number of hexadecimal digits written after each marker. */
        private static final int HEX_DIGITS = 4;

        /**
         * Demonstrates both conversions on a short sample and prints the results.
         *
         * @param args ignored
         * @throws IOException never thrown by the current body; kept so the
         *         declared signature stays compatible with existing callers
         */
        public static void main(String[] args) throws IOException {
            String s = "简介";
            String tt = gbEncoding(s);
            // Print the escaped form of the sample text.
            System.out.println("unicodeBytes is: " + tt);
            // Print the text recovered from an escaped string. The argument must
            // contain literal backslashes, hence the doubled backslashes here.
            System.out.println("对应的中文: " + decodeUnicode("\\u7b80\\u4ecb"));
            System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
            // The sample text contains no backslash, so this prints -1.
            System.out.println(s.indexOf("\\"));
        }

        /**
         * Escapes every UTF-16 code unit of the given string.
         *
         * <p>Each code unit is always padded to four hexadecimal digits so that the
         * output can be split unambiguously again by {@link #decodeUnicode(String)}.
         *
         * @param gbString the text to escape; must not be {@code null}
         * @return the escaped text, or an empty string when the input is empty
         */
        public static String gbEncoding(final String gbString) {
            final int escapeLength = ESCAPE_PREFIX.length() + HEX_DIGITS;
            final StringBuilder escaped = new StringBuilder(gbString.length() * escapeLength);
            for (int i = 0; i < gbString.length(); i++) {
                final String hex = Integer.toHexString(gbString.charAt(i));
                escaped.append(ESCAPE_PREFIX);
                // Left-pad with zeros: toHexString drops leading zeros, so code
                // units below 0x1000 would otherwise yield fewer than four digits.
                for (int pad = hex.length(); pad < HEX_DIGITS; pad++) {
                    escaped.append('0');
                }
                escaped.append(hex);
            }
            return escaped.toString();
        }

        /**
         * Decodes a string made up solely of escaped code units back into text.
         *
         * <p>The input is split at each marker and the hexadecimal digits between
         * two markers are parsed as one code unit.
         *
         * @param dataStr the escaped text; must not be {@code null} and, unless
         *        empty, must start with the marker
         * @return the decoded text, or an empty string when the input is empty
         * @throws NumberFormatException if the input does not start with the marker
         *         or a chunk between two markers is not a valid hexadecimal number
         */
        public static String decodeUnicode(final String dataStr) {
            if (dataStr.isEmpty()) {
                return "";
            }
            if (!dataStr.startsWith(ESCAPE_PREFIX)) {
                // Without this check the first two characters would be skipped blindly.
                throw new NumberFormatException(
                        "Input must start with " + ESCAPE_PREFIX + ": " + dataStr);
            }
            final int prefixLength = ESCAPE_PREFIX.length();
            final StringBuilder decoded = new StringBuilder();
            int start = 0;
            while (start > -1) {
                // Look for the next marker after the one at 'start'; -1 means the
                // current chunk runs to the end of the input.
                final int end = dataStr.indexOf(ESCAPE_PREFIX, start + prefixLength);
                final String hex = (end == -1)
                        ? dataStr.substring(start + prefixLength)
                        : dataStr.substring(start + prefixLength, end);
                decoded.append((char) Integer.parseInt(hex, 16));
                start = end;
            }
            return decoded.toString();
        }

    }

    代码详解:

        public static String decodeUnicode(final String dataStr) {
            int start = 0;
            int end = 0;
            final StringBuffer buffer = new StringBuffer();
            while (start > -1) {
                end = dataStr.indexOf("\u", start + 1);
                //使得第一个unicode在start~end之间,+1,+2,+3均可
                System.out.println(start + "asdfasd~~~~~~~~~~~~~~~~~~~~~``" + end);
                // the index of the first occurrence of the specified substring,
                // starting at the specified index,
                // or -1 if there is no such occurrence.
                String charStr = "";
                if (end == -1) {
                    charStr = dataStr.substring(start + 2, dataStr.length());
                } else {
                    charStr = dataStr.substring(start + 2, end);
                }
                char letter = 0;
                if (charStr.length() == 4) {
                    letter = (char) Integer.parseInt(charStr, 16); // 16进制parse整形字符串。
                }
                //防止出错
                buffer.append(new Character(letter).toString());
                start = end;
            }
            return buffer.toString();
        }
  • 相关阅读:
    【转】用.NET 2.0压缩/解压封装的类
    复制到剪贴板的js代码(兼容ie、firefox、chrome、safari...什么都兼容!)
    感觉文章和回复都不错,转载了用正则表达式找出不包含连续字符串abc的单词
    订阅到抓虾、google reader、鲜果等的代码
    [转]妙说23种设计模式
    实现Server.UrlEncode和Server.UrlDecode功能的js代码
    【转】用JS操作XML
    添加到某某书签、某某收藏的代码
    [总结]关于在线用户列表的统计![转载]
    多张图片交替变换的实现方法JS实现和flash实现
  • 原文地址:https://www.cnblogs.com/XDJjy/p/4353802.html
Copyright © 2011-2022 走看看