zoukankan      html  css  js  c++  java
  • 字符的编码

    package com.alibaba.china.gene.test;
    
    import java.io.UnsupportedEncodingException;
    import java.net.URLDecoder;
    import java.net.URLEncoder;
    
    /**
     * 模拟测试中文字符从浏览器到Web服务器Java端经过的转码过程
     */
    public class UrlEncodeTest {
    
        public static void main(String[] args) {
    
            System.out.println("模拟测试中文字符从浏览器到Web服务器Java端经过的转码过程");
            System.out.println("--------------------------------------------------");
            String str = "中文";
            String strGbk = "";
            String strUtf8 = "";
            try {
                strGbk = URLEncoder.encode(str, "gbk");
                strUtf8 = URLEncoder.encode(str, "utf8");
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
            }
            System.out.print("中文原字符串:");
            System.out.println(str);
            System.out.println("浏览器会做一次编码,FireFox默认gbk、IE默认Utf-8:");
            System.out.print("中文对应gbk编码:");
            System.out.println(strGbk);
            System.out.print("中文对应utf-8编码:");
            System.out.println(strUtf8);
            System.out.println();
    
            System.out.println("在构造Http请求头时,系统会按特定编码转成Byte流");
            System.out.print("中文原字符串转成的Bytes流:");
            byte[] bytes = getInBytes(str);
            printBytes(bytes);
            System.out.print("中文对应gbk编码转成的Bytes流:");
            byte[] bytesGbk = getInBytes(strGbk);
            printBytes(bytesGbk);
            System.out.print("中文对应utf-8编码转成的Bytes流:");
            byte[] bytesUtf8 = getInBytes(strUtf8);
            printBytes(bytesUtf8);
            System.out.println();
    
            System.out.println("在发送Http请求给服务器时,做网络传输时,系统都会转成二进制编码");
            System.out.print("中文原字符串Bytes流对应二进制:");
            String[] binary = printAndGetInBinary(bytes);
            System.out.print("中文对应gbk编码Bytes流对应二进制:");
            String[] binaryGbk = printAndGetInBinary(bytesGbk);
            System.out.print("中文对应utf-8编码Bytes流对应二进制:");
            String[] binaryUtf8 = printAndGetInBinary(bytesUtf8);
            System.out.println();
    
            System.out.println("服务器接收到二进制,系统都会转成Bytes流");
            System.out.print("中文原字符串对应二进制还原得到Bytes流:");
            bytes = restoreBytes(binary);
            printBytes(bytes);
            System.out.print("中文对应gbk编码对应二进制还原得到Bytes流:");
            bytesGbk = restoreBytes(binaryGbk);
            printBytes(bytesGbk);
            System.out.print("中文对应utf-8编码对应二进制还原得到Bytes流:");
            bytesUtf8 = restoreBytes(binaryUtf8);
            printBytes(bytesUtf8);
            System.out.println();
    
            System.out.println("应用服务器如Tomcat,默认会默认编码还原成字符串编码");
            str = new String(bytes);
            strGbk = new String(bytesGbk);
            strUtf8 = new String(bytesUtf8);
            System.out.print("中文原字符串Byte流还原得到的字符串编码:");
            System.out.println(str);
            System.out.print("中文对应gbk编码Byte流还原得到的字符串编码:");
            System.out.println(strGbk);
            System.out.print("中文对应utf-8编码Byte流还原得到的字符串编码:");
            System.out.println(strUtf8);
            System.out.println();
    
            try {
                System.out.println("Java应用,如Webx会按指定的编码还原字符串");
                System.out.print("中文原字符串按gbk还原后:");
                System.out.println(URLDecoder.decode(str, "gbk"));
                System.out.println("这说明如果客户端不进行编码直接发送中文给服务端,会造成信息丢失");
                System.out.print("中文对应gbk编码按gbk还原后:");
                System.out.println(URLDecoder.decode(strGbk, "gbk"));
                System.out.print("中文对应utf-8编码按utf-8还原后:");
                System.out.println(URLDecoder.decode(strUtf8, "utf-8"));
                System.out.println();
    
                System.out.println("Webx如果与浏览器使用的编码不一致,还原出的字符串会是乱码");
                System.out.print("中文对应gbk编码按utf-8还原后:");
                System.out.println(URLDecoder.decode(strGbk, "utf-8"));
                System.out.print("中文对应utf-8编码按gbk还原后:");
                System.out.println(URLDecoder.decode(strUtf8, "gbk"));
                System.out.println();
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
            }
    
            try {
                System.out.println("数据库中会转成iso-8859-1编码");
                str = "中文";
                System.out.print("中文字符串原文:");
                System.out.println(str);
                byte[] gbkBytes = str.getBytes("gbk");
                System.out.print("中文字符串对应GBK的Byte流:");
                printBytes(gbkBytes);
                System.out.print("中文字符串对应Byte流转成的iso-8859-1格式字符串:");
                System.out.println(new String(gbkBytes, "iso-8859-1"));
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
            }
            System.out.println("--------------------------------------------------");
        }
    
        private static byte[] restoreBytes(String[] binary) {
            if (binary == null) {
                return new byte[0];
            }
            byte[] bytes = new byte[binary.length];
            for (int i = 0; i < binary.length; i++) {
                bytes[i] = (byte) Integer.parseInt(binary[i], 2);
            }
            return bytes;
        }
    
        private static String[] printAndGetInBinary(byte[] bytes) {
            if (bytes == null) {
                return new String[0];
            }
            String[] binaryStrs = new String[bytes.length];
            for (int i = 0; i < bytes.length; i++) {
                binaryStrs[i] = byte2bits(bytes[i]);
            }
            for (String string : binaryStrs) {
                System.out.print(string);
            }
            System.out.println();
            return binaryStrs;
        }
    
        public static String byte2bits(byte b) {
            int z = b;
            z |= 256;
            String str = Integer.toBinaryString(z);
            int len = str.length();
            return str.substring(len - 8, len);
        }
    
        private static void printBytes(byte[] bytes) {
            if (bytes == null) {
                return;
            }
            StringBuilder strBuilder = new StringBuilder();
            for (byte b : bytes) {
                strBuilder.append(b);
            }
            System.out.println(strBuilder.toString());
        }
    
        protected static byte[] getInBytes(String str) {
            if (str == null) {
                return null;
            }
            byte[] bytes = null;
            try {// 这里按iso-8859-1转成Byte流
                bytes = str.getBytes("iso-8859-1");
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
            }
            return bytes;
        }
    
    }
    
    
  • 相关阅读:
    最简单,小白易上手 ajax请求数据库信息,echarts页面显示,无需跳转servlet
    北京市民信件大数据简单分析可视化(附加源码) 同含爬虫代码
    echart 横轴 上下分开显示
    echart 横轴倾斜
    echarts 柱状图横轴(x轴)数量太多,可以加一个滚动轴
    错误: 找不到或无法加载主类 org.apache.sqoop.Sqoop
    JAVA爬虫——爬取采集北京市政百姓信件内容——首都之窗(采用htmlunit,webmagic)附源代码、htmlUnit webmagic JAR包
    eclipse 中的DFS Location 找不到了(已解决)
    在Scala 中 val 与 var 的区别(言简意赅 小白易懂 实例代码)
    第21届国际足联世界杯观后感
  • 原文地址:https://www.cnblogs.com/limingluzhu/p/4143094.html
Copyright © 2011-2022 走看看