zoukankan      html  css  js  c++  java
  • [转载] ASCII、UTF8、Unicode编码下的中英文字符大小

     /// <summary>
     /// Encodes each sample string with ASCII, UTF-8 and <see cref="Encoding.Unicode"/>
     /// (UTF-16, little-endian) and prints, for every combination, the encoding name,
     /// the source string, the encoded byte count, the bytes in decimal, and the
     /// string obtained by decoding those bytes again.
     /// </summary>
     private static void ShowCode() {
         // The single-character sample is "乙": the sample output published with this
         // snippet lists the strings b, abcd, 乙 and 甲乙丙丁.
         string[] samples = { "b", "abcd", "乙", "甲乙丙丁" };

         // Parallel tables: modes[k] is the label printed for encodings[k].
         string[] modes = { "ASCII", "UTF8", "Unicode" };
         Encoding[] encodings = { Encoding.ASCII, Encoding.UTF8, Encoding.Unicode };

         foreach (string str in samples) {
             for (int k = 0; k < encodings.Length; k++) {
                 byte[] buffer = encodings[k].GetBytes(str);
                 // Decode the bytes again to show whether the round trip is lossless;
                 // ASCII cannot represent the Chinese characters and yields '?'.
                 string back = encodings[k].GetString(buffer);

                 Console.WriteLine("Mode: {0}, String: {1}, Buffer.Length: {2}",
                     modes[k], str, buffer.Length);

                 Console.WriteLine("Buffer:");
                 foreach (byte b in buffer) {
                     Console.Write(b + " ");
                 }

                 // "Retrived" (sic) is kept so the text matches the published output.
                 Console.WriteLine("\nRetrived: {0}\n", back);
             }
         }
     }

    程序输出为:

    Mode: ASCII, String: b, Buffer.Length: 1
    Buffer: 98
    Retrived: b
    
    Mode: UTF8, String: b, Buffer.Length: 1
    Buffer: 98
    Retrived: b
    
    Mode: Unicode, String: b, Buffer.Length: 2
    Buffer: 98 0
    Retrived: b
    
    Mode: ASCII, String: abcd, Buffer.Length: 4
    Buffer: 97 98 99 100
    Retrived: abcd
    
    Mode: UTF8, String: abcd, Buffer.Length: 4
    Buffer: 97 98 99 100
    Retrived: abcd
    
    Mode: Unicode, String: abcd, Buffer.Length: 8
    Buffer: 97 0 98 0 99 0 100 0
    Retrived: abcd
    
    Mode: ASCII, String: 乙, Buffer.Length: 1
    Buffer: 63
    Retrived: ?
    
    Mode: UTF8, String: 乙, Buffer.Length: 3
    Buffer: 228 185 153
    Retrived: 乙
    
    Mode: Unicode, String: 乙, Buffer.Length: 2
    Buffer: 89 78
    Retrived: 乙
    
    Mode: ASCII, String: 甲乙丙丁, Buffer.Length: 4
    Buffer: 63 63 63 63
    Retrived: ????
    
    Mode: UTF8, String: 甲乙丙丁, Buffer.Length: 12
    Buffer: 231 148 178 228 185 153 228 184 153 228 184 129
    Retrived: 甲乙丙丁
    
    Mode: Unicode, String: 甲乙丙丁, Buffer.Length: 8
    Buffer: 50 117 89 78 25 78 1 78
    Retrived: 甲乙丙丁

    大体上可以得出这么几个结论:

    • ASCII不能保存中文(貌似谁都知道=_-`)。
    • UTF8是变长编码。在对ASCII字符编码时,UTF8更省空间,只占1个字节,与ASCII编码方式和长度相同;Unicode(此处指.NET的Encoding.Unicode,即小端序UTF-16)在对ASCII字符编码时,占用2个字节,且第2个字节补零。
    • UTF8在对常用中文字符(位于基本多文种平面内,如上例中的"甲乙丙丁")编码时需要占用3个字节;Unicode(UTF-16)对这些字符编码则只需要2个字节。位于辅助平面的生僻汉字在两种编码下都需要4个字节。
  • 相关阅读:
    POJ 1426 Find The Multiple(数论——中国同余定理)
    POJ 2253 Frogger(Dijkstra变形——最短路径最大权值)
    POJ 3790 最短路径问题(Dijkstra变形——最短路径双重最小权值)
    POJ 3278 Catch That Cow(模板——BFS)
    HDU 1071 The area
    HDU 1213 How Many Tables(模板——并查集)
    POJ 1611 The Suspects
    light oj 1214 Large Division
    POJ 1258 Agri-Net(Prim算法求解MST)
    POJ 2387 Til the Cows Come Home(模板——Dijkstra算法)
  • 原文地址:https://www.cnblogs.com/chaosimple/p/2501038.html
Copyright © 2011-2022 走看看