zoukankan      html  css  js  c++  java
  • 字符串编码传输

    在robotter项目(我们为日本人开发的一个web应用项目)的开发中遇到了字符串编码问题。robotter项目采用了微软silverlight beta版,中文和日文有时候在web服务器与silverlight客户端传输的过程中遇到了乱码问题。就算是url encoding过的东西,或者base64过的东西也会产生乱码。只好自己写一个算法给项目组解决这个问题。

    基本思想,采用64进制来表示数据。这是根据16进制的特点来设计的,因为16进制的表示都是两个英文字母或数字的组合,传输中不会乱码。现在,如果存在足够的数字和英文字母,使得64进制可以被表示,那么所有的数据都可以用这个进制来表示。

    动手从ASCII码表中选出64个(即2的6次方个)字母、数字和符号,构成64进制的编码字典。ASCII码表中传输时不会乱码的字母和数字不足2的7次方(128)个,所以无法采用128进制。

    // 编码字典

    public static readonly string codeDictionary = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz{|";

    6位二进制数可以表示64个不同的值,也就是说这个字典可以表示0到2的6次方减1(即0到63)范围内的任何数。

    一、根据上面的理论,每个字节的数据可以被编码成两个字节。

                   

    每个字节的低6位取出来,以它的值为位置,取出编码字典中的64进制表示符号,而高2位再次编码,这样,一个字节被编码成编码表中数字和英文字母的符号组合。

    在实现过程中第二个版本采用了这个编码方法。public static string EncodString2(string rawString)函数和public static string DecodString2(string codingString)函数。

    二、在实际应用中发现这种编码有点长。于是改进算法:

                                                   

    按照6位一个编码的思想,把3个字节的原始数据编码成4个字节的64进制代码符号。按照这个思想实现了编码解码的第三个算法版本。public static string EncodString3(string rawString)函数和public static string DecodString3(string codingString)函数。

    经过测试,这种编码的长度不足url编码的一半。

    附上源代码:

    /// <summary>
        /// 用来编码和解码非ascii字符的字符串
        /// </summary>
        public class RoboterCoder
        {
            // Encoding dictionary: 64 symbols ('0'-'9', 'A'-'Z', 'a'-'z', '{', '|').
            // A symbol's index in this string is the 6-bit value it stands for.
            public static readonly string codeDictionary = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz{|";

            /// <summary>
            /// 在字典中查找代码位置
            /// </summary>
            private static int GetCodePosition(char code)
            {
                int temp = Convert.ToInt32(code);
                if (temp < 65)
                    return (temp - 48);
                if (temp >= 65 && temp <= 90)
                {
                    return (temp - 55);
                }
                if (temp >= 97 && temp <= 124)
                {
                    return (temp - 61);
                }
                throw new RoboterException("RoboterCoder类GetCodePosition方法发生异常", "不正确的码值");
            }

            /// <summary>
            /// 编码字符串
            /// </summary>
            public static string EncodString3(string rawString)
            {
                if (rawString == string.Empty)
                    return string.Empty;
                if (rawString == null)
                    return null;

                byte[] buffer = Encoding.UTF8.GetBytes(rawString);
                StringBuilder sb = new StringBuilder();
                int j = 0;
                byte oldData = 0;
                byte data = 0;
                for (int i = 0; i < buffer.Length;i++)
                {
                    data = buffer[i];
                    int c = 0;
                    switch (j)
                    {
                        case 0:
                            c = (byte)(data << 2);
                            c = c >> 2;
                            oldData = (byte)(data >> 6);
                            j++;
                            sb.Append(codeDictionary[c]);
                            break;
                        case 1:
                            c = (byte)(data << 4);
                            c = c >> 2;
                            c = c | oldData;
                            oldData = (byte)(data >> 4);
                            j++;
                            sb.Append(codeDictionary[c]);
                            break;
                        default:
                            c = (byte)(data << 6);
                            c = c >> 2;
                            c = c | oldData;
                            oldData = (byte)(data >> 2);
                            j=0;
                            sb.Append(codeDictionary[c]);
                            c = oldData;
                            sb.Append(codeDictionary[c]);
                            break;
                    }
                }
                switch (j)
                {
                    case 1:
                        sb.Append(codeDictionary[oldData]);
                        break;
                    case 2:
                        sb.Append(codeDictionary[oldData]);
                        break;
                    default:
                        break;
                }
                return sb.ToString();
            }

            /// <summary>
            /// 解码字符串
            /// </summary>
            public static string DecodString3(string codingString)
            {
                if (codingString == string.Empty)
                    return string.Empty;
                if (codingString == null)
                    return null;

                int length = codingString.Length;
                int bitLength = (length / 4) * 3;
                int remain = (length % 4) - 1;
                if (remain < 0)
                    remain = 0;
                length = bitLength + remain;
                byte[] buffer = new byte[length];
                int j = 0;
                byte data = 0;
                byte oldData = 0;
                int pos = 0;

                for (int i = 0; i < codingString.Length;i++)
                {
                    char c = codingString[i];
                    int index = GetCodePosition(c);
                    data = (byte)index;

                    switch (j)
                    {
                        case 0:
                            oldData = data;
                            j++;
                            break;
                        case 1:
                            oldData = (byte)(oldData | (byte)(data << 6));
                            buffer[pos] = oldData;
                            oldData = (byte)(data >> 2);
                            pos++;
                            j++;
                            break;
                        case 2:
                            oldData = (byte)(oldData | (byte)(data << 4));
                            buffer[pos] = oldData;
                            oldData = (byte)(data >> 4);
                            pos++;
                            j++;
                            break;
                        default:
                            oldData = (byte)(oldData | (byte)(data << 2));
                            buffer[pos] = oldData;
                            pos++;
                            j=0;
                            break;
                    }
                }

                string result = Encoding.UTF8.GetString(buffer, 0, buffer.Length);
                return result;
            }

  • 相关阅读:
    SettingWithCopyWarning
    统计运算
    数据清洗
    dataframe 索引
    那些拯救我的快捷键
    如何拒绝那些哭天抢地向你求救结果把你坑了的同事?
    Linux 笔记
    数据可视化:桑基图
    敏捷
    持续集成的概念
  • 原文地址:https://www.cnblogs.com/worldreason/p/1188617.html
Copyright © 2011-2022 走看看