zoukankan      html  css  js  c++  java
  • 【C#基础】实现URL编码、Unicode编码及解码相关整理

    【C#基础】实现URL编码、Unicode编码及解码相关整理

    1、URL编码:引用系统自带的 System.Web

    using System.Web;
    
    // Each value is placed into a "name=value&name=value" parameter string, so every
    // value must be URL-encoded. An unencoded RelayState containing '&', '=', '+' or '%'
    // would otherwise be split or altered when the receiver parses the parameters.
    // NOTE(review): assumes RelayState holds the raw (not yet URL-encoded) value - confirm at the call site.
    string postdata = "SAMLRequest=" + HttpUtility.UrlEncode(SAMLRequest) + "&RelayState=" + HttpUtility.UrlEncode(RelayState);

    2、URL编码:自己封装的方法

    /// <summary>
    /// Percent-encodes <paramref name="str"/>: every byte of its UTF-8 representation
    /// is written as <c>%xx</c> with two lowercase hexadecimal digits. No character is
    /// left unescaped, including ASCII letters and digits.
    /// </summary>
    /// <param name="str">Text to encode. Passing null makes <c>GetBytes</c> throw.</param>
    /// <returns>The encoded text; an empty string for empty input.</returns>
    public static string UrlEncode(string str){
        // UTF-8 is used here; switch to System.Text.Encoding.Default only if the
        // receiver expects the system ANSI code page.
        byte[] utf8Bytes = System.Text.Encoding.UTF8.GetBytes(str);
        StringBuilder sb = new StringBuilder(utf8Bytes.Length * 3);
        foreach (byte b in utf8Bytes)
        {
            // "x2" always yields two digits. Convert.ToString(b, 16) drops the leading
            // zero for bytes below 0x10 and would emit an invalid escape such as "%a".
            sb.Append('%').Append(b.ToString("x2"));
        }

        return sb.ToString();
    }

    3、\u4E2D\u56FD 转换成 "中国"

         /// <summary>
            /// Replaces every <c>\uXXXX</c> escape sequence in <paramref name="input"/> with the
            /// character it denotes, e.g. <c>\u4E2D\u56FD</c> becomes "中国" and
            /// <c>\u8eab\u4efd\u9a8c\u8bc1\u5931\u8d25</c> becomes "身份验证失败".
            /// </summary>
            /// <remarks>
            /// Text outside escape sequences is copied unchanged. A backslash that is not
            /// followed by <c>u</c> is kept literally. An escape with fewer than four following
            /// characters, or one that <c>UnicodeCode2Str</c> rejects, is also copied unchanged.
            /// </remarks>
            /// <param name="input">Text that may contain <c>\uXXXX</c> escapes.</param>
            /// <returns>The decoded text; null or empty input is returned as is.</returns>
            public static string NormalU2C(string input)
            {
                if (string.IsNullOrEmpty(input))
                {
                    return input;
                }

                StringBuilder result = new StringBuilder(input.Length);
                int i = 0;
                while (i < input.Length)
                {
                    char ch = input[i];

                    // Only a backslash immediately followed by 'u' starts an escape. Skipping
                    // the next character unchecked would corrupt sequences such as "\n" and
                    // turn a trailing backslash into "\u".
                    bool startsEscape = ch == '\\' && i + 1 < input.Length && input[i + 1] == 'u';
                    if (!startsEscape)
                    {
                        result.Append(ch);
                        i++;
                        continue;
                    }

                    // Take up to four characters after "\u"; fewer are available only at the end of the input.
                    int available = Math.Min(4, input.Length - (i + 2));
                    char[] digits = input.ToCharArray(i + 2, available);
                    i += 2 + available;

                    string decoded = null;
                    if (available == 4)
                    {
                        try
                        {
                            decoded = UnicodeCode2Str(digits);
                        }
                        catch (Exception)
                        {
                            // Not four hexadecimal digits: fall through and keep the text verbatim.
                            decoded = null;
                        }
                    }

                    if (decoded != null)
                    {
                        result.Append(decoded);
                    }
                    else
                    {
                        result.Append(@"\u").Append(digits);
                    }
                }
                return result.ToString();
            }
    
            /// <summary>
            /// Converts four hexadecimal digit characters (for example <c>4E2D</c>) into a
            /// one-character string holding the UTF-16 code unit they denote ("中").
            /// NormalU2C calls this method for each <c>\uXXXX</c> escape it finds.
            /// </summary>
            /// <param name="u4">Array whose first four elements are hex digits (either case); extra elements are ignored.</param>
            /// <returns>A string of length 1 containing the decoded code unit.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="u4"/> is null.</exception>
            /// <exception cref="FormatException">Fewer than four elements, or one of the first four is not a hex digit.</exception>
            public static string UnicodeCode2Str(char[] u4)
            {
                if (u4 == null)
                {
                    throw new ArgumentNullException("u4");
                }
                if (u4.Length < 4)
                {
                    throw new FormatException("It's not a unicode code array");
                }

                const string hexDigits = "0123456789ABCDEF";
                int codeUnit = 0;
                for (int i = 0; i < 4; i++)
                {
                    int digit = hexDigits.IndexOf(char.ToUpperInvariant(u4[i]));
                    if (digit == -1)
                    {
                        throw new FormatException("It's not a unicode code array");
                    }
                    codeUnit = (codeUnit * 0x10) + digit;
                }

                // Cast directly instead of decoding two bytes through an Encoding. A decoder
                // replaces a lone surrogate half with U+FFFD, so escaped surrogate pairs such
                // as \uD83D\uDE00 could never be reassembled by the caller.
                return ((char)codeUnit).ToString();
            }

    4、网页ASCII转换成Unicode

        //网页ASCII转换成Unicode
            /// <summary>
            /// Writes <paramref name="htmltext"/> into a newly created HTML document object,
            /// closes the document, and returns the <c>innerText</c> of its body.
            /// </summary>
            /// <param name="htmltext">HTML markup to load into the document.</param>
            /// <returns>The value of <c>body.innerText</c> after the markup has been written.</returns>
            public string HtmlEncoding(string htmltext)
            {
                IHTMLDocument2 document = new HTMLDocumentClass();
                object[] markup = { htmltext };
                document.write(markup);
                document.close();
                return document.body.innerText;
            }
    

    5、解析html的NCR编码方法

    //解析html的NCR编码方法
            /// <summary>
            /// Decodes hexadecimal numeric character references (<c>&amp;#xXXXX;</c>) found in an
            /// HTML string into the characters they denote.
            /// </summary>
            /// <param name="htmltext">HTML text; it is overwritten by the result of <c>RegexHelper.GetMatchStr</c>.</param>
            /// <returns>
            /// The concatenated decoded characters, or the fixed message
            /// "解析html的NCR编码方法异常" when any step throws.
            /// </returns>
            /// <remarks>
            /// NOTE(review): every literal 'u' left in the text also acts as a separator, and a
            /// segment shorter than four characters makes Substring throw, which ends in the
            /// error message being returned - confirm inputs contain only NCR sequences.
            /// </remarks>
            public string NCRtoString(string htmltext)
            {
                string result = "";
                try
                {
                    // Presumably stores the text captured between <body> and </body> into htmltext;
                    // RegexHelper is defined elsewhere - TODO confirm its exact behaviour.
                    RegexHelper.GetMatchStr(htmltext, "<body>(.*?)</body>", out htmltext);
                    // Remove tabs, line breaks and spaces.
                    htmltext = htmltext.Replace("\t", "").Replace("\r", "").Replace("\n", "").Replace(" ", "");
                    // Remove anything that looks like an HTML tag.
                    htmltext = Regex.Replace(htmltext,"<[^>]*>","");
                    // Turn each "&#x" prefix into "\u" and drop every ';'.
                    htmltext = htmltext.Replace("&#x", "\\u").Replace(";", "");
                    // Drop the backslashes and split on 'u'; element 0 (text before the first 'u') is skipped below.
                    string[] strlist = htmltext.Replace("\\", "").Split('u');
                    for (int i = 1; i < strlist.Length; i++)
                    {
                        // Keep only the first four characters of a segment whose length is not exactly four.
                        if (strlist[i].Length!=4)
                        {
                            strlist[i] = strlist[i].Substring(0,4);
                        }
                        // Parse the hex digits into an integer, then cast it to a char.
                        result += (char)int.Parse(strlist[i], System.Globalization.NumberStyles.HexNumber);
                    }
                }
                catch (Exception)
                {
                    // Any failure above is reported through the return value instead of an exception.
                    return "解析html的NCR编码方法异常";
                }
                return result;
            }
     

    6、C#实现escape编码

         //C#实现escape编码
            /// <summary>
            /// Encodes <paramref name="s"/> in an escape-like form: each two-byte unit of its
            /// <c>Encoding.Unicode</c> (UTF-16 little-endian) bytes is written as <c>%25u</c>
            /// followed by four uppercase hexadecimal digits, high byte first.
            /// </summary>
            /// <param name="s">Text to encode.</param>
            /// <returns>The encoded text; an empty string for empty input.</returns>
            public static string UrlEncode(string s)
            {
                byte[] utf16 = System.Text.Encoding.Unicode.GetBytes(s);
                StringBuilder encoded = new StringBuilder();
                int pos = 0;
                while (pos < utf16.Length)
                {
                    encoded.Append("%25u");
                    // The bytes are little-endian, so the high byte sits at the odd index.
                    byte high = utf16[pos + 1];
                    byte low = utf16[pos];
                    encoded.Append(high.ToString("X2")).Append(low.ToString("X2"));
                    pos += 2;
                }
                return encoded.ToString();
            }

    7、汉字与Unicode编码的相互转换

            /// <summary>
            /// Converts text into a sequence of <c>\uxxxx</c> escapes: each two-byte unit of the
            /// string's <c>Encoding.Unicode</c> bytes becomes <c>\u</c> followed by four
            /// lowercase hexadecimal digits.
            /// </summary>
            /// <param name="str">Text to escape (for example Chinese characters).</param>
            /// <returns>The escaped text; an empty string for empty input.</returns>
            public static string ToUnicode(string str)
            {
                byte[] utf16 = Encoding.Unicode.GetBytes(str);
                StringBuilder escaped = new StringBuilder();

                // The bytes come in little-endian pairs: low byte at "lo", high byte at "hi".
                for (int lo = 0, hi = 1; lo < utf16.Length; lo += 2, hi += 2)
                {
                    escaped.Append("\\u");
                    escaped.Append(utf16[hi].ToString("x2"));
                    escaped.Append(utf16[lo].ToString("x2"));
                }

                return escaped.ToString();
            }
    
            /// <summary>
            /// Decodes every <c>\uXXXX</c> escape (four hexadecimal digits, either case) found
            /// in <paramref name="str"/> and returns the decoded characters concatenated.
            /// Despite the method name, the result is an ordinary .NET string, not GB2312 bytes.
            /// </summary>
            /// <remarks>
            /// Text that is not part of a <c>\uXXXX</c> escape is not copied to the result.
            /// </remarks>
            /// <param name="str">Text containing <c>\uXXXX</c> escapes.</param>
            /// <returns>The decoded characters; an empty string for null or empty input or when nothing matches.</returns>
            public static string ToGB2312(string str)
            {
                if (string.IsNullOrEmpty(str))
                {
                    return string.Empty;
                }

                // Match hex digits only. A \w class would also accept letters such as 's' or
                // '_' and make the numeric conversion below throw on text like "\user1".
                MatchCollection escapes = Regex.Matches(str, @"\\u([0-9a-fA-F]{4})", RegexOptions.Compiled | RegexOptions.IgnoreCase);

                StringBuilder decoded = new StringBuilder(escapes.Count);
                foreach (Match escape in escapes)
                {
                    // Append the code unit directly. Decoding each unit through an Encoding
                    // would replace a lone surrogate half with U+FFFD and break escaped pairs.
                    decoded.Append((char)Convert.ToInt32(escape.Groups[1].Value, 16));
                }

                return decoded.ToString();
            }
  • 相关阅读:
    win7 x64怎么枚举所有快捷键呢
    C/C++多种方法获取文件大小
    中缀表达式转后缀表达式(逆波兰表达式)
    检测文件存在的四种方法
    透明窗口与不规则窗口制作方法总结
    Struts 2命令执行漏洞
    Windows 8 无法安装
    从浏览器启动客户端程序
    tesseractocr训练方法
    Algorithm Gossip: 中序式轉後序式(前序式)
  • 原文地址:https://www.cnblogs.com/grj001/p/12223797.html
Copyright © 2011-2022 走看看