【C#基础】实现URL、Unicode编码:编码与解码相关方法整理
1、Unicode编码 引用系统 System.Web
// Builds a form-encoded POST body; the SAML request value is URL-encoded with
// HttpUtility.UrlEncode from System.Web.
using System.Web;
// NOTE(review): RelayState is concatenated without URL-encoding — confirm the caller
// has already encoded it, otherwise characters such as '&' or '=' would corrupt the body.
string postdata = "SAMLRequest=" + HttpUtility.UrlEncode(SAMLRequest) + "&RelayState=" + RelayState;
2、Unicode编码 自己封装的方法
/// <summary>
/// Percent-encodes every byte of the UTF-8 representation of <paramref name="str"/>.
/// </summary>
/// <remarks>
/// Every byte is escaped, including unreserved ASCII characters, so "A" becomes "%41".
/// Hex digits are lower-case.
/// </remarks>
/// <param name="str">Text to encode.</param>
/// <returns>The encoded text, exactly three characters ("%hh") per UTF-8 byte.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
public static string UrlEncode(string str)
{
    byte[] utf8 = System.Text.Encoding.UTF8.GetBytes(str);
    StringBuilder sb = new StringBuilder(utf8.Length * 3);
    foreach (byte b in utf8)
    {
        // "x2" pads to two digits. Without padding, bytes below 0x10 (tab, CR, LF, ...)
        // would be written as "%a" instead of "%0a", which is not a valid escape.
        sb.Append('%').Append(b.ToString("x2"));
    }
    return sb.ToString();
}
3、\u4E2D\u56FD 转换成 "中国"
/// <summary>
/// Replaces every <c>\uXXXX</c> escape (backslash, lower-case 'u', four hex digits) in
/// <paramref name="input"/> with the UTF-16 code unit it denotes; for example
/// <c>\u4E2D\u56FD</c> becomes "中国".
/// </summary>
/// <remarks>
/// Anything that is not a complete, valid escape is copied through unchanged. This includes
/// a backslash followed by another character, a truncated escape at the end of the text,
/// and an escape containing non-hex digits. Escaped surrogate halves are appended as-is, so
/// an escaped surrogate pair yields the supplementary character it encodes.
/// </remarks>
/// <param name="input">Text that may contain <c>\uXXXX</c> escapes.</param>
/// <returns>The text with all valid escapes decoded.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
public static string NormalU2C(string input)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    // Backslash + 'u' + four hex digits.
    const int EscapeLength = 6;

    StringBuilder decoded = new StringBuilder(input.Length);
    int pos = 0;
    while (pos < input.Length)
    {
        bool isEscape = input[pos] == '\\'
            && pos + EscapeLength <= input.Length
            && input[pos + 1] == 'u'
            && Uri.IsHexDigit(input[pos + 2])
            && Uri.IsHexDigit(input[pos + 3])
            && Uri.IsHexDigit(input[pos + 4])
            && Uri.IsHexDigit(input[pos + 5]);

        if (!isEscape)
        {
            // Not an escape: keep the character exactly as written.
            decoded.Append(input[pos]);
            pos++;
            continue;
        }

        int codeUnit = 0;
        for (int offset = 2; offset < EscapeLength; offset++)
        {
            codeUnit = (codeUnit << 4) | Uri.FromHex(input[pos + offset]);
        }

        // Append the raw code unit. Decoding it through an Encoding would replace
        // each half of a surrogate pair with U+FFFD.
        decoded.Append((char)codeUnit);
        pos += EscapeLength;
    }

    return decoded.ToString();
}
/// <summary>
/// Converts the first four characters of <paramref name="u4"/>, read as hexadecimal digits
/// (most significant first, either case), into a one-character string holding that UTF-16
/// code unit; for example { '4', 'E', '2', 'D' } yields "中".
/// </summary>
/// <param name="u4">At least four hex-digit characters; characters after the fourth are ignored.</param>
/// <returns>A string of length one containing the decoded code unit.</returns>
/// <exception cref="ArgumentNullException"><paramref name="u4"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="u4"/> has fewer than four characters, or one of the first four is not a hex digit.
/// </exception>
public static string UnicodeCode2Str(char[] u4)
{
    if (u4 == null)
    {
        throw new ArgumentNullException("u4");
    }
    if (u4.Length < 4)
    {
        throw new ArgumentException("It's not a unicode code array");
    }

    int codeUnit = 0;
    for (int i = 0; i < 4; i++)
    {
        if (!Uri.IsHexDigit(u4[i]))
        {
            throw new ArgumentException("It's not a unicode code array");
        }
        codeUnit = (codeUnit << 4) | Uri.FromHex(u4[i]);
    }

    // Return the code unit itself. Decoding two bytes through an Encoding would turn a
    // lone surrogate half into U+FFFD, so escaped surrogate pairs could never be rebuilt.
    return ((char)codeUnit).ToString();
}
4、网页ASCII转换成Unicode
/// <summary>
/// Writes <paramref name="htmltext"/> into a new <c>HTMLDocumentClass</c> document, closes it,
/// and returns the <c>innerText</c> of the document body.
/// </summary>
/// <param name="htmltext">HTML markup to load into the document.</param>
/// <returns>The body's inner text as reported by the document object.</returns>
public string HtmlEncoding(string htmltext)
{
    IHTMLDocument2 document = new HTMLDocumentClass();
    object[] markup = { htmltext };
    document.write(markup);
    document.close();
    return document.body.innerText;
}
5、解析html的NCR编码方法
/// <summary>
/// Decodes the hexadecimal numeric character references found in the body of an HTML page.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. <c>RegexHelper.GetMatchStr</c> is asked for the <c>&lt;body&gt;</c> content.
/// 2. Tabs, carriage returns, line feeds and spaces are removed.
/// 3. Remaining tags are stripped.
/// 4. The hex-reference prefix is rewritten to a 'u' marker and all semicolons are removed.
/// 5. The text is split on 'u'. The piece before the first 'u' is skipped; the first four
///    characters of every other piece are parsed as a hex code unit.
/// Text that is not part of a reference is therefore not carried into the result.
/// Any exception raised along the way is swallowed and a fixed error message is returned instead.
/// NOTE(review): the behaviour of RegexHelper.GetMatchStr (e.g. when nothing matches) is not
/// visible here — confirm against its definition.
/// </remarks>
/// <param name="htmltext">HTML source containing a body element.</param>
/// <returns>The decoded characters, or the fixed error message when any step throws.</returns>
public string NCRtoString(string htmltext)
{
    try
    {
        RegexHelper.GetMatchStr(htmltext, "<body>(.*?)</body>", out htmltext);

        string compact = htmltext.Replace("\t", "").Replace("\r", "").Replace("\n", "").Replace(" ", "");
        string tagless = Regex.Replace(compact, "<[^>]*>", "");
        string marked = tagless.Replace("&#x", "\\u").Replace(";", "");
        string[] segments = marked.Replace("\\", "").Split('u');

        StringBuilder decoded = new StringBuilder();
        // Index 0 holds whatever preceded the first 'u' marker and is ignored.
        for (int n = 1; n < segments.Length; n++)
        {
            string segment = segments[n];
            // Substring throws for segments shorter than four characters; the catch below
            // turns that into the error message.
            string hex = segment.Length == 4 ? segment : segment.Substring(0, 4);
            // Parse the hex digits as an integer and cast to the matching char.
            decoded.Append((char)int.Parse(hex, System.Globalization.NumberStyles.HexNumber));
        }
        return decoded.ToString();
    }
    catch (Exception)
    {
        return "解析html的NCR编码方法异常";
    }
}
6、C#实现escape编码
/// <summary>
/// Escape-style encoding: writes each UTF-16 code unit of <paramref name="s"/> as the
/// literal prefix "%25u" followed by four upper-case hex digits (high byte first).
/// </summary>
/// <param name="s">Text to encode.</param>
/// <returns>The encoded text, eight characters per UTF-16 code unit.</returns>
public static string UrlEncode(string s)
{
    // Little-endian UTF-16: the low byte of each code unit precedes the high byte.
    byte[] utf16 = System.Text.Encoding.Unicode.GetBytes(s);
    StringBuilder encoded = new StringBuilder();

    int low = 0;
    while (low < utf16.Length)
    {
        string highHex = utf16[low + 1].ToString("X2");
        string lowHex = utf16[low].ToString("X2");
        encoded.Append("%25u").Append(highHex).Append(lowHex);
        low += 2;
    }

    return encoded.ToString();
}
7、汉字与Unicode编码的相互转换
/// <summary>
/// Escapes every UTF-16 code unit of <paramref name="str"/> as <c>\uxxxx</c> with four
/// lower-case hex digits; for example "中国" becomes <c>\u4e2d\u56fd</c>.
/// </summary>
/// <remarks>
/// All characters are escaped, ASCII included. Code units are escaped one by one, so a
/// supplementary character is written as its two surrogate escapes.
/// </remarks>
/// <param name="str">Text to escape.</param>
/// <returns>The escaped text, six characters per UTF-16 code unit.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
public static string ToUnicode(string str)
{
    if (str == null)
    {
        throw new ArgumentNullException("str");
    }

    StringBuilder escaped = new StringBuilder(str.Length * 6);
    foreach (char codeUnit in str)
    {
        // Format the code unit directly. Going through Encoding.Unicode.GetBytes would
        // replace an unpaired surrogate with U+FFFD and lose the original value.
        escaped.Append("\\u").Append(((int)codeUnit).ToString("x4"));
    }
    return escaped.ToString();
}
/// <summary>
/// Decodes the <c>\uXXXX</c> escapes found in <paramref name="str"/> and returns the
/// decoded characters concatenated in order; for example <c>\u4e2d\u56fd</c> yields "中国".
/// </summary>
/// <remarks>
/// Only the escapes contribute to the result; any other text in <paramref name="str"/> is
/// not copied. Matching ignores case, so both the 'u' marker and the hex digits may be
/// upper- or lower-case. Sequences whose four digits are not all hexadecimal are not
/// treated as escapes. Despite the method name, the result is an ordinary .NET string;
/// no GB2312 conversion takes place.
/// </remarks>
/// <param name="str">Text containing <c>\uXXXX</c> escapes.</param>
/// <returns>The decoded characters, or an empty string when no escape is present.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null (raised by the regex call).</exception>
public static string ToGB2312(string str)
{
    // Restrict the digits to real hex characters: the previous \w class also matched
    // letters such as 'z' and underscores, which then failed to parse.
    MatchCollection escapes = Regex.Matches(
        str,
        @"\\u([0-9a-fA-F]{4})",
        RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    StringBuilder decoded = new StringBuilder(escapes.Count);
    foreach (Match escape in escapes)
    {
        // Append the raw code unit so that two consecutive surrogate escapes recombine
        // into one supplementary character instead of two U+FFFD replacements.
        decoded.Append((char)Convert.ToInt32(escape.Groups[1].Value, 16));
    }
    return decoded.ToString();
}