using System;
using System.Collections.Generic;
using System.Text;
namespace taobao
{
class EncodingTransfer
{
/// <summary>
/// Round-trips <paramref name="unicodeStr"/> through the GB2312 code page and returns
/// the text decoded from those GB2312 bytes.
/// </summary>
/// <remarks>
/// A .NET string is always UTF-16 in memory, so the result only differs from the input
/// where a character has no GB2312 representation; those characters are substituted by
/// the encoding's fallback.
/// </remarks>
/// <param name="unicodeStr">Text to convert; <c>null</c> is treated as empty.</param>
/// <returns>The text as it reads after being encoded to GB2312 and decoded again.</returns>
static public string UnicodeToGB2312(string unicodeStr)
{
    // The previous body ignored the argument and always converted a hard-coded
    // sample string, so every caller received the same constant.
    if (string.IsNullOrEmpty(unicodeStr))
    {
        return string.Empty;
    }

    Encoding gb2312 = Encoding.GetEncoding("gb2312");
    byte[] gb2312Bytes = gb2312.GetBytes(unicodeStr);
    return gb2312.GetString(gb2312Bytes);
}
/// <summary>
/// Encodes <paramref name="gb2312info"/> to GB2312 bytes, transcodes those bytes to UTF-8
/// and decodes the UTF-8 bytes back into a string.
/// </summary>
/// <param name="gb2312info">Text to pass through the GB2312 to UTF-8 conversion.</param>
/// <returns>The string decoded from the resulting UTF-8 bytes.</returns>
static public string StringToUtf8(string gb2312info)
{
    Encoding target = Encoding.UTF8;
    Encoding source = Encoding.GetEncoding("gb2312");

    byte[] sourceBytes = source.GetBytes(gb2312info);
    byte[] targetBytes = Encoding.Convert(source, target, sourceBytes);

    // GetString is the one-call equivalent of GetCharCount + GetChars + new string(...).
    return target.GetString(targetBytes);
}
/// <summary>
/// Encodes <paramref name="utf8Str"/> as UTF-8, transcodes the bytes to GB2312 and
/// returns the text decoded from the GB2312 bytes.
/// </summary>
/// <param name="utf8Str">Text to convert.</param>
/// <returns>The string decoded from the GB2312 bytes.</returns>
/// <exception cref="ArgumentNullException"><paramref name="utf8Str"/> is <c>null</c>.</exception>
static public string Utf8ToGB2312(string utf8Str)
{
    if (utf8Str == null)
    {
        // Same exception type the original surfaced from Encoding.GetBytes(null).
        throw new ArgumentNullException("utf8Str");
    }

    Encoding utf8 = Encoding.UTF8;
    Encoding gb2312 = Encoding.GetEncoding("gb2312");

    byte[] utf8Bytes = utf8.GetBytes(utf8Str);
    byte[] gb2312Bytes = Encoding.Convert(utf8, gb2312, utf8Bytes);

    // The previous body computed these bytes, discarded them, and returned a
    // hard-coded sample string instead.
    return gb2312.GetString(gb2312Bytes);
}
/// <summary>
/// Renders every UTF-16 code unit of <paramref name="gbkStr"/> as a <c>\uXXXX</c> escape
/// with four upper-case hex digits.
/// </summary>
/// <param name="gbkStr">Text to escape.</param>
/// <returns>The escaped text, e.g. <c>\u5317\u4EAC</c> for "北京".</returns>
public static string StringToUnicode(string gbkStr)
{
    // UTF-16LE bytes: index i is the low byte, i + 1 the high byte of each code unit.
    byte[] utf16 = Encoding.Unicode.GetBytes(gbkStr);
    StringBuilder escaped = new StringBuilder(utf16.Length * 3);

    for (int i = 0; i + 1 < utf16.Length; i += 2)
    {
        // "\\u" is an escaped backslash followed by 'u'. A bare "\u" starts a C# Unicode
        // escape that needs four hex digits and otherwise fails to compile.
        escaped.Append("\\u");
        escaped.Append(utf16[i + 1].ToString("X2"));
        escaped.Append(utf16[i].ToString("X2"));
    }
    return escaped.ToString();
}
/// <summary>
/// Decodes <c>\uXXXX</c> escapes and decimal numeric character references
/// (<c>&amp;#NNNN;</c>) in <paramref name="unicodeStr"/> into the characters they denote.
/// </summary>
/// <param name="unicodeStr">
/// Escaped text, e.g. <c>\u5317\u4EAC</c> or <c>&amp;#21271;&amp;#20140;</c>.
/// </param>
/// <returns>
/// The decoded text ("北京" for both examples), or an empty string when the input is
/// <c>null</c> or empty. Leading and trailing white space is trimmed. Anything that is
/// not a well-formed escape is copied through unchanged.
/// </returns>
public static string UnicodeToString(string unicodeStr)
{
    if (string.IsNullOrEmpty(unicodeStr))
    {
        return "";
    }
    unicodeStr = unicodeStr.Trim();

    StringBuilder result = new StringBuilder(unicodeStr.Length);
    int i = 0;
    while (i < unicodeStr.Length)
    {
        string decoded;
        int consumed;
        if (TryDecodeUnicodeEscapeAt(unicodeStr, i, out decoded, out consumed)
            || TryDecodeNumericEntityAt(unicodeStr, i, out decoded, out consumed))
        {
            result.Append(decoded);
            i += consumed;
        }
        else
        {
            // Not an escape (or a malformed one): keep the character as-is.
            result.Append(unicodeStr[i]);
            i++;
        }
    }
    return result.ToString();
}

// Decodes a "\uXXXX" escape (backslash, lower-case u, exactly four hex digits) at index start.
private static bool TryDecodeUnicodeEscapeAt(string text, int start, out string decoded, out int consumed)
{
    decoded = null;
    consumed = 0;
    if (start + 6 > text.Length || text[start] != '\\' || text[start + 1] != 'u')
    {
        return false;
    }

    int codeUnit = 0;
    for (int k = start + 2; k < start + 6; k++)
    {
        if (!Uri.IsHexDigit(text[k]))
        {
            return false;
        }
        codeUnit = codeUnit * 16 + Uri.FromHex(text[k]);
    }

    // A direct cast keeps surrogate halves intact, so two consecutive escapes can
    // form a valid surrogate pair in the output.
    decoded = ((char)codeUnit).ToString();
    consumed = 6;
    return true;
}

// Decodes a decimal numeric character reference "&#NNN;" at index start.
private static bool TryDecodeNumericEntityAt(string text, int start, out string decoded, out int consumed)
{
    decoded = null;
    consumed = 0;
    if (start + 1 >= text.Length || text[start] != '&' || text[start + 1] != '#')
    {
        return false;
    }

    int pos = start + 2;
    int codePoint = 0;
    while (pos < text.Length && text[pos] >= '0' && text[pos] <= '9')
    {
        codePoint = codePoint * 10 + (text[pos] - '0');
        if (codePoint > 0x10FFFF)
        {
            // Beyond the Unicode range; bailing out here also prevents int overflow.
            return false;
        }
        pos++;
    }

    bool hasDigits = pos > start + 2;
    if (!hasDigits || pos >= text.Length || text[pos] != ';')
    {
        return false;
    }

    if (codePoint <= 0xFFFF)
    {
        decoded = ((char)codePoint).ToString();
    }
    else
    {
        // Supplementary-plane code points need a surrogate pair.
        decoded = char.ConvertFromUtf32(codePoint);
    }
    consumed = pos - start + 1;
    return true;
}
/// <summary>
/// Encodes <paramref name="unicodeString"/> as GBK (code page 936) and renders the bytes as
/// text: bytes up to 127 appear as the corresponding character, larger bytes as <c>%</c>
/// followed by their upper-case hex value.
/// </summary>
/// <param name="unicodeString">Text to encode.</param>
/// <returns>The rendered byte sequence, e.g. <c>%B1%B1%BE%A9</c> for "北京".</returns>
public static string StringToGBK (string unicodeString)
{
    Encoding utf16 = Encoding.Unicode;
    Encoding gbk = Encoding.GetEncoding(936);

    // UTF-16 bytes of the input, transcoded to GBK.
    byte[] encoded = Encoding.Convert(utf16, gbk, utf16.GetBytes(unicodeString));

    StringBuilder text = new StringBuilder();
    foreach (byte b in encoded)
    {
        if (b > 127)
        {
            text.Append("%");
            text.Append(b.ToString("X"));
        }
        else
        {
            text.Append((char)b);
        }
    }
    return text.ToString();
}
/// <summary>
/// Decodes text in which GBK (code page 936) bytes are written as <c>%XX</c> hex escapes,
/// with bytes in the ASCII range optionally written as plain characters.
/// </summary>
/// <param name="gbkstr">Escaped text, e.g. <c>%B1%B1%BE%A9</c>.</param>
/// <returns>
/// The decoded text ("北京" for the example), or an empty string when the input is
/// <c>null</c> or empty. A <c>%</c> that is not followed by two hex digits is kept literally.
/// </returns>
public static string GBKToString(string gbkstr)
{
    if (string.IsNullOrEmpty(gbkstr))
    {
        return "";
    }

    Encoding gbk = Encoding.GetEncoding(936);
    StringBuilder result = new StringBuilder(gbkstr.Length);

    // Every input character contributes at most one byte, so this never overflows.
    byte[] pending = new byte[gbkstr.Length];
    int pendingCount = 0;

    int i = 0;
    while (i < gbkstr.Length)
    {
        char current = gbkstr[i];
        bool isEscape = current == '%'
            && i + 2 < gbkstr.Length
            && Uri.IsHexDigit(gbkstr[i + 1])
            && Uri.IsHexDigit(gbkstr[i + 2]);

        if (isEscape)
        {
            pending[pendingCount++] = (byte)(Uri.FromHex(gbkstr[i + 1]) * 16 + Uri.FromHex(gbkstr[i + 2]));
            i += 3;
        }
        else if (current <= 127)
        {
            // Plain ASCII is buffered as a byte too: a GBK trail byte may fall in the ASCII
            // range, so it must be decoded together with the lead byte before it.
            pending[pendingCount++] = (byte)current;
            i++;
        }
        else
        {
            // An already-decoded character: flush buffered bytes, then copy it through.
            if (pendingCount > 0)
            {
                result.Append(gbk.GetString(pending, 0, pendingCount));
                pendingCount = 0;
            }
            result.Append(current);
            i++;
        }
    }

    if (pendingCount > 0)
    {
        result.Append(gbk.GetString(pending, 0, pendingCount));
    }
    return result.ToString();
}
/// <summary>
/// Returns the decimal value of the first ASCII-encoded byte of a one-character string.
/// </summary>
/// <param name="character">A string expected to hold exactly one character.</param>
/// <returns>
/// The byte value as decimal text, or an empty string when the input is not exactly one
/// character long.
/// </returns>
public static string Asc(string character)
{
    if (character.Length != 1)
    {
        return "";
    }

    byte[] asciiBytes = Encoding.ASCII.GetBytes(character);
    int code = asciiBytes[0];
    return code.ToString();
}
/// <summary>
/// Converts a byte value to a one-character string using the ASCII encoding.
/// </summary>
/// <param name="asciiCode">Value to convert; accepted range is 0 to 255.</param>
/// <returns>
/// The ASCII-decoded character, or an empty string when the value is outside 0..255.
/// Decoding goes through the ASCII encoding, so values above 127 are not decoded as
/// extended characters.
/// </returns>
public static string Chr(int asciiCode)
{
    if (asciiCode < 0 || asciiCode > 0xff)
    {
        return "";
    }

    byte[] single = new byte[] { (byte)asciiCode };
    return Encoding.ASCII.GetString(single);
}
/// <summary>
/// Decodes a whole byte array as ASCII text.
/// </summary>
/// <param name="byteArr">Bytes to decode.</param>
/// <returns>
/// The decoded string; for example the bytes
/// <c>4C 6F 63 6F 79 53 70 69 64 65 72 2E 64 6C 6C</c> decode to "LocoySpider.dll".
/// </returns>
private static string ByteToString(byte[] byteArr)
{
    Encoding ascii = Encoding.ASCII;
    string text = ascii.GetString(byteArr);
    return text;
}
/// <summary>
/// Decodes a slice of a byte array as ASCII text.
/// </summary>
/// <param name="byteArr">Bytes to decode.</param>
/// <param name="startIdx">Index of the first byte to decode.</param>
/// <param name="count">Number of bytes to decode.</param>
/// <returns>The decoded string.</returns>
private static string ByteToString(byte[] byteArr, int startIdx, int count)
{
    Encoding ascii = Encoding.ASCII;
    string text = ascii.GetString(byteArr, startIdx, count);
    return text;
}
/// <summary>
/// Writes each byte of the UTF-16 little-endian encoding of <paramref name="s"/> as
/// <c>%</c> followed by two upper-case hex digits.
/// </summary>
/// <param name="s">Text to escape.</param>
/// <returns>The escaped bytes, e.g. <c>%41%00</c> for "A".</returns>
public static string escape(string s)
{
    StringBuilder escaped = new StringBuilder();
    byte[] utf16 = Encoding.Unicode.GetBytes(s);

    // One "%XX" group per byte, in the little-endian order GetBytes produced.
    foreach (byte b in utf16)
    {
        escaped.Append("%").Append(b.ToString("X2"));
    }
    return escaped.ToString();
}
/// <summary>
/// Decodes <c>%</c>-separated hex groups into text.
/// </summary>
/// <remarks>
/// Two group shapes are accepted and may be mixed:
/// a four-digit group <c>%HHLL</c> is one UTF-16 code unit written high byte first
/// (any characters after the fourth digit of a group are ignored);
/// a two-digit group <c>%XX</c> is one byte of a UTF-16 little-endian stream, which is
/// the format <c>escape</c> in this class emits.
/// </remarks>
/// <param name="s">Escaped text starting with <c>%</c>; surrounding white space is trimmed.</param>
/// <returns>The decoded text, or an empty string for <c>null</c>, empty or blank input.</returns>
/// <exception cref="FormatException">
/// The text does not start with <c>%</c>, or a group is not valid hex of length 2 or at least 4.
/// </exception>
public static string unescape(string s)
{
    if (s == null)
    {
        return "";
    }
    s = s.Trim();
    if (s.Length == 0)
    {
        return "";
    }
    if (s[0] != '%')
    {
        throw new FormatException("Escaped text must start with '%'.");
    }

    // Drop the leading '%' so the split does not yield an empty first token.
    string[] tokens = s.Substring(1).Split('%');
    List<byte> utf16 = new List<byte>(tokens.Length * 2);

    foreach (string token in tokens)
    {
        if (token.Length == 2)
        {
            utf16.Add(Convert.ToByte(token, 16));
        }
        else if (token.Length >= 4)
        {
            // The text has the high byte first; UTF-16LE wants the low byte first.
            utf16.Add(Convert.ToByte(token.Substring(2, 2), 16));
            utf16.Add(Convert.ToByte(token.Substring(0, 2), 16));
        }
        else
        {
            throw new FormatException("Each '%' group must contain 2 or 4 hex digits: '" + token + "'.");
        }
    }

    return Encoding.Unicode.GetString(utf16.ToArray());
}
/// <summary>
/// Encodes <paramref name="write"/> to GB2312 bytes and reinterprets those bytes as
/// ISO-8859-1 text.
/// </summary>
/// <param name="write">Text to encode as GB2312.</param>
/// <returns>A string whose characters are the GB2312 bytes decoded as ISO-8859-1.</returns>
public string GB2312_ISO8859(string write)
{
    Encoding latin1 = Encoding.GetEncoding("iso8859-1");
    Encoding gb2312 = Encoding.GetEncoding("gb2312");

    byte[] gb2312Bytes = gb2312.GetBytes(write);
    return latin1.GetString(gb2312Bytes);
}
/// <summary>
/// Conversion applied when reading: encodes <paramref name="read"/> to ISO-8859-1 bytes
/// and decodes those bytes as GB2312 text.
/// </summary>
/// <param name="read">Text whose ISO-8859-1 bytes are to be decoded as GB2312.</param>
/// <returns>The string decoded from those bytes as GB2312.</returns>
public string ISO8859_GB2312(string read)
{
    Encoding latin1 = Encoding.GetEncoding("iso8859-1");
    Encoding gb2312 = Encoding.GetEncoding("gb2312");

    byte[] rawBytes = latin1.GetBytes(read);
    return gb2312.GetString(rawBytes);
}
//public DataSet ISO8859_GB2312(DataSet ds) { string xml; xml = ds.GetXml(); ds.Clear(); //声明字符集 System.Text.Encoding iso8859,gb2312; //iso8859 iso8859 = System.Text.Encoding.GetEncoding("iso8859-1"); //国标2312 gb2312 = System.Text.Encoding.GetEncoding("gb2312"); byte[] bt; bt = iso8859.GetBytes(xml); xml = gb2312.GetString(bt); ds.ReadXml(new System.IO.StringReader(xml)); return ds; }
/// <summary>
/// Encodes the UTF-16 little-endian bytes of <paramref name="unicodeString"/> as Base64.
/// </summary>
/// <param name="unicodeString">Text to encode.</param>
/// <returns>The Base64 text, e.g. <c>QQA=</c> for "A".</returns>
static public string StringToBase64(string unicodeString)
{
    byte[] utf16 = Encoding.Unicode.GetBytes(unicodeString);
    string base64 = Convert.ToBase64String(utf16);
    return base64;
}
/// <summary>
/// Decodes Base64 text and interprets the resulting bytes as UTF-16 little-endian.
/// </summary>
/// <param name="base64">Base64 text to decode.</param>
/// <returns>The decoded string, e.g. "A" for <c>QQA=</c>.</returns>
static public string Base64ToString(string base64)
{
    byte[] utf16 = Convert.FromBase64String(base64);
    string text = Encoding.Unicode.GetString(utf16);
    return text;
}
// Note: the same round-trip pattern also works for other encodings: BigEndianUnicode (UTF-16), UTF-7, UTF-8 and ASCII.
/// <summary>
/// Encodes the UTF-16 big-endian bytes of <paramref name="unicodeString"/> as Base64.
/// </summary>
/// <param name="unicodeString">Text to encode.</param>
/// <returns>The Base64 text, e.g. <c>AEE=</c> for "A".</returns>
static public string StringToUTF16(string unicodeString)
{
    byte[] bigEndian = Encoding.BigEndianUnicode.GetBytes(unicodeString);
    string base64 = Convert.ToBase64String(bigEndian);
    return base64;
}
/// <summary>
/// Decodes Base64 text and interprets the resulting bytes as UTF-16 big-endian.
/// </summary>
/// <param name="UTF16">Base64 text to decode.</param>
/// <returns>The decoded string, e.g. "A" for <c>AEE=</c>.</returns>
static public string UTF16ToString(string UTF16)
{
    byte[] bigEndian = Convert.FromBase64String(UTF16);
    string text = Encoding.BigEndianUnicode.GetString(bigEndian);
    return text;
}
/// <summary>
/// Concatenates the upper-case hex value of every UTF-16 code unit in
/// <paramref name="input"/>, using at least two digits per code unit and no separator.
/// </summary>
/// <param name="input">Text to convert.</param>
/// <returns>The hex text, e.g. <c>48656C6C6F</c> for "Hello".</returns>
static public string ToHex(string input)
{
    StringBuilder hex = new StringBuilder();
    foreach (char unit in input.ToCharArray())
    {
        int code = unit;
        hex.Append(code.ToString("X2"));
    }
    return hex.ToString();
}
/// <summary>
/// Converts space-separated hexadecimal code points into text.
/// </summary>
/// <param name="input">Hex code points separated by spaces, e.g. <c>48 65 6C</c>.</param>
/// <returns>
/// The decoded text ("Hel" for the example), or an empty string when the input is
/// <c>null</c> or empty. Repeated, leading and trailing spaces are ignored.
/// </returns>
/// <exception cref="FormatException">A token is not valid hexadecimal.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// A token is not a valid Unicode code point (out of range or a surrogate value).
/// </exception>
static public string HexToString(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return "";
    }

    string[] tokens = input.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    StringBuilder output = new StringBuilder(tokens.Length);

    foreach (string token in tokens)
    {
        int codePoint = Convert.ToInt32(token, 16);
        // ConvertFromUtf32 yields a surrogate pair above U+FFFF; the previous (char) cast
        // silently truncated such values to 16 bits.
        output.Append(Char.ConvertFromUtf32(codePoint));
    }
    return output.ToString();
}
//static public Byte[] unicodeToAnsi(char[] uc)
//{
// byte[] buf = Encoding.Unicode.GetBytes(uc);
// Byte[] ac = Encoding.GetEncoding(1252).GetBytes(buf);
//}
}
}