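今天用 LumiSoft.Net 这个组件收取邮件时,发现邮件标题中含有类似于 =?utf-8?B?5rWL6K+V6YKu5Lu2?= 和 =?gbk?Q?=C6=BD=B0=B2=D6=A4=C8=AF*=C3=BF=D6=DC=B1=A8?= 这两种格式的"乱码"。随后 Google 了一下,原因是邮件头只能传输 ASCII 字符,邮件本身的字符集跟传输过程采用的编码并不是一回事:非 ASCII 文本会先按邮件的字符集转成字节,再按 RFC 2047 编码成 ASCII 文本,格式为 =?字符集?编码方式?编码后的文本?=。例如 =?utf-8?B?5rWL6K+V6YKu5Lu2?= 表示邮件字符集是 utf-8,传输采用 Base64 编码(B);第二个例子中的 Q 表示传输采用 Quoted-Printable 编码。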
对于这种格式的字符,Google大神帮我搜到了相关的处理代码,然后综合项目,整理出来了,留个记号。
/// <summary>
/// Matches a single RFC 2047 encoded-word ("=?charset?B-or-Q?text?=").
/// The optional "gap" group captures whitespace that separates this word
/// from a directly following encoded-word; RFC 2047 section 6.2 says such
/// whitespace is not part of the displayed text.
/// </summary>
private static readonly Regex EncodedWordRegex = new Regex(
    @"=\?(?<encode>[^?\s]+)\?(?<type>[BQ])\?(?<body>.*?)\?=(?<gap>[ \t\r\n]+(?==\?[^?\s]+\?[BQ]\?.*?\?=))?",
    RegexOptions.IgnoreCase);

/// <summary>
/// Decodes every RFC 2047 encoded-word found in a mail header value
/// (for example "=?utf-8?B?5rWL6K+V6YKu5Lu2?=") into readable text.
/// </summary>
/// <param name="input">Raw header value; may be null or empty.</param>
/// <returns>
/// The header with each decodable encoded-word replaced by its text.
/// Words that cannot be decoded (unknown charset, invalid Base64) are left
/// exactly as received. Null or empty input is returned unchanged.
/// </returns>
private string GetMailSubject(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    // Each word is decoded independently so one malformed word cannot
    // prevent the remaining words from being decoded.
    string decoded = EncodedWordRegex.Replace(input, new MatchEvaluator(DecodeEncodedWord));

    if (EncodedWordRegex.IsMatch(input))
    {
        try
        {
            // Log the untouched header next to the decoded one.
            FileLogHelper.WriteInfo(string.Format("原邮件标题:[{0}]\r\n解析后标题:[{1}]", input, decoded));
        }
        catch (Exception)
        {
            // Logging is diagnostic only; a logging failure must not lose the decoded subject.
        }
    }

    return decoded;
}

/// <summary>
/// Decodes one matched encoded-word; returns the matched text unchanged when it cannot be decoded.
/// </summary>
private static string DecodeEncodedWord(Match word)
{
    try
    {
        Encoding charset = ResolveCharset(word.Groups["encode"].Value);
        string body = word.Groups["body"].Value;

        // The encoding letter is case-insensitive ("q" is as valid as "Q").
        if (string.Equals(word.Groups["type"].Value, "Q", StringComparison.OrdinalIgnoreCase))
        {
            // In header Q-encoding an underscore stands for a space (RFC 2047 section 4.2).
            return QuotedPrintable.Decode(body.Replace('_', ' '), charset);
        }

        return charset.GetString(Convert.FromBase64String(body));
    }
    catch (FormatException)
    {
        // Body is not valid Base64.
        return word.Value;
    }
    catch (ArgumentException)
    {
        // Charset name is not known to this runtime.
        return word.Value;
    }
    catch (NotSupportedException)
    {
        // Charset is known but not available on this platform.
        return word.Value;
    }
}

/// <summary>
/// Maps the charset label of an encoded-word to an <see cref="Encoding"/>,
/// applying the aliases this mail robot has always accepted.
/// </summary>
private static Encoding ResolveCharset(string name)
{
    // RFC 2231 allows a language suffix such as "utf-8*en"; it does not affect decoding.
    int languageTag = name.IndexOf('*');
    if (languageTag >= 0)
    {
        name = name.Substring(0, languageTag);
    }

    name = name.Trim();
    string key = name.ToLowerInvariant();

    if (key.Contains("utf8") || key.Contains("utf-8"))
    {
        return Encoding.UTF8;
    }

    if (key.Contains("gbk") || key.Contains("euccn") || key.Contains("euc-cn"))
    {
        return Encoding.GetEncoding("gb2312");
    }

    return Encoding.GetEncoding(name);
}
上面的代码针对 B(Base64)和 Q(Quoted-Printable)两种编码方式分别做了处理,其中 Q 类型调用了一个 Quoted-Printable 编码/解码类,代码如下:
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;

namespace Wind.MailRobot.BLL
{
    /// <summary>
    /// Quoted-Printable encoder/decoder (RFC 2045 section 6.7).
    /// </summary>
    public class QuotedPrintable
    {
        private const byte EQUALS = 61;
        private const byte CR = 13;
        private const byte LF = 10;
        private const byte SPACE = 32;

        // An encoded line may hold at most 76 characters; one is reserved for the "=" of a soft line break.
        private const int MaxLineContent = 75;

        /// <summary>
        /// Encodes a string to Quoted-Printable.
        /// </summary>
        /// <param name="_ToEncode">String to encode.</param>
        /// <param name="encoding">Character encoding used to turn the string into bytes.</param>
        /// <returns>
        /// The encoded text. CR and LF are passed through as hard line breaks; lines that
        /// would grow past 76 characters are wrapped with a soft line break ("=\r\n").
        /// </returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static string Encode(string _ToEncode, Encoding encoding)
        {
            if (_ToEncode == null)
            {
                throw new ArgumentNullException("_ToEncode");
            }
            if (encoding == null)
            {
                throw new ArgumentNullException("encoding");
            }

            byte[] bytes = encoding.GetBytes(_ToEncode);
            StringBuilder encoded = new StringBuilder(bytes.Length * 3);
            int lineLength = 0; // characters already written on the current output line

            for (int i = 0; i < bytes.Length; i++)
            {
                byte current = bytes[i];

                if (current == CR || current == LF)
                {
                    // Hard line break: emit as-is and start counting a new line.
                    encoded.Append((char)current);
                    lineLength = 0;
                    continue;
                }

                string token;
                if (current == SPACE)
                {
                    // A space that ends a line (or the data) must be escaped, otherwise
                    // transports that strip trailing whitespace would lose it.
                    bool endsLine = i + 1 >= bytes.Length || bytes[i + 1] == CR || bytes[i + 1] == LF;
                    token = endsLine ? "=20" : " ";
                }
                else if (current < 33 || current > 126 || current == EQUALS)
                {
                    // Control characters (including TAB), 8-bit bytes and "=" itself are escaped as "=XX".
                    token = "=" + current.ToString("X2");
                }
                else
                {
                    token = ((char)current).ToString();
                }

                // Wrap before the token so an "=XX" escape is never split across lines.
                if (lineLength + token.Length > MaxLineContent)
                {
                    encoded.Append("=\r\n");
                    lineLength = 0;
                }

                encoded.Append(token);
                lineLength += token.Length;
            }

            return encoded.ToString();
        }

        /// <summary>
        /// Decodes a Quoted-Printable encoded string.
        /// </summary>
        /// <param name="_ToDecode">The encoded string to decode.</param>
        /// <param name="encoding">Character encoding of the decoded bytes.</param>
        /// <returns>
        /// The decoded string. Soft line breaks ("=" followed by CRLF or LF) are removed.
        /// A "=" that is not followed by two hex digits is kept literally. This method
        /// does not throw: if either argument is null, or the bytes cannot be converted
        /// with <paramref name="encoding"/>, <paramref name="_ToDecode"/> is returned unchanged.
        /// </returns>
        public static string Decode(string _ToDecode, Encoding encoding)
        {
            if (_ToDecode == null || encoding == null)
            {
                return _ToDecode;
            }

            try
            {
                List<byte> bytes = new List<byte>(_ToDecode.Length);
                int i = 0;

                while (i < _ToDecode.Length)
                {
                    char c = _ToDecode[i];

                    if (c == '=')
                    {
                        int remaining = _ToDecode.Length - i - 1;

                        // Soft line break: "=" CRLF, or "=" LF from sources that normalised line endings.
                        if (remaining >= 2 && _ToDecode[i + 1] == '\r' && _ToDecode[i + 2] == '\n')
                        {
                            i += 3;
                            continue;
                        }
                        if (remaining >= 1 && _ToDecode[i + 1] == '\n')
                        {
                            i += 2;
                            continue;
                        }

                        // Escaped byte: "=" followed by two hex digits (either case).
                        if (remaining >= 2 && Uri.IsHexDigit(_ToDecode[i + 1]) && Uri.IsHexDigit(_ToDecode[i + 2]))
                        {
                            bytes.Add((byte)((Uri.FromHex(_ToDecode[i + 1]) << 4) | Uri.FromHex(_ToDecode[i + 2])));
                            i += 3;
                            continue;
                        }

                        // Malformed escape: keep the "=" instead of discarding the whole input.
                        bytes.Add(EQUALS);
                        i++;
                        continue;
                    }

                    if (c <= 0xFF)
                    {
                        // Literal character whose code is the byte value.
                        bytes.Add((byte)c);
                        i++;
                        continue;
                    }

                    // A character that does not fit in one byte cannot be raw Quoted-Printable
                    // data; keep it by converting it with the target encoding.
                    int width = (char.IsHighSurrogate(c) && i + 1 < _ToDecode.Length && char.IsLowSurrogate(_ToDecode[i + 1])) ? 2 : 1;
                    bytes.AddRange(encoding.GetBytes(_ToDecode.Substring(i, width)));
                    i += width;
                }

                return encoding.GetString(bytes.ToArray());
            }
            catch (ArgumentException)
            {
                // Encoder/decoder fallback exceptions derive from ArgumentException.
                return _ToDecode;
            }
        }
    }
}