不多说了,先上代码:
/// <summary>Timeout, in milliseconds, applied to every page request.</summary>
private const int PageRequestTimeoutMilliseconds = 30000;

/// <summary>Charset assumed when neither the HTTP response nor the HTML names a usable one.</summary>
private const string FallbackCharset = "utf-8";

/// <summary>
/// Matches a <c>charset=NAME</c> declaration, case-insensitively. Whitespace around <c>=</c> and an
/// optional opening quote are tolerated so that both <c>content="text/html; charset=gbk"</c> and the
/// HTML5 form <c>&lt;meta charset="gbk"&gt;</c> are recognised.
/// </summary>
private static readonly Regex CharsetRegex =
    new Regex(@"(?i)charset\s*=\s*[""']?(?<charset>[-a-zA-Z_0-9]+)", RegexOptions.Compiled);

/// <summary>
/// Downloads a page and returns its content decoded as text.
/// </summary>
/// <remarks>
/// The body is downloaded once and decoded with the charset announced by the HTTP response
/// (falling back to UTF-8 when that charset is missing or unknown). If the decoded HTML declares a
/// different charset, the same bytes are decoded again with the declared charset; if that charset
/// cannot be resolved, or yields nothing, the first decode is kept.
/// Any exception is logged through <c>LogHelper.Save</c> and whatever was decoded so far
/// (possibly an empty string) is returned; this method does not throw.
/// </remarks>
/// <param name="Url">Address of the page to download.</param>
/// <returns>The decoded page content, or an empty string when the download failed.</returns>
public static string GetWebContent(string Url)
{
    string strResult = "";
    try
    {
        string strPageCharacterSet;
        byte[] content = DownloadPageBytes(Url, out strPageCharacterSet);

        // The response may carry no charset at all, or one this runtime does not know.
        Encoding headerEncoding = TryResolveEncoding(strPageCharacterSet);
        if (headerEncoding == null)
        {
            strPageCharacterSet = FallbackCharset;
            headerEncoding = Encoding.UTF8;
        }

        strResult = StringHelps.RepalceStr(DecodePageBytes(content, headerEncoding), 0);

        // Charset declared inside the HTML itself; compared ordinally so the current culture
        // (e.g. Turkish casing of 'I') cannot break the match.
        string strCharacterSet = GetEncoding(strResult);
        if (!string.Equals(strCharacterSet, strPageCharacterSet, StringComparison.OrdinalIgnoreCase))
        {
            Encoding declaredEncoding = TryResolveEncoding(strCharacterSet);
            if (declaredEncoding != null)
            {
                // Re-decode the buffered bytes instead of requesting the page a second time.
                // NOTE(review): as in the original flow, StringHelps.RepalceStr is not applied
                // to the re-decoded text - confirm whether that is intended.
                string redecoded = DecodePageBytes(content, declaredEncoding);
                if (!string.IsNullOrEmpty(redecoded))
                {
                    strResult = redecoded;
                }
            }
        }
    }
    catch (Exception ex)
    {
        LogHelper.Save("获取页面出现乱码" + Url + ex.ToString());
    }
    return strResult;
}

/// <summary>
/// Downloads a page and decodes it with an explicitly named charset.
/// </summary>
/// <param name="Url">Address of the page to download.</param>
/// <param name="strCharacterSet">Name of the encoding used to decode the response body.</param>
/// <returns>
/// The decoded page content, or an empty string when the charset name is invalid or the download
/// failed (the exception is logged through <c>LogHelper.Save</c>, not rethrown).
/// </returns>
public static string GetWebContentByCharecterSet(string Url, string strCharacterSet)
{
    string strResult = "";
    try
    {
        // Resolve the encoding first so an invalid name fails before any network traffic.
        Encoding encoding = Encoding.GetEncoding(strCharacterSet);

        string ignoredHeaderCharset;
        byte[] content = DownloadPageBytes(Url, out ignoredHeaderCharset);
        strResult = DecodePageBytes(content, encoding);
    }
    catch (Exception ex)
    {
        LogHelper.Save("获取页面出现异常" + Url + ex.ToString());
    }
    return strResult;
}

/// <summary>
/// Extracts the charset name declared in a page's HTML.
/// </summary>
/// <param name="html">HTML text to inspect; may be null or empty.</param>
/// <returns>
/// The first charset name found after a <c>charset=</c> declaration, or <c>"utf-8"</c> when the
/// text is null/empty or contains no such declaration.
/// </returns>
private static string GetEncoding(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return FallbackCharset;
    }

    string charset = CharsetRegex.Match(html).Groups["charset"].Value;
    return string.IsNullOrEmpty(charset) ? FallbackCharset : charset;
}

/// <summary>
/// Performs the HTTP request and returns the raw response body, disposing the response and its
/// stream before returning.
/// </summary>
/// <param name="Url">Address of the page to download.</param>
/// <param name="strCharacterSet">Receives the charset reported by the response; may be null or empty.</param>
/// <returns>The raw bytes of the response body.</returns>
private static byte[] DownloadPageBytes(string Url, out string strCharacterSet)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
    request.Timeout = PageRequestTimeoutMilliseconds;
    request.Headers.Set("Pragma", "no-cache");

    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream streamReceive = response.GetResponseStream())
    using (MemoryStream buffer = new MemoryStream())
    {
        strCharacterSet = response.CharacterSet;
        if (streamReceive != null)
        {
            streamReceive.CopyTo(buffer);
        }
        return buffer.ToArray();
    }
}

/// <summary>
/// Decodes buffered page bytes through a <see cref="StreamReader"/>, the same reader type the
/// response stream was previously read with.
/// </summary>
/// <param name="content">Raw bytes of the page.</param>
/// <param name="encoding">Encoding used to decode the bytes.</param>
/// <returns>The decoded text.</returns>
private static string DecodePageBytes(byte[] content, Encoding encoding)
{
    using (MemoryStream stream = new MemoryStream(content))
    using (StreamReader streamReader = new StreamReader(stream, encoding))
    {
        return streamReader.ReadToEnd();
    }
}

/// <summary>
/// Resolves a charset name to an <see cref="Encoding"/> without throwing.
/// </summary>
/// <param name="strCharacterSet">Charset name; may be null or empty.</param>
/// <returns>The matching encoding, or null when the name is blank or not supported.</returns>
private static Encoding TryResolveEncoding(string strCharacterSet)
{
    if (string.IsNullOrEmpty(strCharacterSet))
    {
        return null;
    }

    try
    {
        return Encoding.GetEncoding(strCharacterSet.Trim());
    }
    catch (ArgumentException)
    {
        return null;
    }
    catch (NotSupportedException)
    {
        return null;
    }
}