zoukankan      html  css  js  c++  java
  • .NET 环境下 GET 请求获取页面出现乱码问题的解决

    不多说了,先上代码:

    /// <summary>
            /// 获取页面内容
            /// </summary>
            /// <param name="Url">链接地址</param>
            /// <returns></returns>
            public static string GetWebContent(string Url)
            {
    
                string strResult = "", strCharacterSet="";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    StreamReader streamReader;
                    //声明一个HttpWebRequest请求
                    request.Timeout = 30000;
                    //设置连接超时时间
                    request.Headers.Set("Pragma", "no-cache");
                    HttpWebResponse response = (HttpWebResponse)request.GetResponse();
                    Stream streamReceive = response.GetResponseStream();
                    Encoding encoding;
                    string strPageCharacterSet = response.CharacterSet.ToLower();//获取页面响应中定义的编码字符串
                    encoding = Encoding.GetEncoding(strPageCharacterSet);
                    streamReader = new StreamReader(streamReceive, encoding);
                    strResult = streamReader.ReadToEnd();
                    strResult = StringHelps.RepalceStr(strResult, 0);
                    strCharacterSet = GetEncoding(strResult).ToLower();//获取页面html中声明的编码字符串
                    if (!strCharacterSet.Equals(strPageCharacterSet))//比较两者的编码格式是否一致,如果不一致,以页面中定义的编码格式再次去获取页面内容
                    {
                        strResult = GetWebContentByCharecterSet(Url, strCharacterSet);
                    }
                    if (string.IsNullOrEmpty(strResult))
                    {
                        streamReader = new StreamReader(streamReceive, encoding);
                        strResult = streamReader.ReadToEnd();
                    }
                }
                catch (Exception ex)
                {
                    LogHelper.Save("获取页面出现乱码" + Url + ex.ToString());
                }
                return strResult;
            }
            /// <summary>
            /// 指定编码格式获取页面代码
            /// </summary>
            /// <param name="Url"></param>
            /// <param name="strCharacterSet"></param>
            /// <returns></returns>
            public static string GetWebContentByCharecterSet(string Url,string strCharacterSet)
            {
    
                string strResult = "";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    StreamReader streamReader;
                    //声明一个HttpWebRequest请求
                    request.Timeout = 30000;
                    //设置连接超时时间
                    request.Headers.Set("Pragma", "no-cache");
                    HttpWebResponse response = (HttpWebResponse)request.GetResponse();
                    Stream streamReceive = response.GetResponseStream();
                    Encoding encoding;
                    encoding = Encoding.GetEncoding(strCharacterSet);
                    streamReader = new StreamReader(streamReceive, encoding);
                    strResult = streamReader.ReadToEnd();
                }
                catch (Exception ex)
                {
                    LogHelper.Save("获取页面出现异常" + Url + ex.ToString());
                }
                return strResult;
            }
            /// <summary>
            /// 根据网页的HTML内容提取网页的Encoding
            /// </summary>
            /// <param name="html"></param>
            /// <returns></returns>
            static string GetEncoding(string html)
            {
                string pattern = @"(?i)charset=(?<charset>[-a-zA-Z_0-9]+)";
                string charset = Regex.Match(html, pattern).Groups["charset"].Value;
                if (string.IsNullOrEmpty(charset))
                    charset = "utf-8";
                return charset;
            }
    
  • 相关阅读:
    事务隔离级别
    手机摄影之生活拍照技巧
    html.unescape(s)
    正则表达式编译和DOTALL小结
    重试模块==>retrying
    js逆向==>js2py
    mysql索引设计原则
    Django请求与响应
    Linux常用命令
    C++11 auto_ptr 的问题
  • 原文地址:https://www.cnblogs.com/wdkshy/p/5311950.html
Copyright © 2011-2022 走看看