zoukankan      html  css  js  c++  java
  • .NET 环境下 GET 获取页面出现乱码问题的解决

    不多说了,先上代码:

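    /// <summary>
            /// Fetches the content of a web page as text.
            /// </summary>
            /// <param name="Url">Address of the page to fetch.</param>
            /// <returns>The page text. Exceptions are caught and logged, in which case the value read so far (initially an empty string) is returned.</returns>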
            public static string GetWebContent(string Url)
            {
                // Downloads the page once, decodes it with the charset announced in the HTTP
                // response, and re-decodes the same bytes when the HTML itself declares a
                // different charset. Exceptions are logged and swallowed; the value read so
                // far (initially "") is returned.
                string strResult = "";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    request.Timeout = 30000; // milliseconds
                    request.Headers.Set("Pragma", "no-cache");

                    byte[] rawBody;
                    string strPageCharacterSet;
                    // Dispose the response and its stream so the underlying connection is released.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream streamReceive = response.GetResponseStream())
                    using (MemoryStream buffer = new MemoryStream())
                    {
                        strPageCharacterSet = response.CharacterSet;
                        if (streamReceive != null)
                        {
                            // The network stream can only be read once, so keep the raw bytes
                            // to allow a second decode without a second request.
                            streamReceive.CopyTo(buffer);
                        }
                        rawBody = buffer.ToArray();
                    }

                    // A missing or unrecognised header charset no longer aborts the fetch.
                    Encoding encoding = FindEncodingOrNull(strPageCharacterSet) ?? Encoding.UTF8;
                    strResult = DecodeBody(rawBody, encoding);
                    strResult = StringHelps.RepalceStr(strResult, 0);

                    // Compare code pages rather than names so aliases and letter case do not
                    // trigger a pointless re-decode.
                    Encoding declaredEncoding = FindEncodingOrNull(GetEncoding(strResult));
                    if (declaredEncoding != null && declaredEncoding.CodePage != encoding.CodePage)
                    {
                        // NOTE(review): as in the previous implementation, the re-decoded text is
                        // not passed through StringHelps.RepalceStr — confirm that is intended.
                        string redecoded = DecodeBody(rawBody, declaredEncoding);
                        if (!string.IsNullOrEmpty(redecoded))
                        {
                            strResult = redecoded;
                        }
                        // Otherwise keep the first decode instead of returning nothing.
                    }
                }
                catch (Exception ex)
                {
                    LogHelper.Save("获取页面出现乱码" + Url + ex.ToString());
                }
                return strResult;
            }

            // Resolves a charset name to an Encoding; returns null when the name is blank or unknown.
            private static Encoding FindEncodingOrNull(string charsetName)
            {
                if (string.IsNullOrEmpty(charsetName))
                {
                    return null;
                }
                // Tolerate surrounding whitespace or quotes around the charset value.
                string name = charsetName.Trim().Trim('"', '\'');
                if (name.Length == 0)
                {
                    return null;
                }
                try
                {
                    return Encoding.GetEncoding(name);
                }
                catch (ArgumentException)
                {
                    return null;
                }
            }

            // Decodes buffered bytes with a StreamReader, the same reader type previously used on the live stream.
            private static string DecodeBody(byte[] rawBody, Encoding encoding)
            {
                using (StreamReader reader = new StreamReader(new MemoryStream(rawBody), encoding))
                {
                    return reader.ReadToEnd();
                }
            }
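            /// <summary>
            /// Fetches the content of a web page, decoding it with the given charset.
            /// </summary>
            /// <param name="Url">Address of the page to fetch.</param>
            /// <param name="strCharacterSet">Name of the charset used to decode the response, e.g. "utf-8".</param>
            /// <returns>The decoded page text, or an empty string when an exception occurs (the exception is logged).</returns>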
            public static string GetWebContentByCharecterSet(string Url,string strCharacterSet)
            {
                // Downloads the page at Url and decodes the response body with the charset
                // named by strCharacterSet. Any exception (bad charset name, network error,
                // timeout) is logged and "" is returned.
                string strResult = "";
                try
                {
                    // Resolve the charset first so an invalid name fails before any network traffic.
                    Encoding encoding = Encoding.GetEncoding(strCharacterSet);

                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    request.Timeout = 30000; // milliseconds
                    request.Headers.Set("Pragma", "no-cache");

                    // Dispose the response and its stream so the underlying connection is released.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream streamReceive = response.GetResponseStream())
                    {
                        if (streamReceive != null)
                        {
                            using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
                            {
                                strResult = streamReader.ReadToEnd();
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    LogHelper.Save("获取页面出现异常" + Url + ex.ToString());
                }
                return strResult;
            }
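            /// <summary>
            /// Extracts the charset name declared in a page's HTML.
            /// </summary>
            /// <param name="html">HTML text to search.</param>
            /// <returns>The first charset name found in a "charset=" declaration, or "utf-8" when none is found.</returns>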
            static string GetEncoding(string html)
            {
                // Returns the first charset name declared in the HTML text, exactly as written
                // in the page, or "utf-8" when the text is null/empty or declares none.
                const string fallbackCharset = "utf-8";
                if (string.IsNullOrEmpty(html))
                {
                    return fallbackCharset;
                }

                // Allow optional whitespace around '=' and an optional opening quote so that
                // both  content="text/html; charset=gbk"  and  <meta charset="gbk">  match.
                string pattern = @"(?i)charset\s*=\s*[""']?(?<charset>[-a-zA-Z_0-9]+)";
                string charset = Regex.Match(html, pattern).Groups["charset"].Value;
                if (string.IsNullOrEmpty(charset))
                {
                    charset = fallbackCharset;
                }
                return charset;
            }
    
  • 相关阅读:
    hdu 1042 N!
    hdu 1002 A + B Problem II
    c++大数模板
    hdu 1004 Let the Balloon Rise
    hdu 4027 Can you answer these queries?
    poj 2823 Sliding Window
    hdu 3074 Multiply game
    hdu 1394 Minimum Inversion Number
    hdu 5199 Gunner
    九度oj 1521 二叉树的镜像
  • 原文地址:https://www.cnblogs.com/wdkshy/p/5311950.html
Copyright © 2011-2022 走看看