zoukankan      html  css  js  c++  java
  • .NET 环境下 GET 获取页面出现乱码问题的解决

    不多说了,先上代码:

    /// <summary>
            /// Downloads a page and returns its text, decoded with the character set
            /// announced in the HTTP response. If the HTML itself declares a different
            /// character set, the page is requested again and decoded with that one.
            /// </summary>
            /// <param name="Url">Address of the page to fetch.</param>
            /// <returns>
            /// The decoded page text. An empty string is returned when the download
            /// fails; the failure is logged, not thrown.
            /// </returns>
            public static string GetWebContent(string Url)
            {
                string strResult = "";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    // Timeout is expressed in milliseconds.
                    request.Timeout = 30000;
                    request.Headers.Set("Pragma", "no-cache");

                    string strPageCharacterSet;
                    // Dispose the response and reader so the underlying connection is released.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    {
                        // Character set taken from the response headers; it may be null or empty.
                        strPageCharacterSet = response.CharacterSet;
                        Encoding encoding = null;
                        if (!string.IsNullOrEmpty(strPageCharacterSet))
                        {
                            try
                            {
                                encoding = Encoding.GetEncoding(strPageCharacterSet);
                            }
                            catch (ArgumentException)
                            {
                                // Unknown or malformed charset name: fall back to UTF-8 below.
                            }
                        }
                        if (encoding == null)
                        {
                            encoding = Encoding.UTF8;
                            strPageCharacterSet = encoding.WebName;
                        }

                        using (Stream streamReceive = response.GetResponseStream())
                        using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
                        {
                            strResult = streamReader.ReadToEnd();
                        }
                    }

                    // NOTE(review): StringHelps.RepalceStr is defined elsewhere; its effect is not visible here.
                    strResult = StringHelps.RepalceStr(strResult, 0);

                    // Character set declared inside the HTML.
                    string strCharacterSet = GetEncoding(strResult);

                    // Charset names are case-insensitive identifiers, so compare ordinally
                    // instead of lower-casing with the current culture.
                    if (!string.Equals(strCharacterSet, strPageCharacterSet, StringComparison.OrdinalIgnoreCase))
                    {
                        string strRetried = GetWebContentByCharecterSet(Url, strCharacterSet);
                        // Keep the first decoding when the second attempt produced nothing;
                        // the original response stream is already consumed and cannot be re-read.
                        if (!string.IsNullOrEmpty(strRetried))
                        {
                            strResult = strRetried;
                        }
                    }
                }
                catch (Exception ex)
                {
                    LogHelper.Save("获取页面出现乱码" + Url + ex.ToString());
                }
                return strResult;
            }
            /// <summary>
            /// Downloads a page and decodes the response body with the named character set.
            /// </summary>
            /// <param name="Url">Address of the page to fetch.</param>
            /// <param name="strCharacterSet">Encoding name handed to <c>Encoding.GetEncoding</c>, e.g. "utf-8".</param>
            /// <returns>
            /// The decoded page text, or an empty string when the encoding name is not
            /// recognised or the download fails; the failure is logged, not thrown.
            /// </returns>
            public static string GetWebContentByCharecterSet(string Url,string strCharacterSet)
            {
                string strResult = "";
                try
                {
                    // Resolve the encoding first so an invalid name does not cost a network round trip.
                    Encoding encoding = Encoding.GetEncoding(strCharacterSet);

                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    // Timeout is expressed in milliseconds.
                    request.Timeout = 30000;
                    request.Headers.Set("Pragma", "no-cache");

                    // Dispose the response, stream and reader so the connection is released.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream streamReceive = response.GetResponseStream())
                    using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
                    {
                        strResult = streamReader.ReadToEnd();
                    }
                }
                catch (Exception ex)
                {
                    LogHelper.Save("获取页面出现异常" + Url + ex.ToString());
                }
                return strResult;
            }
            /// <summary>
            /// Extracts the character set name declared in a page's HTML.
            /// Matches both the "Content-Type" form (<c>charset=gb2312</c>) and the
            /// quoted attribute form (<c>charset="utf-8"</c> or <c>charset='utf-8'</c>).
            /// </summary>
            /// <param name="html">HTML text to search; may be null or empty.</param>
            /// <returns>
            /// The first declared character set name exactly as written in the HTML,
            /// or "utf-8" when none is found.
            /// </returns>
            static string GetEncoding(string html)
            {
                if (string.IsNullOrEmpty(html))
                {
                    return "utf-8";
                }

                // Optional whitespace around '=' and an optional opening quote let the
                // pattern match a quoted meta charset attribute as well as the unquoted form.
                string pattern = @"(?i)charset\s*=\s*[""']?(?<charset>[-a-zA-Z_0-9]+)";
                string charset = Regex.Match(html, pattern).Groups["charset"].Value;
                if (string.IsNullOrEmpty(charset))
                {
                    charset = "utf-8";
                }
                return charset;
            }
    
  • 相关阅读:
    基于Diff机制的多个状态合并
    do_mmap解读
    Linux对用户态的动态内存管理
    我的WordPress站点
    使用Bochs学习硬件原理
    inode的若干锚
    Use sed and awk to prettify json
    IO完成端口
    如何使用iText制作中文PDF
    Font and PDF
  • 原文地址:https://www.cnblogs.com/wdkshy/p/5311950.html
Copyright © 2011-2022 走看看