zoukankan      html  css  js  c++  java
  • .NET 环境下通过 GET 请求获取页面时出现乱码问题的解决

    不多说了,先上代码:

    /// <summary>
            /// Downloads the page at <paramref name="Url"/> and returns its text, trying to pick the
            /// right character set: the body is first decoded with the charset announced in the HTTP
            /// response, then the charset declared inside the HTML is extracted; if the two differ,
            /// the page is fetched again and decoded with the HTML-declared charset.
            /// </summary>
            /// <param name="Url">Address of the page to fetch.</param>
            /// <returns>
            /// The page text, or an empty string when the request fails (the exception is logged,
            /// not rethrown).
            /// </returns>
            public static string GetWebContent(string Url)
            {
                string strResult = "";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    // Timeout for obtaining the response, in milliseconds.
                    request.Timeout = 30000;
                    request.Headers.Set("Pragma", "no-cache");

                    string headerCharset;
                    // Dispose response, stream and reader so the underlying connection is released.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    {
                        // The header may carry no charset at all, or a quoted one; normalise before use.
                        headerCharset = (response.CharacterSet ?? "").Trim().Trim('"', '\'');
                        if (headerCharset.Length == 0)
                        {
                            // Same default that GetEncoding applies when the HTML declares nothing.
                            headerCharset = "utf-8";
                        }

                        Encoding encoding;
                        try
                        {
                            encoding = Encoding.GetEncoding(headerCharset);
                        }
                        catch (ArgumentException)
                        {
                            // Unknown charset name in the header: decode as UTF-8 so the HTML-declared
                            // charset can still be sniffed below.
                            encoding = Encoding.UTF8;
                        }

                        using (Stream streamReceive = response.GetResponseStream())
                        using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
                        {
                            strResult = streamReader.ReadToEnd();
                        }
                    }

                    strResult = StringHelps.RepalceStr(strResult, 0);

                    // Charset declared inside the HTML itself.
                    string declaredCharset = GetEncoding(strResult);
                    if (!string.Equals(declaredCharset, headerCharset, StringComparison.OrdinalIgnoreCase))
                    {
                        // Header and document disagree: trust the document and fetch again with its charset.
                        string redecoded = GetWebContentByCharecterSet(Url, declaredCharset);
                        if (!string.IsNullOrEmpty(redecoded))
                        {
                            strResult = redecoded;
                        }
                        // Otherwise keep the first decode. The response stream has already been read
                        // to its end, so reading it a second time could only produce empty text.
                    }
                }
                catch (Exception ex)
                {
                    LogHelper.Save("获取页面出现乱码" + Url + ex.ToString());
                }
                return strResult;
            }
            /// <summary>
            /// Downloads the page at <paramref name="Url"/> and decodes the response body with the
            /// character set named by <paramref name="strCharacterSet"/>.
            /// </summary>
            /// <param name="Url">Address of the page to fetch.</param>
            /// <param name="strCharacterSet">Encoding name passed to <c>Encoding.GetEncoding</c>, e.g. "gbk".</param>
            /// <returns>
            /// The decoded page text, or an empty string when the request or the encoding lookup
            /// fails (the exception is logged, not rethrown).
            /// </returns>
            public static string GetWebContentByCharecterSet(string Url,string strCharacterSet)
            {
                string strResult = "";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    // Timeout for obtaining the response, in milliseconds.
                    request.Timeout = 30000;
                    request.Headers.Set("Pragma", "no-cache");

                    // Resolve the encoding before going to the network: an unknown name throws here,
                    // so no request is wasted on a body that could not be decoded anyway.
                    Encoding encoding = Encoding.GetEncoding(strCharacterSet);

                    // Dispose response, stream and reader so the underlying connection is released.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream streamReceive = response.GetResponseStream())
                    using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
                    {
                        strResult = streamReader.ReadToEnd();
                    }
                }
                catch (Exception ex)
                {
                    LogHelper.Save("获取页面出现异常" + Url + ex.ToString());
                }
                return strResult;
            }
            /// <summary>
            /// Extracts the character-set name declared in a page's HTML.
            /// Recognises both the legacy form (<c>content="text/html; charset=gb2312"</c>) and the
            /// HTML5 form with an optionally quoted value (<c>&lt;meta charset="gbk"&gt;</c>).
            /// </summary>
            /// <param name="html">HTML text to scan; may be null or empty.</param>
            /// <returns>
            /// The first declared charset name exactly as written in the page, or "utf-8" when
            /// none is found.
            /// </returns>
            static string GetEncoding(string html)
            {
                const string defaultCharset = "utf-8";
                if (string.IsNullOrEmpty(html))
                {
                    return defaultCharset;
                }

                // Optional whitespace around '=' and an optional opening quote are skipped so that
                // <meta charset="gbk"> is matched as well as "...; charset=gbk".
                string pattern = @"(?i)charset\s*=\s*[""']?\s*(?<charset>[-a-zA-Z_0-9]+)";
                string charset = Regex.Match(html, pattern).Groups["charset"].Value;
                return string.IsNullOrEmpty(charset) ? defaultCharset : charset;
            }
    
  • 相关阅读:
    Azure Functions(一)什么是 ServerLess
    Azure Terraform(八)利用Azure DevOps 实现Infra资源和.NET CORE Web 应用程序的持续集成、持续部署
    Azure Terraform(六)Common Module
    Azure Terraform(五)利用Azure DevOps 实现自动化部署基础资源
    Azure Terraform(四)状态文件存储
    Java | zuul 1.x 是如何实现请求转发的
    Go | Go 结合 Consul 实现动态反向代理
    Java | 在 Java 中执行动态表达式语句: 前中后缀、Ognl、SpEL、Groovy、Jexl3
    宝,我今天CR了,C的什么R? 走过场的CR
    被监控轰炸了,不得不使出绝招
  • 原文地址:https://www.cnblogs.com/wdkshy/p/5311950.html
Copyright © 2011-2022 走看看