zoukankan      html  css  js  c++  java
  • HtmlAgilityPack中文乱码问题

    1. 打开HtmlAgilityPack.1.4.0.Source工程   
    2. 找到HtmlWeb.cs文件打开修改下面方法中的一小段代码:  
    3.     private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc, IWebProxy proxy,  
    4.                                    ICredentials creds)函数中的下方的代码  
    5.             Encoding respenc = !string.IsNullOrEmpty(resp.ContentEncoding)  
    6.                                   ? Encoding.GetEncoding(resp.ContentEncoding)  
    7.                                   : null;  
    8.             /*修改成下面的即可*/  
    9.             /*王..修改 中文乱码问题*/  
    10.              //Encoding respenc = !string.IsNullOrEmpty(resp.ContentEncoding)  
    11.             //                       ? Encoding.GetEncoding(resp.ContentEncoding)  
    12.             //                       : null;  
    13.             System.Text.Encoding respenc;  
    14.   
    15.             if ((resp.ContentEncoding != null) && (resp.ContentEncoding.Length > 0))  
    16.             {  
    17.                 respenc = System.Text.Encoding.GetEncoding(resp.ContentEncoding);  
    18.             }  
    19.             else if ((resp.CharacterSet != null) && (resp.CharacterSet.Length > 0))//根据Content-Type中获取的charset  
    20.             {  
    21.                 if (string.Compare(resp.CharacterSet, "ISO-8859-1", true, System.Globalization.CultureInfo.InvariantCulture) == 0)  
    22.                     respenc = System.Text.Encoding.GetEncoding("GB2312");  
    23.                 else  
    24.                     respenc = System.Text.Encoding.GetEncoding(resp.CharacterSet);  
    25.             }  
    26.             else  
    27.             {  
    28.                 respenc = System.Text.Encoding.GetEncoding("GB2312");  
    29.             }  
    /// <summary>
    /// Downloads the HTML source of the given URL.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <param name="encoding">
    /// Encoding used to decode the response body. When null, the encoding is
    /// detected by <c>DecodeData</c>.
    /// </param>
    /// <returns>
    /// The decoded page text. Returns <c>string.Empty</c> when obtaining the response
    /// throws, and <c>null</c> when the status code is not 200 OK or any other
    /// error occurs while creating the request or reading the body.
    /// </returns>
    public static string GetWebHtml(string url, Encoding encoding)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
            HttpWebResponse response;

            try
            {
                response = (HttpWebResponse)request.GetResponse();
            }
            catch
            {
                // Best-effort contract kept from the original: a failed request
                // is reported as an empty string rather than an exception.
                return string.Empty;
            }

            // The response owns the underlying connection; release it on every
            // path, including the non-OK early return and read failures.
            using (response)
            {
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    return null;
                }

                using (Stream body = response.GetResponseStream())
                {
                    // No encoding supplied: let DecodeData detect it.
                    if (encoding == null)
                    {
                        return DecodeData(body, response);
                    }

                    // Caller supplied the encoding explicitly.
                    using (StreamReader reader = new StreamReader(body, encoding))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
        }
        catch
        {
            // Best-effort contract kept from the original: any other failure
            // (bad URL, read error, decode error) yields null.
            return null;
        }
    }


            
    /// <summary>
    /// Reads the whole response body and decodes it to text.
    /// The charset is taken from the Content-Type response header; when the
    /// header carries none, the body is scanned for the first <c>charset=</c>
    /// occurrence (for example inside a meta tag). If no usable charset is
    /// found, gb2312 is used.
    /// </summary>
    /// <param name="responseStream">Body stream of the response; it is read to the end and closed.</param>
    /// <param name="response">Response whose Content-Type header is inspected.</param>
    /// <returns>The decoded body text.</returns>
    private static string DecodeData(Stream responseStream, HttpWebResponse response)
    {
        string charsetName = ExtractCharsetName(response.Headers["content-type"]);

        // Buffer the whole body so it can be decoded twice if needed:
        // once as ASCII to sniff the charset, once with the real encoding.
        byte[] body;
        using (MemoryStream buffered = new MemoryStream())
        {
            byte[] chunk = new byte[0x400];
            int read;
            while ((read = responseStream.Read(chunk, 0, chunk.Length)) > 0)
            {
                buffered.Write(chunk, 0, read);
            }
            body = buffered.ToArray();
        }
        responseStream.Close();

        if (charsetName == null)
        {
            // Header gave no charset: look for one declared in the document itself.
            charsetName = ExtractCharsetName(DecodeBytes(body, Encoding.ASCII));
        }

        return DecodeBytes(body, ResolveEncodingOrDefault(charsetName));
    }

    /// <summary>
    /// Returns the value that follows the first "charset=" in <paramref name="text"/>
    /// (matched case-insensitively), without surrounding quotes or trailing
    /// parameters; null when there is no such value.
    /// </summary>
    private static string ExtractCharsetName(string text)
    {
        const string Marker = "charset=";

        if (string.IsNullOrEmpty(text))
        {
            return null;
        }

        int markerIndex = text.IndexOf(Marker, System.StringComparison.OrdinalIgnoreCase);
        if (markerIndex < 0)
        {
            return null;
        }

        // Skip whitespace and an opening quote, as in <meta charset="utf-8">.
        int start = markerIndex + Marker.Length;
        while (start < text.Length
               && (char.IsWhiteSpace(text[start]) || text[start] == '"' || text[start] == '\''))
        {
            start++;
        }

        // The name ends at the first character that cannot be part of a charset
        // label (closing quote, ';', '>', '/', whitespace, ...).
        int end = start;
        while (end < text.Length
               && (char.IsLetterOrDigit(text[end])
                   || text[end] == '-' || text[end] == '_' || text[end] == '.'
                   || text[end] == ':' || text[end] == '+'))
        {
            end++;
        }

        return end > start ? text.Substring(start, end - start) : null;
    }

    /// <summary>
    /// Maps a charset name to an Encoding, falling back to gb2312 when the name
    /// is null or not recognised.
    /// </summary>
    private static Encoding ResolveEncodingOrDefault(string charsetName)
    {
        const string FallbackName = "gb2312";

        if (charsetName == null)
        {
            return Encoding.GetEncoding(FallbackName);
        }

        // Kept from the original: GBK content is decoded with the GB2312 encoding.
        if (string.Equals(charsetName, "GBK", System.StringComparison.OrdinalIgnoreCase))
        {
            charsetName = "GB2312";
        }

        try
        {
            return Encoding.GetEncoding(charsetName);
        }
        catch (System.ArgumentException)
        {
            // Unknown or malformed charset name.
            return Encoding.GetEncoding(FallbackName);
        }
        catch (System.NotSupportedException)
        {
            // Known name, but the encoding is not available on this platform.
            return Encoding.GetEncoding(FallbackName);
        }
    }

    /// <summary>
    /// Decodes <paramref name="data"/> with <paramref name="encoding"/> through a
    /// StreamReader, as the original code did.
    /// </summary>
    private static string DecodeBytes(byte[] data, Encoding encoding)
    {
        using (MemoryStream source = new MemoryStream(data))
        using (StreamReader reader = new StreamReader(source, encoding))
        {
            return reader.ReadToEnd();
        }
    }
     string Html = XINLG.Labs.Utils.NetUtil.GetWebHtml("http://www.cnblogs.com/pick/"null);
                HtmlDocument doc 
    = new HtmlDocument();
                doc.LoadHtml(Html); 
  • 相关阅读:
    localStorage和sessionStorage区别(包括同源的定义)
    跨域问题实践总结! 上(JSONP/document.domain/window.name)
    7月11日计划
    图形验证码知识点整理 Object.prototype.toString.call()等
    学习日报 7-10(验证码)
    Mysql安装与主从配置
    windows service编程
    Entity Framework——常见报错总结
    Entity Framework——读写分离
    Entity Framework——执行sql语句
  • 原文地址:https://www.cnblogs.com/jes_shaw/p/2247632.html
Copyright © 2011-2022 走看看