zoukankan      html  css  js  c++  java
  • 获取的网页数据以gzip编码返回,解压后网页中的中文变成了乱码;只需要把解码时使用的编码改为BIG5,繁体字就能正常显示了!

    using ICSharpCode.SharpZipLib.GZip;

    //(需要添加ICSharpCode.SharpZipLib.dll引用)
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

                //   准备请求  
                string url = "http://www.www.com";

                HttpWebRequest request = (HttpWebRequest)
                        WebRequest.Create(url);
                request.ProtocolVersion = new Version(1, 1);
                request.Accept = "*/*";
                request.Headers.Add("Accept-Encoding:   gzip");
                request.KeepAlive = true;
                request.UserAgent = "Mozilla/4.0   (compatible;   MSIE   6.0;   Windows   NT   5.1;   SV1;   .NET   CLR   1.1.4322;   .NET   CLR   2.0.50727)";
              

                //   读取回应  

                HttpWebResponse response = (HttpWebResponse)request.GetResponse();

                //Console.WriteLine("{0}", response.StatusDescription);
                //foreach (string key in response.Headers.AllKeys)
                //{
                //    Console.WriteLine("{0}:   {1}", key, response.Headers[key]);
                //}

                //   将回应全部读入一个   MemoryStream:  
                MemoryStream ms = new MemoryStream();
                try
                {
                    Stream res = response.GetResponseStream();
                    byte[] buffer = new byte[8192];
                    while (true)
                    {
                        int read = res.Read(buffer, 0, 8192);
                        if (read == 0)
                        {
                            //   如果服务器用的是   gzip   的话,只能靠读不出更多数据来判断是否已经读完  
                            Console.WriteLine("Response   is   terminated   due   to   zero   byte   reception.");
                            break;
                        }
                        else
                        {
                            ms.Write(buffer, 0, read);
                        }
                    }
                }
                catch (Exception esd)
                {
                    //   抛出异常也可能表示已经读完  
                    Console.WriteLine("Response   is   terminated   due   to   exception   " + esd.Message);
                }
                finally
                {
                    response.Close();
                }
                Console.WriteLine("Response   has   {0}   bytes.", ms.Length);

                //   ms   倒回开头:  

                ms.Seek(0, SeekOrigin.Begin);

                //   用   GZipInputStream   包裹:  

                GZipInputStream gzip = new GZipInputStream(ms);

                //   用   GZipInputStream   读取   ms   的内容并写入   ms2:  

                MemoryStream ms2 = new MemoryStream();
                try
                {
                    byte[] buffer = new byte[1];   //   一点一点读——因为这个服务器的gzip没有Footer,读到结尾的时候会出错,所以为了把最后一个字节都读出来,只能一点一点读  

                    while (true)
                    {
                        int read = gzip.Read(buffer, 0, 1);
                        if (read == 0) break;
                        ms2.Write(buffer, 0, read);

                       
                    }
                }
                catch (Exception sa)
                {
                    Console.WriteLine("Exception!   " + sa.ToString());
                }
                Console.WriteLine("Unzipped.");

                //   将   ms2(解压后的内容)保存到文件  
                //, System.Text.Encoding.GetEncoding("gb2312")
                Stream  fs;
              
                fs = File.Create("r00000000000.txt");
              
                             
                ms2.Seek(0, SeekOrigin.Begin);
                ms2.WriteTo(fs);  
                fs.Close();
               
                //Encoding code = System.Text.Encoding.GetEncoding("gb2312");
                this.textBox1.Text = System.Text.Encoding.GetEncoding("BIG5").GetString(ms2.ToArray());

  • 相关阅读:
    在Python中使用多进程快速处理数据
    深度学习中Embedding层有什么用?
    split("\s+") 和 split(" +") 有什么区别?
    python merge、concat合并数据集
    机器学习中常见的损失函数
    XGBoost、LightGBM的详细对比介绍
    $(function(){})的执行过程分析
    jQuery.extend({...})分析
    jquery核心功能分析
    print打印网页相关
  • 原文地址:https://www.cnblogs.com/Fooo/p/752437.html
Copyright © 2011-2022 走看看