zoukankan      html  css  js  c++  java
  • 获取网页数据时,服务器返回的内容编码(Content-Encoding)是 gzip;解压后,网页中的中文变成了乱码。只需把解码所用的字符编码改为 BIG5,繁体字就能正常显示了!

    using ICSharpCode.SharpZipLib.GZip;

    // (Requires a reference to ICSharpCode.SharpZipLib.dll.)
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

                //   准备请求  
                string url = "http://www.www.com";

                HttpWebRequest request = (HttpWebRequest)
                        WebRequest.Create(url);
                request.ProtocolVersion = new Version(1, 1);
                request.Accept = "*/*";
                request.Headers.Add("Accept-Encoding:   gzip");
                request.KeepAlive = true;
                request.UserAgent = "Mozilla/4.0   (compatible;   MSIE   6.0;   Windows   NT   5.1;   SV1;   .NET   CLR   1.1.4322;   .NET   CLR   2.0.50727)";
              

                //   读取回应  

                HttpWebResponse response = (HttpWebResponse)request.GetResponse();

                //Console.WriteLine("{0}", response.StatusDescription);
                //foreach (string key in response.Headers.AllKeys)
                //{
                //    Console.WriteLine("{0}:   {1}", key, response.Headers[key]);
                //}

                //   将回应全部读入一个   MemoryStream:  
                MemoryStream ms = new MemoryStream();
                try
                {
                    Stream res = response.GetResponseStream();
                    byte[] buffer = new byte[8192];
                    while (true)
                    {
                        int read = res.Read(buffer, 0, 8192);
                        if (read == 0)
                        {
                            //   如果服务器用的是   gzip   的话,只能靠读不出更多数据来判断是否已经读完  
                            Console.WriteLine("Response   is   terminated   due   to   zero   byte   reception.");
                            break;
                        }
                        else
                        {
                            ms.Write(buffer, 0, read);
                        }
                    }
                }
                catch (Exception esd)
                {
                    //   抛出异常也可能表示已经读完  
                    Console.WriteLine("Response   is   terminated   due   to   exception   " + esd.Message);
                }
                finally
                {
                    response.Close();
                }
                Console.WriteLine("Response   has   {0}   bytes.", ms.Length);

                //   ms   倒回开头:  

                ms.Seek(0, SeekOrigin.Begin);

                //   用   GZipInputStream   包裹:  

                GZipInputStream gzip = new GZipInputStream(ms);

                //   用   GZipInputStream   读取   ms   的内容并写入   ms2:  

                MemoryStream ms2 = new MemoryStream();
                try
                {
                    byte[] buffer = new byte[1];   //   一点一点读——因为这个服务器的gzip没有Footer,读到结尾的时候会出错,所以为了把最后一个字节都读出来,只能一点一点读  

                    while (true)
                    {
                        int read = gzip.Read(buffer, 0, 1);
                        if (read == 0) break;
                        ms2.Write(buffer, 0, read);

                       
                    }
                }
                catch (Exception sa)
                {
                    Console.WriteLine("Exception!   " + sa.ToString());
                }
                Console.WriteLine("Unzipped.");

                //   将   ms2(解压后的内容)保存到文件  
                //, System.Text.Encoding.GetEncoding("gb2312")
                Stream  fs;
              
                fs = File.Create("r00000000000.txt");
              
                             
                ms2.Seek(0, SeekOrigin.Begin);
                ms2.WriteTo(fs);  
                fs.Close();
               
                //Encoding code = System.Text.Encoding.GetEncoding("gb2312");
                this.textBox1.Text = System.Text.Encoding.GetEncoding("BIG5").GetString(ms2.ToArray());

  • 相关阅读:
    [USACO][最短路]Cow Tours
    [USACO][枚举]Preface Numbering
    [USACO][枚举]Hamming Code
    [USACO][枚举]Healthy Holsteins
    [USACO][DAG上的动态规划]Sorting A Three-Valued Sequence
    [USACO][暴力]The Castle
    [USACO][枚举]Ski Course Design
    运算符重载must take either zero or one argument错误
    关于js鼠标事件综合各大浏览器能获取到坐标的属性总共以下五种
    鼠标滚轮事件封装
  • 原文地址:https://www.cnblogs.com/Fooo/p/752437.html
Copyright © 2011-2022 走看看