zoukankan      html  css  js  c++  java
  • Bing搜索背景图抓取

    首先是两个读取 HTML 的方法,分别使用 UTF-8 和 GB2312 两种编码、两种不同的实现方式(两个方法都可以使用;第二个方法目前存在乱码问题,待处理)。

      /// <summary>
      /// Downloads the Bing home page, decodes the bytes as UTF-8, saves a copy
      /// to F:\Desktop\txt2.html and returns the decoded HTML.
      /// </summary>
      /// <returns>
      /// The page HTML on success. If the download raises a
      /// <see cref="WebException"/>, the exception message is returned instead,
      /// so callers cannot distinguish the two cases by type alone.
      /// </returns>
      private static string  ReturnUTF8Html()
            {
                try
                {
                    byte[] pageData;

                    // WebClient is IDisposable; release it as soon as the bytes are in memory.
                    using (WebClient client = new WebClient())
                    {
                        // Authenticate with the credentials of the current user/process.
                        client.Credentials = CredentialCache.DefaultCredentials;
                        pageData = client.DownloadData("http://www.bing.com");
                    }

                    // The page is decoded as UTF-8. For a GB2312 page a different
                    // encoding would have to be used here instead.
                    string pageHtml = Encoding.UTF8.GetString(pageData);

                    // Keep a copy of the downloaded page on disk.
                    using (StreamWriter sw = new StreamWriter("F:\\Desktop\\txt2.html"))
                    {
                        sw.Write(pageHtml);
                    }

                    return pageHtml;
                }
                catch (WebException webEx)
                {
                    // Network failures are reported through the return value.
                    return webEx.Message;
                }
            }

        /// <summary>
        /// Downloads http://www.ithome.com/, decodes the response as GB2312,
        /// saves a copy to F:\Desktop\txt.html and returns the decoded HTML.
        /// </summary>
        /// <returns>The page HTML decoded with the "gb2312" encoding.</returns>
        public static string ReturnGB2312Html()
            {
                string getWeatherUrl = "http://www.ithome.com/";

                // Use one encoding for both reading and saving so the saved bytes
                // round-trip the decoded text instead of depending on the machine's
                // default code page.
                Encoding pageEncoding = Encoding.GetEncoding("gb2312");

                string html;
                // The using blocks release the response and streams even when reading throws.
                using (WebResponse response = WebRequest.Create(getWeatherUrl).GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader pageReader = new StreamReader(responseStream, pageEncoding))
                {
                    html = pageReader.ReadToEnd();
                }

                // Save the page. FileMode.Create truncates an existing file, so a
                // shorter page never leaves stale bytes from a previous run at the end.
                using (FileStream fstream = new FileStream(@"F:\Desktop\txt.html", FileMode.Create, FileAccess.Write))
                {
                    byte[] buffer = pageEncoding.GetBytes(html);
                    fstream.Write(buffer, 0, buffer.Length);
                }

                return html;
            }

    抓取文件

      /// <summary>
      /// Downloads the resource at <paramref name="strUrl"/> and writes it to the
      /// file named by <c>Program.path</c>.
      /// </summary>
      /// <param name="strUrl">Absolute URL of the file to download.</param>
      /// <returns>
      /// "保存成功" when the file was written; an empty string when the URL is
      /// empty or the download/save failed (the reason is written to standard error).
      /// </returns>
      public static string 抓取文件(string strUrl)
            {
                string strMsg = string.Empty;

                // An empty URL (e.g. a failed regex match upstream) can never succeed.
                if (string.IsNullOrEmpty(strUrl))
                {
                    Console.Error.WriteLine("Download skipped: no URL was supplied.");
                    return strMsg;
                }

                try
                {
                    // Make sure the target folder exists before opening the file.
                    string directory = Path.GetDirectoryName(Program.path);
                    if (!string.IsNullOrEmpty(directory))
                    {
                        Directory.CreateDirectory(directory);
                    }

                    WebRequest request = WebRequest.Create(strUrl);

                    // using blocks release the response and both streams even on failure.
                    // FileMode.Create truncates, so an older, larger file cannot leave
                    // trailing bytes behind the new image.
                    using (WebResponse response = request.GetResponse())
                    using (Stream reader = response.GetResponseStream())
                    using (FileStream writer = new FileStream(Program.path, FileMode.Create, FileAccess.Write))
                    {
                        reader.CopyTo(writer);
                    }

                    strMsg = "保存成功";
                }
                catch (Exception ex)
                {
                    // Still best-effort (no exception escapes), but no longer silent.
                    Console.Error.WriteLine("Download of " + strUrl + " failed: " + ex.Message);
                }

                return strMsg;
            }

    入口

     /// <summary>
     /// Entry point: reads the Bing home page, extracts the 1920x1080 background
     /// image URL with a regular expression and downloads that image.
     /// </summary>
     /// <param name="args">Command-line arguments (unused).</param>
     static void Main(string[] args)
     {
         Console.WriteLine("正在读取网页...");
         string html = ReturnUTF8Html();
         Console.WriteLine("网页读取正常...");
         Console.WriteLine("正在匹配正则表达式...");

         // Dots are escaped so they match literal '.' characters only.
         Match match = Regex.Match(html, @"http://s\.cn\.bing\.net/az/hprichbg/rb/[a-zA-Z]+_ZH-CN[0-9]{11}_1920x1080\.jpg");

         if (match.Success)
         {
             Console.WriteLine(match.Value);

             // 抓取文件 returns "保存成功" only when the file was actually written.
             string result = Program.抓取文件(match.Value);
             Console.WriteLine(result == "保存成功" ? "下载完毕!" : "下载失败!");
         }
         else
         {
             // No image URL in the page (or the page could not be read):
             // do not attempt a download with an empty URL.
             Console.WriteLine("未找到背景图地址,已跳过下载。");
         }

         // Keep the console window open briefly so the messages can be read.
         Thread.Sleep(3000);
         // Possible follow-up (was commented out in the original): set the image as wallpaper.
         // SystemParametersInfo(20, 0, "D:\\AAA.bmp", 0x2);
     }

    路径

     // Target file for the downloaded image, stamped with the start time of the program.
     // The timestamp uses '-' between the time parts because ':' is not allowed in
     // Windows file names, and "mm" keeps the minutes zero-padded.
     public static  string path = "E:\\background\\Bing\\bing" + DateTime.Now.ToString("yyyy-MM-dd HH-mm-ss", System.Globalization.CultureInfo.InvariantCulture) + ".jpg";
    

      

  • 相关阅读:
    Leetcode题目practice
    文件操作
    39个奇葩代码注释,拿走不谢
    Spring Boot 之配置导入,强大到不行!
    Git 的这个神技,学会爽歪歪~
    同事天天写垃圾代码,就没办法?
    for (;;) 与 while (true),哪个更快?
    Spring Boot 怎么打一个可执行 Jar 包?
    程序员真的是太太太太太太太太难了!
    面试官:new一个对象有哪两个过程?
  • 原文地址:https://www.cnblogs.com/Amayer/p/5224746.html
Copyright © 2011-2022 走看看