zoukankan      html  css  js  c++  java
  • 一个用来提取网页中图片的小工具

    /// <summary>
    /// Extracts image URLs (.jpg, .png, .gif, .bmp, .ico) from the <c>src</c> and
    /// <c>href</c> attributes of an HTML document.
    /// </summary>
    /// <param name="html">HTML text to scan; null or empty yields an empty array.</param>
    /// <param name="com">
    /// Site root (for example "http://www.example.com") used to complete URLs that are
    /// not absolute. May be null or empty, in which case relative URLs are returned as found.
    /// </param>
    /// <returns>
    /// A <c>string[]</c> of distinct URLs: every <c>src</c> match first, then every
    /// <c>href</c> match, each group in document order. The original letter case of
    /// the URLs is preserved.
    /// </returns>
    public Array MatchHtml(string html, string com)
    {
        List<string> urls = new List<string>();
        if (string.IsNullOrEmpty(html))
        {
            return urls.ToArray();
        }

        string root = (com ?? "").TrimEnd('/');
        // Set used for O(1) duplicate detection; the list keeps the discovery order.
        HashSet<string> seen = new HashSet<string>(StringComparer.Ordinal);

        // Quoted attribute value that ends in one of the supported image extensions.
        // The extensions are a real alternation here; a character class such as
        // [(.jpg)(.png)] would only test the last character of the value.
        const string valuePattern = @"\s*=\s*""([^""]*\.(?:jpg|png|gif|bmp|ico))""";

        foreach (string attribute in new string[] { "src", "href" })
        {
            // Match case-insensitively instead of lower-casing the document, because
            // URL paths are case-sensitive on many servers.
            Regex regex = new Regex(attribute + valuePattern, RegexOptions.IgnoreCase);
            foreach (Match m in regex.Matches(html))
            {
                string url = m.Groups[1].Value.Trim();

                bool isAbsolute =
                    url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                    url.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

                if (!isAbsolute)
                {
                    if (url.StartsWith("//", StringComparison.Ordinal))
                    {
                        // Protocol-relative URL: reuse the scheme of the site root,
                        // falling back to http when no root was supplied.
                        int schemeEnd = root.IndexOf("://", StringComparison.Ordinal);
                        string scheme = schemeEnd > 0 ? root.Substring(0, schemeEnd) : "http";
                        url = scheme + ":" + url;
                    }
                    else if (root.Length > 0)
                    {
                        // Join with exactly one slash. Paths are resolved against the
                        // site root, not against the directory of the current page.
                        url = root + "/" + url.TrimStart('/');
                    }
                }

                if (seen.Add(url))
                {
                    urls.Add(url);
                }
            }
        }

        return urls.ToArray();
    }
    
           // Win32 console API imports (kernel32.dll) used to read and change the
           // input mode of the console attached to this process.
           [DllImport("kernel32.dll", SetLastError = true)]
           static extern bool SetConsoleMode(IntPtr hConsoleHandle, int mode);
           [DllImport("kernel32.dll", SetLastError = true)]
           static extern bool GetConsoleMode(IntPtr hConsoleHandle, out int mode);
           [DllImport("kernel32.dll", SetLastError = true)]
           static extern IntPtr GetStdHandle(int handle);

           // Identifier of the standard input device for GetStdHandle.
           const int STD_INPUT_HANDLE = -10;
           // 0x40 is the quick-edit flag itself; 0x80 is ENABLE_EXTENDED_FLAGS, which
           // the Windows console documentation requires to be set together with it.
           const int ENABLE_QUICK_EDIT_MODE = 0x40 | 0x80;

           /// <summary>
           /// Turns on quick-edit mode (mouse selection / paste) for the console input.
           /// This is best-effort: when the standard input handle is unavailable or is
           /// not a console (for example, redirected input) the method does nothing.
           /// </summary>
           public static void EnableQuickEditMode()
           {
               IntPtr handle = GetStdHandle(STD_INPUT_HANDLE);
               // GetStdHandle returns NULL when there is no standard input and
               // INVALID_HANDLE_VALUE (-1) on failure.
               if (handle == IntPtr.Zero || handle == new IntPtr(-1))
               {
                   return;
               }

               int mode;
               if (!GetConsoleMode(handle, out mode))
               {
                   // Not a console handle; there is no mode to modify.
                   return;
               }

               // Failure to apply the new mode is deliberately ignored: quick-edit is
               // a convenience and must not stop the program.
               SetConsoleMode(handle, mode | ENABLE_QUICK_EDIT_MODE);
           }
           /// <summary>
           /// Interactive entry point. Repeatedly asks for a page URL and a save
           /// directory, downloads the page, extracts the image URLs it references and
           /// saves every image that can be decoded. A counter persisted in a text file
           /// supplies file names so they do not repeat between runs. The loop ends
           /// when standard input is closed.
           /// </summary>
           /// <param name="args">Command-line arguments (unused).</param>
           static void Main(string[] args)
           {
               EnableQuickEditMode();
               Console.Title = "TakeImageFromInternet";
               string path = @"E:\Download\loading\";
               // Text file holding the running counter, so that file names are not repeated.
               string countFile = @"E:\CountFile.txt";

               while (true)
               {
                   int oldCount = ReadImageCounter(countFile);
                   int cursor = oldCount;

                   Console.Write("please input a url:");
                   string url = "http://www.baidu.com/";
                   string temp = Console.ReadLine();
                   if (temp == null)
                   {
                       // Standard input was closed; without this the loop would spin
                       // forever on the default URL.
                       break;
                   }
                   if (temp.Trim().Length > 0)
                   {
                       url = temp.Trim();
                   }

                   // Clear only once a new job starts, so the results of the previous
                   // job stay readable until the user has entered the next URL.
                   Console.Clear();

                   Uri pageUri;
                   bool isWebUrl = Uri.TryCreate(url, UriKind.Absolute, out pageUri)
                       && (pageUri.Scheme == Uri.UriSchemeHttp || pageUri.Scheme == Uri.UriSchemeHttps);
                   if (!isWebUrl)
                   {
                       Console.WriteLine("invalid url:" + url);
                       continue;
                   }
                   // Scheme + host (+ port), e.g. "http://www.baidu.com"; used to
                   // complete relative image URLs.
                   string com = pageUri.GetLeftPart(UriPartial.Authority);

                   Console.Write("please input a save path:");
                   temp = Console.ReadLine();
                   if (Directory.Exists(temp))
                   {
                       path = temp;
                   }
                   Console.WriteLine();
                   if (!Directory.Exists(path))
                   {
                       Console.WriteLine("save path does not exist, images will only be listed:" + path);
                   }

                   using (WebClient client = new WebClient())
                   {
                       string html;
                       try
                       {
                           byte[] htmlData = client.DownloadData(url);
                           using (StreamReader sr = new StreamReader(new MemoryStream(htmlData)))
                           {
                               html = sr.ReadToEnd();
                           }
                       }
                       catch (WebException ex)
                       {
                           Console.WriteLine("download failed:" + ex.Message);
                           continue;
                       }

                       Array urls = new MatchHtmlImageUrl().MatchHtml(html, com);

                       foreach (string imageurl in urls)
                       {
                           Console.WriteLine(imageurl);
                           byte[] imageData;
                           try
                           {
                               imageData = client.DownloadData(imageurl);
                           }
                           catch (Exception ex)
                           {
                               // Best effort: one unreachable image must not abort the
                               // whole page, but the reason is reported.
                               Console.WriteLine(ex.Message);
                               continue;
                           }

                           // Advance the counter only for images actually written, so
                           // the reported count and the file numbering stay accurate.
                           if (TrySaveImage(imageData, imageurl, path, cursor))
                           {
                               cursor++;
                           }
                       }
                   }

                   try
                   {
                       File.WriteAllText(countFile, cursor.ToString(), Encoding.UTF8);
                   }
                   catch (IOException ex)
                   {
                       Console.WriteLine(ex.Message);
                   }
                   catch (UnauthorizedAccessException ex)
                   {
                       Console.WriteLine(ex.Message);
                   }
                   Console.WriteLine("take done...image count:" + (cursor - oldCount).ToString());
               }
           }

           /// <summary>
           /// Reads the persisted image counter. Returns 0 when the file is missing or
           /// does not contain a valid integer.
           /// </summary>
           private static int ReadImageCounter(string countFile)
           {
               if (!File.Exists(countFile))
               {
                   return 0;
               }
               int value;
               // For very large counts a long would be the better type.
               return int.TryParse(File.ReadAllText(countFile), out value) ? value : 0;
           }

           /// <summary>
           /// Maps a file extension (with leading dot, lower case) to the image format
           /// used for saving; returns null for unsupported extensions.
           /// </summary>
           private static ImageFormat ResolveImageFormat(string ext)
           {
               switch (ext)
               {
                   case ".jpg":
                       return ImageFormat.Jpeg;
                   case ".bmp":
                       return ImageFormat.Bmp;
                   case ".png":
                       return ImageFormat.Png;
                   case ".gif":
                       return ImageFormat.Gif;
                   case ".ico":
                       return ImageFormat.Icon;
                   default:
                       return null;
               }
           }

           /// <summary>
           /// Decodes the downloaded bytes and writes them to
           /// <paramref name="path"/> as "&lt;cursor&gt;&lt;ext&gt;". Returns true only
           /// when a file was written; failures are reported on the console.
           /// </summary>
           private static bool TrySaveImage(byte[] imageData, string imageurl, string path, int cursor)
           {
               if (imageData == null || imageData.Length == 0 || !Directory.Exists(path))
               {
                   return false;
               }

               try
               {
                   // NOTE(review): ExtendName is an external helper; it is assumed to
                   // return the extension including the leading dot - confirm.
                   string ext = Aping.Utility.File.FileOpration.ExtendName(imageurl);
                   ext = (ext ?? "").ToLowerInvariant();
                   ImageFormat format = ResolveImageFormat(ext);
                   if (format == null)
                   {
                       return false;
                   }

                   // The stream must stay open for as long as the bitmap is in use.
                   using (MemoryStream ms = new MemoryStream(imageData))
                   using (Image image = new Bitmap(ms))
                   {
                       image.Save(Path.Combine(path, cursor + ext), format);
                   }
                   return true;
               }
               catch (Exception ex)
               {
                   Console.WriteLine(ex.Message);
                   return false;
               }
           }

    转自:http://www.oschina.net/code/snippet_1415082_27053

  • 相关阅读:
    html 6 border border-width border-style border-color CSS三角形
    html 5 marign top right bottom left
    html 布局
    python学习——生成列表并修改其元素
    python学习——读取染色体长度(七:for循环对染色体序列进行反向互补)
    python学习——读取染色体长度(七:读取fasta文件)
    python学习——读取染色体长度(六:读取含有染色体长度的文件)
    python学习——读取染色体长度(五:从命令行输入染色体长度)
    python学习——读取染色体长度(四:获取最长染色体的编号)
    python学习——读取染色体长度(三、用循环或者函数求总长并获取最长染色体长度)
  • 原文地址:https://www.cnblogs.com/sumg/p/3850927.html
Copyright © 2011-2022 走看看