zoukankan      html  css  js  c++  java
  • C# 网页图片采集

    http://blog.csdn.net/a237428367/article/details/5987832

    using System;  

    • using System.Collections.Generic;  
    • using System.Linq;  
    • using System.Text;  
    • using System.Text.RegularExpressions;  
    • using System.Net;  
    • using System.IO;  
    • using System.Windows.Forms;  
    namespace ImageCollect
    {
        /// <summary>
        /// Loads a web page in a <see cref="WebBrowser"/> control and saves every
        /// image referenced by an &lt;img&gt; element into a local directory.
        /// </summary>
        /// <remarks>
        /// The download starts when the page finishes loading, inside the
        /// DocumentCompleted handler, and runs synchronously on the thread that
        /// raised the event.
        /// NOTE(review): WebBrowser is documented as requiring an STA thread with
        /// a running message loop; confirm that callers satisfy this.
        /// </remarks>
        public class GatherPic
        {
            // Extension appended when the URL's file name has none.
            private const string DefaultExtension = ".jpg";
            // File name used when the URL path ends with '/'.
            private const string DefaultFileName = "image";
            private const string UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)";
            private const int RequestTimeoutMs = 10000;
            private const int BufferSize = 1024;

            private readonly string savePath;
            private readonly string getUrl;
            private WebBrowser wb;
            // Number of <img> elements found by the last SearchImgList call.
            private int iImgCount;

            /// <summary>Initializes the collector.</summary>
            /// <param name="sWebUrl">Address of the page whose images are collected.</param>
            /// <param name="sSavePath">Directory the images are written to.</param>
            public GatherPic(string sWebUrl, string sSavePath)
            {
                this.getUrl = sWebUrl;
                this.savePath = sSavePath;
            }

            /// <summary>Starts loading the page; images are saved once it has loaded.</summary>
            /// <returns>
            /// <c>false</c> (after showing a message box) when no URL was supplied,
            /// otherwise <c>true</c>.
            /// </returns>
            public bool start()
            {
                // A null URL used to throw NullReferenceException on Trim().
                if (getUrl == null || getUrl.Trim().Length == 0)
                {
                    MessageBox.Show("哪来的虾米连网址都没输!");
                    return false;
                }

                // A second call must not leak the previous browser instance.
                ReleaseBrowser();

                this.wb = new WebBrowser();
                // Subscribe before navigating so the completion event cannot be missed.
                this.wb.DocumentCompleted += DocumentCompleted;
                this.wb.Navigate(getUrl);
                return true;
            }

            /// <summary>Detaches the event handler from the current browser and disposes it.</summary>
            private void ReleaseBrowser()
            {
                if (this.wb == null)
                {
                    return;
                }

                this.wb.DocumentCompleted -= DocumentCompleted;
                this.wb.Dispose();
                this.wb = null;
            }

            /// <summary>Handles WebBrowser.DocumentCompleted.</summary>
            private void DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
            {
                if (this.wb == null || this.wb.Document == null)
                {
                    return;
                }

                // The event also fires for frames/iframes; only react when the
                // completed URL is the URL of the top-level document.
                if (e.Url != this.wb.Document.Url)
                {
                    return;
                }

                SearchImgList();
            }

            /// <summary>
            /// Finds every &lt;img&gt; element in the loaded document and saves its
            /// source to the configured directory.
            /// </summary>
            public void SearchImgList()
            {
                // Nothing to scan when called before a page has been loaded.
                if (this.wb == null || this.wb.Document == null)
                {
                    this.iImgCount = 0;
                    return;
                }

                HtmlElementCollection images = this.wb.Document.GetElementsByTagName("img");
                this.iImgCount = images.Count;
                foreach (HtmlElement image in images)
                {
                    SaveImageFromWeb(image.GetAttribute("src"), this.savePath);
                }
            }

            /// <summary>Downloads one remote image into a directory.</summary>
            /// <param name="imgUrl">Absolute HTTP(S) address of the image.</param>
            /// <param name="path">Target directory; created when it does not exist.</param>
            /// <returns>
            /// 1 when the image was saved; 0 when the URL is unusable, the request
            /// fails, the response is not of an <c>image/*</c> content type, or the
            /// file cannot be written.
            /// </returns>
            public int SaveImageFromWeb(string imgUrl, string path)
            {
                if (string.IsNullOrEmpty(imgUrl) || string.IsNullOrEmpty(path))
                {
                    return 0;
                }

                Uri imageUri;
                if (!Uri.TryCreate(imgUrl, UriKind.Absolute, out imageUri))
                {
                    return 0;
                }

                try
                {
                    // "as" instead of a cast: file:/ftp: URLs yield other request types.
                    HttpWebRequest request = WebRequest.Create(imageUri) as HttpWebRequest;
                    if (request == null)
                    {
                        return 0;
                    }

                    request.UserAgent = UserAgent;
                    request.Timeout = RequestTimeoutMs;

                    using (WebResponse response = request.GetResponse())
                    {
                        string contentType = response.ContentType ?? string.Empty;
                        if (!contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase))
                        {
                            return 0;
                        }

                        Directory.CreateDirectory(path);
                        string target = Path.Combine(path, BuildFileName(imageUri));

                        using (Stream input = response.GetResponseStream())
                        using (FileStream output = new FileStream(target, FileMode.Create, FileAccess.Write))
                        {
                            // Copy until end of stream. ContentLength is not used
                            // because it is -1 when the server sends no length, and
                            // a short read must not turn into an endless loop.
                            byte[] buffer = new byte[BufferSize];
                            int read;
                            while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
                            {
                                output.Write(buffer, 0, read);
                            }
                        }

                        return 1;
                    }
                }
                catch (WebException)
                {
                    return 0;
                }
                catch (NotSupportedException)
                {
                    // Unregistered URI scheme (for example "data:") or unsupported path format.
                    return 0;
                }
                catch (IOException)
                {
                    return 0;
                }
                catch (UnauthorizedAccessException)
                {
                    return 0;
                }
            }

            /// <summary>
            /// Derives a safe local file name from the last segment of the URL path
            /// (the query string is ignored).
            /// </summary>
            private static string BuildFileName(Uri imageUri)
            {
                string urlPath = Uri.UnescapeDataString(imageUri.AbsolutePath);
                string name = urlPath.Substring(urlPath.LastIndexOf('/') + 1);

                // Characters that are legal in a URL may be illegal in a file name.
                foreach (char invalid in Path.GetInvalidFileNameChars())
                {
                    name = name.Replace(invalid, '_');
                }

                if (name.Length == 0)
                {
                    name = DefaultFileName;
                }

                if (Path.GetExtension(name).Length == 0)
                {
                    name += DefaultExtension;
                }

                return name;
            }
        }
    }

     

    调用方法

    // Collect every image on the page into E:/XXX.
    GatherPic g = new GatherPic("http://www.baidu.com", "E:/XXX");
    g.start();

    =====================================================

    在web项目中使用WebBrowser类-----给网站抓图

     

    最近做一个WEB项目,其中要求有个功能就是程序能网页抓图,举个例子: 在test.aspx页面上放一个TextBox和一个Button,TextBox用来输入要抓取的网页地址,然后按了Button之后,服务器要对前面输入的网址进行抓图,然后显示出来。我把抓图的业务逻辑做成一个类:

    using System;
    using System.Data;
    using System.Windows.Forms;
    using System.Drawing;
    
    /// <summary>
    /// WebSnap2: renders a web page in a WebBrowser control and captures the
    /// whole scrollable page as a bitmap.
    /// </summary>
    public class WebSnap2
    {

        public WebSnap2()
        {
        }

        /// <summary>
        /// Loads a page and returns an image of it.
        /// </summary>
        /// <param name="Url">Address of the page to capture.</param>
        /// <returns>A bitmap of the rendered page; the caller owns and must dispose it.</returns>
        /// <exception cref="InvalidOperationException">
        /// The page has no document body, or the capture failed on the helper
        /// thread (the original error is the inner exception).
        /// </exception>
        public Bitmap StartSnap(string Url)
        {
            // WebBrowser wraps an ActiveX control and can only be created on an
            // STA thread. UI threads already are STA; ASP.NET request threads are
            // not, so in that case the capture runs on a dedicated STA thread.
            if (System.Threading.Thread.CurrentThread.GetApartmentState() == System.Threading.ApartmentState.STA)
            {
                return this.Capture(Url);
            }

            Bitmap result = null;
            Exception failure = null;
            System.Threading.Thread worker = new System.Threading.Thread(
                new System.Threading.ThreadStart(delegate()
                {
                    try
                    {
                        result = this.Capture(Url);
                    }
                    catch (Exception ex)
                    {
                        // Carried back to the calling thread and rethrown below.
                        failure = ex;
                    }
                }));
            worker.SetApartmentState(System.Threading.ApartmentState.STA);
            worker.IsBackground = true;
            worker.Start();
            worker.Join();

            if (failure != null)
            {
                throw new InvalidOperationException("Capturing the page failed: " + failure.Message, failure);
            }

            return result;
        }

        /// <summary>Loads the page, captures it, and always disposes the browser.</summary>
        private Bitmap Capture(string url)
        {
            WebBrowser myWB = this.GetPage(url);
            try
            {
                return this.SnapWeb(myWB);
            }
            finally
            {
                myWB.Dispose();
            }
        }

        /// <summary>Creates a browser, navigates to the URL and waits until loading completes.</summary>
        private WebBrowser GetPage(string Url)
        {
            WebBrowser myWB = new WebBrowser();
            try
            {
                myWB.ScrollBarsEnabled = false;
                myWB.Navigate(Url);
                // NOTE(review): waits indefinitely if the page never reaches Complete.
                while (myWB.ReadyState != WebBrowserReadyState.Complete)
                {
                    System.Windows.Forms.Application.DoEvents();
                    // Yield briefly so the wait does not spin a full CPU core.
                    System.Threading.Thread.Sleep(10);
                }
                return myWB;
            }
            catch
            {
                myWB.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Resizes the browser to the page's scrollable size (plus a 10-pixel
        /// margin) and draws it into a new bitmap.
        /// </summary>
        private Bitmap SnapWeb(WebBrowser wb)
        {
            HtmlDocument hd = wb.Document;
            if (hd == null || hd.Body == null)
            {
                throw new InvalidOperationException("The page has no document body to capture.");
            }

            // ScrollRectangle gives the scroll size as integers, avoiding the
            // string round-trip through GetAttribute and Convert.ToInt32.
            Rectangle scrollArea = hd.Body.ScrollRectangle;
            int height = scrollArea.Height + 10;
            int width = scrollArea.Width + 10;
            wb.Height = height;
            wb.Width = width;

            Bitmap bmp = new Bitmap(width, height);
            try
            {
                wb.DrawToBitmap(bmp, new Rectangle(0, 0, width, height));
                return bmp;
            }
            catch
            {
                bmp.Dispose();
                throw;
            }
        }

    }

    然后在test.aspx的button_click事件里面调用:

            // Capture the page entered in the text box and stream it back as a JPEG.
            WebSnap2 ws = new WebSnap2();
            using (Bitmap bmp = ws.StartSnap(TextBox1.Text))
            using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
            {
                bmp.Save(ms, System.Drawing.Imaging.ImageFormat.Jpeg);
                Response.ContentType = "image/jpeg";
                // ToArray returns only the bytes written; GetBuffer would also
                // send the unused tail of the stream's internal buffer.
                Response.BinaryWrite(ms.ToArray());
            }
  • 相关阅读:
    Swift _ OC _ 混编
    CoreAnimation 寄宿图
    CoreAnimation 开篇
    iOS 杂笔-26(苹果禁用热更新)
    Swift_TableView(delegate,dataSource,prefetchDataSource 详解)
    Swift_ScrollView _ API详解
    插入排序_c++
    选择排序_c++
    冒泡排序_c++
    Swift_协议
  • 原文地址:https://www.cnblogs.com/qq260250932/p/5361043.html
Copyright © 2011-2022 走看看