zoukankan      html  css  js  c++  java
  • C#语言实现对网页图片的随机抓取。基础办法,但还有一点小问题,欢迎大家指正

      //当点击读取的时候,抓取网页源代码
            string wangzhi;
            string respHtml;
            private void 读取网页源代码ToolStripMenuItem_Click(object sender, EventArgs e)
            {
                textBox2.Clear();  //读 取之前清空
                wangzhi = comboBox1.Text;
                HttpWebRequest rep = (HttpWebRequest)WebRequest.Create(wangzhi);  //通过网址找到网页放在rep里。建立连接
                HttpWebResponse resp = (HttpWebResponse)rep.GetResponse();  //读取网页
                Encoding htmlEncoding = Encoding.Default;   //确定编码格式
                StreamReader sr = new StreamReader(resp.GetResponseStream(), htmlEncoding); //把网页源代码存入流中
                respHtml = sr.ReadToEnd();  //把流从头到尾读出,转换成字符串
                textBox2.Text = respHtml; //获取的网页源代码
                comboBox1.Items.Add(comboBox1.Text);

        }

      //从网页源码中获取图片,并且下载到E盘
            // Number of <img> tags found by the most recent extraction run.
            public int num = 0;

            /// <summary>
            /// Menu handler: finds every &lt;img&gt; tag in the previously fetched page source
            /// (<c>respHtml</c>), lists the tags in listView1, then downloads each image to
            /// <c>E://&lt;index&gt;.jpg</c>, appending one result row per image.
            /// </summary>
            /// <remarks>
            /// A failure on a single image is reported in the list and does not stop the
            /// remaining downloads. The completion message box is shown once at the end.
            /// </remarks>
            /// <param name="sender">The menu item that raised the event (unused).</param>
            /// <param name="e">Event data (unused).</param>
            private void 从网页源码中读取图片ToolStripMenuItem_Click(object sender, EventArgs e)
            {
                listView1.Columns.Clear();
                listView1.Items.Clear();
                listView1.Columns.Add("链接地址和图片地址", 700);

                // No page has been fetched yet (or it was empty): nothing to scan.
                if (string.IsNullOrEmpty(respHtml))
                {
                    num = 0;
                    return;
                }

                // Matches an <img ...> tag and captures the src value (double-quoted,
                // single-quoted or unquoted) in the "imgUrl" group.
                MatchCollection mc = Regex.Matches(
                    respHtml,
                    @"<img\b[^<>]*?\bsrc\s*=\s*[""']?\s*(?<imgUrl>[^\s""'<>]*)[^<>]*?/?\s*>",
                    RegexOptions.IgnoreCase);

                foreach (Match match in mc)
                {
                    listView1.Items.Add(match.Value);
                }
                num = mc.Count;

                // Relative src values are resolved against the address of the fetched page.
                Uri baseUri;
                if (!Uri.TryCreate(wangzhi, UriKind.Absolute, out baseUri))
                {
                    baseUri = null;
                }

                for (int i = 0; i < num; i++)
                {
                    string src = mc[i].Groups["imgUrl"].Value;
                    if (src.Length == 0)
                    {
                        continue;  // tag without a usable src value
                    }

                    Uri imgUri;
                    bool resolved;
                    if (baseUri != null)
                    {
                        resolved = Uri.TryCreate(baseUri, src, out imgUri);
                    }
                    else
                    {
                        resolved = Uri.TryCreate(src, UriKind.Absolute, out imgUri);
                    }

                    if (!resolved)
                    {
                        listView1.Items.Add(src + " 图片下载失败");
                        continue;
                    }

                    // The file name is the index of the tag in the match list.
                    string path = "E://" + i.ToString() + ".jpg";
                    try
                    {
                        WebRequest req = WebRequest.Create(imgUri);

                        // FileMode.Create truncates an existing file, so a shorter image
                        // never keeps trailing bytes of an older, longer one. The using
                        // blocks release the response and both streams on every path.
                        using (WebResponse res = req.GetResponse())
                        using (Stream reader = res.GetResponseStream())
                        using (FileStream writer = new FileStream(path, FileMode.Create, FileAccess.Write))
                        {
                            // Copies the response body to the file in buffered chunks.
                            reader.CopyTo(writer);
                        }

                        listView1.Items.Add(path + "图片保存成功!");
                    }
                    catch (Exception ex)
                    {
                        // Best effort per image: report the failure and carry on with the
                        // remaining images instead of aborting the whole run.
                        listView1.Items.Add(src + " 图片下载失败:" + ex.Message);
                    }
                }

                MessageBox.Show("本网页图片读取完毕");
            }
        }
      }
  • 相关阅读:
    python 类函数
    scala 排序
    php基础-面向对象
    PHP基础-常用的数组相关处理函数
    PHP基础-PHP中预定义的超全局数组
    PHP基础-数组
    装饰器
    Python3.x 文件操作练习
    Python3.x 文件操作
    Python3 内置函数
  • 原文地址:https://www.cnblogs.com/275147378abc/p/4590389.html
Copyright © 2011-2022 走看看