zoukankan      html  css  js  c++  java
  • C#简单爬虫爬取图片并保存本地

    /// <summary>
    /// Entry point of the crawler: fetches each listing page, extracts the image
    /// URLs from the HTML with a regular expression and downloads every match.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // Number of listing pages to crawl.
        var page = 1;

        // Pattern for the image URLs embedded in the page.
        //  - The dots are escaped so they match a literal '.' only.
        //  - The path segment may not contain quotes, whitespace or angle brackets,
        //    so a match cannot run past the end of one URL into the following markup.
        //  - The extension is limited to the two intended ones (jpg, gif).
        string regstr = @"http://wx[1-4]\.sinaimg\.cn/bmiddle/[^""'\s<>]+?\.(?:jpg|gif)";

        for (int i = 1; i <= page; i++)
        {
            // Fetch the HTML of the current listing page.
            string str = GetHtmlStr($"https://fabiaoqing.com/biaoqing/lists/page/{i}.html", "UTF8");

            // Every match is one image address; download it.
            foreach (Match match in Regex.Matches(str, regstr))
            {
                SaveAsWebImg(match.Value);
            }
        }

        // Show the prompt first, then block until a key is pressed.
        Console.WriteLine("已执行结束,按任意键退出!");
        Console.ReadKey();
    }
    /// <summary>
    /// Requests <paramref name="url"/> and returns the response body as text.
    /// </summary>
    /// <param name="url">Address to request. When null or empty, no request is made.</param>
    /// <param name="encoding">
    /// "UTF8" decodes the response as UTF-8; any other value decodes it with
    /// <see cref="Encoding.Default"/>.
    /// </param>
    /// <returns>
    /// The response body, or an empty string when <paramref name="url"/> is null or empty.
    /// </returns>
    /// <remarks>
    /// Exceptions raised while creating or executing the request are not caught here
    /// and propagate to the caller.
    /// </remarks>
    public static string GetHtmlStr(string url, string encoding)
    {
        if (String.IsNullOrEmpty(url))
        {
            return "";
        }

        Encoding ec = encoding == "UTF8" ? Encoding.UTF8 : Encoding.Default;

        WebRequest request = WebRequest.Create(url);

        // 'using' guarantees the response, stream and reader are released even if
        // reading the body throws part-way through.
        using (WebResponse response = request.GetResponse())
        using (Stream datastream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(datastream, ec))
        {
            return reader.ReadToEnd();
        }
    }
    /// <summary>
    /// Downloads the image at <paramref name="picUrl"/> into the "File" directory
    /// under the application base directory.
    /// </summary>
    /// <param name="picUrl">Address of the image. When null or empty, nothing is downloaded.</param>
    /// <returns>
    /// The generated file name (without directory) on success; an empty string when
    /// <paramref name="picUrl"/> is null or empty or when the download fails.
    /// </returns>
    /// <remarks>
    /// Download failures are reported on the console and do not throw, so one bad
    /// URL does not stop the caller from processing the remaining ones.
    /// </remarks>
    public static string SaveAsWebImg(string picUrl)
    {
        if (String.IsNullOrEmpty(picUrl))
        {
            return "";
        }

        // Target directory; CreateDirectory does nothing when it already exists.
        string path = AppDomain.CurrentDomain.SetupInformation.ApplicationBase + @"/File/";
        Directory.CreateDirectory(path);

        try
        {
            string extension = Path.GetExtension(picUrl);

            // Fixed-width timestamp plus a GUID: unlike unpadded date parts combined
            // with a freshly seeded Random, two downloads can never end up with the
            // same name and overwrite each other.
            string stamp = DateTime.Now.ToString("MMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);
            string fileName = stamp + Guid.NewGuid().ToString("N") + extension;

            using (WebClient webClient = new WebClient())
            {
                webClient.DownloadFile(picUrl, Path.Combine(path, fileName));
            }

            return fileName;
        }
        catch (Exception ex)
        {
            // Best effort: report the failure and signal it with an empty result.
            Console.WriteLine(ex.Message);
            return "";
        }
    }
    注:强推一波个人小站:小语雀网 | 欢迎大佬们访问哈~
  • 相关阅读:
    [quote]HowTo Format Date For Display or Use In a Shell Script
    [quote] standard Input and Output Redirection
    [quote] Re: [ECOS] printf and diag_printf go to nowhere 2
    [quote] Re: [ECOS] printf and diag_printf go to nowhere
    [quote] DMA engine in Linux Kernel
    [linux]How to set PATH in shell script, and keep it available even after it exits
    use AWK to extract some lines according to some patterns in file
    [Reprinted] Howto Use Linux Watchdog
    Hadoop 1 ecosystem
    Java Comparable & Comparator
  • 原文地址:https://www.cnblogs.com/zpblogs/p/14705308.html
Copyright © 2011-2022 走看看