zoukankan      html  css  js  c++  java
  • C# 爬取图片

    网络收集整理  爬取图片 

    需要先通过 NuGet 引用 AngleSharp 包:

    using AngleSharp;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Net;
    
    namespace CoreConsoleApp
    {
        public class Program
        {
            /// <summary>
            /// Crawls the listing pages <c>?pageIndex=1</c> up to (but not including)
            /// <c>pageIndex</c>, and downloads every image found under
            /// <c>.panel-body li</c> into <c>D:\Image\&lt;yyyyMMdd&gt;\&lt;page number - page title&gt;</c>.
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            public static void Main(string[] args)
            {
                // Enable the default loader so the browsing context can fetch documents.
                var config = Configuration.Default.WithDefaultLoader();
                var context = BrowsingContext.New(config);

                // Exclusive upper bound: pages 1 .. pageIndex - 1 are visited.
                int pageIndex = 50;

                // One client for the whole run, disposed deterministically.
                using (var webClient = new WebClient())
                {
                    for (int i = 1; i < pageIndex; i++)
                    {
                        var address = @"https://www.qwe.com?pageIndex=" + i;
                        var pageUri = new Uri(address);

                        // Main is synchronous, so block here; GetAwaiter().GetResult()
                        // surfaces the real exception instead of an AggregateException.
                        var document = context.OpenAsync(address).GetAwaiter().GetResult();
                        var cells = document.QuerySelectorAll(".panel-body li");

                        // The page title becomes a folder name, so strip every character
                        // the file system rejects (the original only removed '|').
                        var fileName = RemoveInvalidFileNameChars((i + " - " + document.Title).Replace("|", "")).Trim();
                        string localPath = Path.Combine(@"D:\Image", DateTime.Now.ToString("yyyyMMdd"), fileName);

                        foreach (var item in cells)
                        {
                            // Skip list items that carry no usable <img src="...">.
                            var image = item.QuerySelector("img");
                            var src = image == null ? null : image.GetAttribute("src");
                            if (string.IsNullOrWhiteSpace(src))
                            {
                                continue;
                            }

                            // Resolve relative sources against the page address; only
                            // http(s) targets can be downloaded (this skips data: URIs).
                            Uri imageUri;
                            if (!Uri.TryCreate(pageUri, src, out imageUri)
                                || (imageUri.Scheme != Uri.UriSchemeHttp && imageUri.Scheme != Uri.UriSchemeHttps))
                            {
                                continue;
                            }

                            // Use the last path segment only, so a query string never
                            // ends up inside the local file name.
                            string imageName = RemoveInvalidFileNameChars(Path.GetFileName(imageUri.LocalPath));
                            if (string.IsNullOrEmpty(imageName))
                            {
                                continue;
                            }

                            // CreateDirectory is a no-op when the directory already exists.
                            Directory.CreateDirectory(localPath);
                            string filepath = Path.Combine(localPath, imageName);

                            // Download(url, localfile) in this class is an alternative
                            // that can resume a partially downloaded file.
                            try
                            {
                                webClient.DownloadFile(imageUri, filepath);
                            }
                            catch (WebException ex)
                            {
                                // One broken image must not abort the whole crawl.
                                Console.Error.WriteLine("下载失败: " + imageUri + " - " + ex.Message);
                            }
                        }
                    }
                }

                Console.ReadLine();
            }

            /// <summary>
            /// Returns <paramref name="name"/> with every character listed by
            /// <see cref="Path.GetInvalidFileNameChars"/> removed.
            /// </summary>
            private static string RemoveInvalidFileNameChars(string name)
            {
                return string.Concat(name.Split(Path.GetInvalidFileNameChars()));
            }
    
            /// <summary>
            /// Downloads a file over HTTP into <paramref name="localfile"/>. If the local
            /// file already exists and is shorter than the remote file, the download is
            /// resumed from the end of the local file using an HTTP Range request.
            /// </summary>
            /// <param name="url">HTTP address of the remote file.</param>
            /// <param name="localfile">Path of the local file to create or append to.</param>
            /// <returns>
            /// <c>true</c> when the response body was written completely; <c>false</c> when
            /// the remote file is considered missing, the local file is already at least as
            /// long as the remote one, or any exception occurred during the transfer.
            /// </returns>
            public static bool Download(string url, string localfile)
            {
                // Content-Length value that is treated as "remote file does not exist".
                // NOTE(review): presumably the size of the target site's error page — confirm.
                const long MissingFileLength = 745;
                const int BufferSize = 512;

                long remoteFileLength = GetHttpLength(url);
                System.Console.WriteLine("remoteFileLength=" + remoteFileLength);
                if (remoteFileLength == MissingFileLength)
                {
                    System.Console.WriteLine("远程文件不存在.");
                    return false;
                }

                long startPosition = 0; // number of bytes already present locally
                FileStream writeStream;

                if (File.Exists(localfile))
                {
                    writeStream = File.OpenWrite(localfile);
                    startPosition = writeStream.Length;

                    if (startPosition >= remoteFileLength)
                    {
                        System.Console.WriteLine("本地文件长度" + startPosition + "已经大于等于远程文件长度" + remoteFileLength);
                        writeStream.Close();
                        return false;
                    }

                    // Continue writing right after the bytes that are already there.
                    writeStream.Seek(startPosition, SeekOrigin.Begin);
                }
                else
                {
                    writeStream = new FileStream(localfile, FileMode.Create);
                }

                try
                {
                    // The using blocks release the file and the connection on every path,
                    // including when an exception is thrown mid-transfer.
                    using (writeStream)
                    {
                        HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
                        if (startPosition > 0)
                        {
                            // Long overload: an int cast would truncate offsets above 2 GB.
                            myRequest.AddRange(startPosition);
                        }

                        using (HttpWebResponse response = (HttpWebResponse)myRequest.GetResponse())
                        using (Stream readStream = response.GetResponseStream())
                        {
                            // A server that ignores Range sends the whole file again;
                            // appending it would corrupt the local copy, so start over.
                            if (startPosition > 0 && response.StatusCode != HttpStatusCode.PartialContent)
                            {
                                writeStream.SetLength(0);
                                writeStream.Position = 0;
                                startPosition = 0;
                            }

                            byte[] buffer = new byte[BufferSize];
                            long currentPosition = startPosition;
                            int bytesRead;

                            while ((bytesRead = readStream.Read(buffer, 0, buffer.Length)) > 0)
                            {
                                writeStream.Write(buffer, 0, bytesRead);
                                currentPosition += bytesRead;

                                // The length is 0 or negative when it could not be determined;
                                // skip the progress line instead of dividing by zero.
                                if (remoteFileLength > 0)
                                {
                                    int percent = (int)(currentPosition * 100 / remoteFileLength);
                                    System.Console.WriteLine("percent=" + percent + "%");
                                }
                            }
                        }
                    }

                    return true;
                }
                catch (Exception ex)
                {
                    // Keep the original contract (failure is reported as false), but do
                    // not hide the reason.
                    System.Console.WriteLine("下载失败: " + ex.Message);
                    return false;
                }
            }
    
            /// <summary>
            /// Requests <paramref name="url"/> and reads the remote file length from the
            /// response headers; the response body is not read.
            /// </summary>
            /// <param name="url">HTTP address of the remote file.</param>
            /// <returns>
            /// The response's <c>ContentLength</c> when the status is 200 OK; otherwise 0,
            /// including when the request throws. NOTE(review): per the .NET documentation
            /// <c>ContentLength</c> is -1 when the server sends no Content-Length header.
            /// </returns>
            private static long GetHttpLength(string url)
            {
                try
                {
                    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);

                    // using guarantees the connection is released even if reading the
                    // status or headers throws.
                    using (HttpWebResponse rsp = (HttpWebResponse)req.GetResponse())
                    {
                        return rsp.StatusCode == HttpStatusCode.OK ? rsp.ContentLength : 0;
                    }
                }
                catch (Exception)
                {
                    // Any failure (bad URL, network error, non-success status) is reported as 0.
                    return 0;
                }
            }
    
        }
    }
    

      

  • 相关阅读:
    Python3入门基础--str常用方法
    大学jsp实验4include,forword
    大学jsp实验3include指令的使用
    初识MFC----运行时类信息机制
    状态栏
    工具栏
    菜单栏
    程序启动画面
    字符串的截取
    字符串相关类
  • 原文地址:https://www.cnblogs.com/sanday/p/10007631.html
Copyright © 2011-2022 走看看