zoukankan      html  css  js  c++  java
  • C#程序

    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Threading.Tasks;
    
    namespace MeiZi
    {
        /// <summary>
        /// Console entry point for the image crawler.
        /// </summary>
        public class Program
        {
            /// <summary>
            /// Starts the crawl. Constructing <see cref="GetMeiziPic"/> runs the
            /// whole collection process, so the instance itself is not kept.
            /// </summary>
            /// <param name="args">Command-line arguments; not used.</param>
            private static void Main(string[] args)
            {
                new GetMeiziPic();
            }
        }
        /// <summary>
        /// Crawls listing pages 1 through 9 of www.sepaidui.com, follows every post
        /// link found on them and downloads the Sina-hosted images of each post
        /// into an "Images" directory under the current working directory.
        /// </summary>
        /// <remarks>
        /// The whole crawl runs synchronously from the constructor. Failures of
        /// individual page or image requests are written to the console and the
        /// crawl continues with the next item.
        /// </remarks>
        public class GetMeiziPic
        {
            // Directory the downloaded images are written to.
            private readonly string _path;

            // Matches an <img> tag; group 1 captures the value of its src attribute.
            private const string ImgRegex = @"<img[^>]*?src\s*=\s*[""']?([^'"" >]+?)[ '""][^>]*?>";

            // Matches a post title link of the form <h2><a ...>text</a></h2>.
            private const string LinkRegex = @"<h2><a\s+[^>]*?>[^<>]*?</a></h2>";

            // Loose sanity check that a string looks like an http URL. The hyphen is
            // placed last in the character class so it is unambiguously a literal.
            private readonly Regex _isUrlFormat = new Regex(@"http://?([\w-]+\.)+[\w-]+(/[\w ./?%&=-]*)?");

            /// <summary>
            /// Prepares the output directory and runs the crawl over listing pages 1..9.
            /// </summary>
            public GetMeiziPic()
            {
                _path = DealDir(Path.Combine(Environment.CurrentDirectory, "Images"));
                Console.WriteLine("===============    开始采集   ===============");
                for (var i = 1; i < 10; i++)
                {
                    Console.WriteLine("===============正在下载第{0}页数据===============", i);
                    DoFetchStep1(i);
                }

                Console.WriteLine("===============   采集完成   ===============");
            }

            /// <summary>
            /// Ensures that <paramref name="path"/> exists as a directory.
            /// </summary>
            /// <param name="path">Directory path to create if missing.</param>
            /// <returns>The same <paramref name="path"/>.</returns>
            private string DealDir(string path)
            {
                if (!Directory.Exists(path))
                    Directory.CreateDirectory(path);
                return path;
            }

            /// <summary>
            /// Downloads one listing page and processes the post links it contains.
            /// </summary>
            /// <param name="pageNum">Page number passed in the "page" query parameter.</param>
            private void DoFetchStep1(int pageNum)
            {
                var html = DownloadHtml("http://www.sepaidui.com/?sort=4&page=" + pageNum);
                if (html != null)
                    FetchLinksFromSource1(html);
            }

            /// <summary>
            /// Extracts the post links from a listing page and fetches each post.
            /// </summary>
            /// <param name="htmlSource">HTML of a listing page.</param>
            private void FetchLinksFromSource1(string htmlSource)
            {
                var matchesLink = Regex.Matches(htmlSource, LinkRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline);
                foreach (Match m in matchesLink)
                {
                    // The link target is taken as the first double-quoted value in the
                    // matched markup (assumes href is the first quoted attribute — TODO confirm).
                    var parts = m.Value.Split('"');
                    if (parts.Length < 2)
                        continue; // no quoted attribute at all: nothing to follow
                    DoFetchStep2(parts[1]);
                }
            }

            /// <summary>
            /// Downloads one post page and processes the images it contains.
            /// </summary>
            /// <param name="href">Address of the post page.</param>
            private void DoFetchStep2(string href)
            {
                var html = DownloadHtml(href);
                if (html != null)
                    FetchLinksFromSource2(html);
            }

            /// <summary>
            /// Finds the image tags in a post page and downloads the accepted images
            /// into the output directory under random file names.
            /// </summary>
            /// <param name="htmlSource">HTML of a post page.</param>
            private void FetchLinksFromSource2(string htmlSource)
            {
                var matchesImgSrc = Regex.Matches(htmlSource, ImgRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline);
                // One client serves every image of the page instead of one per image.
                using (var myWebClient = new WebClient())
                {
                    foreach (Match m in matchesImgSrc)
                    {
                        var href = m.Groups[1].Value;
                        // Only take pictures hosted on the Sina album service; the excluded
                        // id is presumably a site-wide banner image — TODO confirm.
                        if (!href.Contains("sinaimg") || !CheckIsUrlFormat(href) || href.Contains("60d02b59tw1eq6g7srmiwj20pv03mdg8"))
                            continue;

                        Console.WriteLine(href);
                        try
                        {
                            var uri = new Uri(href);
                            // Take the extension from the path part only, so a query
                            // string or fragment never ends up in the file name.
                            var extension = Path.GetExtension(uri.AbsolutePath);
                            myWebClient.DownloadFile(uri, Path.Combine(_path, Path.GetRandomFileName() + extension));
                        }
                        catch (Exception ex)
                        {
                            // Best effort: report the failure and go on with the next image.
                            Console.WriteLine(ex.Message);
                        }
                    }
                }
            }

            /// <summary>
            /// Checks whether <paramref name="value"/> contains something shaped like an http URL.
            /// </summary>
            /// <param name="value">Candidate URL text.</param>
            /// <returns><c>true</c> when the URL pattern matches anywhere in the value.</returns>
            private bool CheckIsUrlFormat(string value)
            {
                return _isUrlFormat.IsMatch(value);
            }

            /// <summary>
            /// Performs an HTTP GET and returns the response body as text.
            /// </summary>
            /// <param name="url">Absolute http or https address to fetch.</param>
            /// <returns>
            /// The response body, or <c>null</c> when the address is not an absolute
            /// http(s) URL, the status is not 200 OK, there is no response stream,
            /// or the request fails.
            /// </returns>
            private static string DownloadHtml(string url)
            {
                Uri uri;
                if (!Uri.TryCreate(url, UriKind.Absolute, out uri))
                    return null;
                if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps)
                    return null;

                try
                {
                    var request = (HttpWebRequest)WebRequest.Create(uri);
                    request.Credentials = CredentialCache.DefaultCredentials;
                    // Dispose the response on every path so the connection is released.
                    using (var response = (HttpWebResponse)request.GetResponse())
                    {
                        if (response.StatusCode != HttpStatusCode.OK)
                            return null;
                        using (var stream = response.GetResponseStream())
                        {
                            if (stream == null)
                                return null;
                            using (var reader = new StreamReader(stream))
                            {
                                return reader.ReadToEnd();
                            }
                        }
                    }
                }
                catch (WebException ex)
                {
                    // A single unreachable page must not abort the whole crawl.
                    Console.WriteLine(ex.Message);
                    return null;
                }
                catch (IOException ex)
                {
                    Console.WriteLine(ex.Message);
                    return null;
                }
            }
        }
    }
  • 相关阅读:
    致命错误: mysql/mysql.h:没有那个文件或目录 解决办法
    [转载]解决/usr/bin/ld: cannot find lxxx 问题
    mysql5.5.25a安装:Installation of system tables failed解决办法
    ubuntu源码编译安装mysql5.5.25a
    ubuntu 11.10 安装配置NFS
    nrpe简单插件编写
    我也学erlang(五)——简单的列表处理
    C语言中自加与自减效率的思考
    DBI connect() failed: Can't connect to local MySQL server through socket '/var/run/mysqld/mysqld.sock'
    std::ostream_iterator用法
  • 原文地址:https://www.cnblogs.com/talentzemin/p/4355035.html
Copyright © 2011-2022 走看看