zoukankan      html  css  js  c++  java
  • 通过WebClient类来发起请求并下载html 抓取邮箱 图片

     using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Net;
    using System.Text.RegularExpressions;
    using System.IO;
    
    // NOTE: the namespace identifier originally contained spaces, which is not a valid C#
    // identifier; underscores are used instead.
    namespace 通过WebClient类来发起请求并下载html_抓取邮箱_图片
    {
        /// <summary>
        /// Console demo: downloads a page with <see cref="WebClient"/> and extracts data from the
        /// HTML with regular expressions. Only the job-title example is active; the e-mail and
        /// image examples are kept as disabled reference snippets inside <c>Main</c>.
        /// </summary>
        class Program
        {
            // Address of the saved 51job listing page used by the active example.
            private const string JobListUrl =
                "http://192.168.1.100:8080/【上海,IT-管理,计算机软件招聘,求职】-前程无忧.htm";

            // Matches one job link and captures its visible text in group 1. Sample anchor:
            //   <a href="http://search.51job.com/job/46621778,c.html" onclick="zzSearch.acStatRecJob( 1 );" class="jobname" target="_blank">ERP项目经理</a>
            // - "\s+" and "\." are escaped explicitly (the unescaped form "<as+href" can never
            //   match an "<a href" tag, and a bare "." matches any character).
            // - "[^>]*" keeps the attribute part inside the opening tag, and the lazy "(.+?)"
            //   stops at the first "</a>", so two anchors on one line stay two matches.
            private static readonly Regex JobLinkPattern = new Regex(
                @"<a\s+href=""http://search\.51job\.com/job/[0-9]{8},c\.html""[^>]*>(.+?)</a>");

            /// <summary>
            /// Entry point: downloads the job listing page, prints every job title found in it,
            /// prints the total count and waits for a key press.
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                #region Example 1 (disabled): extract e-mail addresses from a page
                //string url = "http://192.168.1.100:8080/提取Email.htm";
                ////1. Download the HTML text for the address.
                //WebClient wc = new WebClient();
                //wc.Encoding = Encoding.UTF8;
                //string html = wc.DownloadString("http://192.168.1.100:8080/提取Email.htm");
                ////2. Extract the e-mail addresses from the downloaded text and write them to a text file.
                //MatchCollection matches = Regex.Matches(html, @"[-a-zA-Z0-9_.]+@[-a-zA-Z0-9]+(\.[a-zA-Z0-9]+){1,}");

                //using (StreamWriter writer = new StreamWriter("email.txt"))
                //{
                //    // Walk through the extracted addresses.
                //    foreach (Match item in matches)
                //    {
                //        //Console.WriteLine(item.Value);
                //        writer.WriteLine(item.Value);
                //    }
                //}

                //Console.ReadKey();
                #endregion

                #region Example 2 (disabled): download every image referenced by a page
                //WebClient wc = new WebClient();

                ////1. Download the page source.
                //string html = wc.DownloadString("http://192.168.1.100:8080/美女图片/美女们.htm");
                ////2. Extract the images, i.e. the <img> tags, for example:
                ////<img alt="" src="hotgirls/00_00.jpg" />
                //MatchCollection matches = Regex.Matches(html, @"<img\s+alt="""" src=""(.+)""\s*/>");
                //foreach (Match item in matches)
                //{
                //    string imgPath = "http://192.168.1.100:8080/美女图片/" + item.Groups[1].Value;
                //    // Download the image (target folder presumably c:\mv\ - confirm before enabling).
                //    wc.DownloadFile(imgPath, @"c:\mv\" + Path.GetFileName(imgPath));
                //}
                //Console.WriteLine("ok");
                //Console.ReadKey();
                #endregion

                #region Example 3 (active): extract job titles
                string html;
                // WebClient is IDisposable; release it as soon as the download has finished.
                using (WebClient webClient = new WebClient())
                {
                    html = webClient.DownloadString(JobListUrl);
                }

                List<string> titles = ExtractJobTitles(html);
                foreach (string title in titles)
                {
                    Console.WriteLine(title);
                }
                Console.WriteLine("共{0}个职位信息。", titles.Count);
                Console.ReadKey();
                #endregion
            }

            /// <summary>
            /// Returns the link text of every 51job job anchor found in <paramref name="html"/>,
            /// in document order.
            /// </summary>
            /// <param name="html">Page source to scan; may be null or empty.</param>
            /// <returns>The captured job titles; an empty list when nothing matches.</returns>
            private static List<string> ExtractJobTitles(string html)
            {
                List<string> titles = new List<string>();
                if (string.IsNullOrEmpty(html))
                {
                    return titles;
                }

                foreach (Match match in JobLinkPattern.Matches(html))
                {
                    titles.Add(match.Groups[1].Value);
                }
                return titles;
            }
        }
    }
  • 相关阅读:
    mybatis-day1
    java基础
    pytest进阶之html测试报告
    pycharm在github上clone项目
    selenium中的显示等待WebDriverWait与条件判断expected_conditions举例
    pytest-html报告中添加报错截图
    Fiddler抓包工具如何设置过滤域名
    os.system运行cmd命令时,命令中嵌套了引号
    Pytest之模块之间共享skipif标记
    Pycharm出现同一目录的py文件不能相互调用的问题
  • 原文地址:https://www.cnblogs.com/blacop/p/6021345.html
Copyright © 2011-2022 走看看