zoukankan      html  css  js  c++  java
  • 抓取网页并用正则表达式匹配邮箱地址

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Net;
    using System.IO;
    
    namespace _07正则_匹配邮箱
    {
        /// <summary>
        /// Console tool that downloads a fixed list of web pages, extracts the e-mail
        /// addresses written in them (including the obfuscated forms <c>user#host</c>
        /// and <c>user(at)host</c>) and prints the distinct addresses found.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Matches <c>user@host.tld</c>, <c>user#host.tld</c> and <c>user(at)host.tld</c>.
            /// Only named groups capture, so no helper group can leak into the results;
            /// the dots and the parentheses around "at" are escaped so they match literally.
            /// </summary>
            private static readonly Regex MailRegex = new Regex(
                @"(?<user>[a-zA-Z0-9_]+)(?:@|#|\(at\))(?<host>[a-zA-Z0-9]+(?:\.[a-zA-Z0-9]+)+)");

            /// <summary>
            /// Entry point: scans every configured page, prints the collected
            /// addresses and waits for a key press before exiting.
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                List<Uri> listUrl = new List<Uri>() {
                    new Uri("http://gb.corp.163.com/gb/contactus.html"),
                    new Uri("https://passport.csdn.net/help/faq"),
                    new Uri("http://www.kuaipan.cn/"),
                    new Uri("http://www.ksyun.com/home/joinUs/campus"),
                    new Uri("http://www.cnblogs.com/about/ad.aspx"),
                    new Uri("http://www.cnblogs.com/about/contactus.aspx"),
                    new Uri("http://www.csdn.net/company/statement.html"),
                    new Uri("http://hb.qq.com/job/dczp/index.htm")
                };
                List<string> listMail = new List<string>();
                foreach (Uri ur in listUrl)
                {
                    GetMails(ur, listMail);
                }

                cw(listMail);

                Console.ReadKey();
            }

            /// <summary>
            /// Downloads the page at <paramref name="uri"/> and appends every e-mail
            /// address found in it to <paramref name="list"/>, skipping addresses the
            /// list already contains.
            /// </summary>
            /// <param name="uri">Address of the page to download.</param>
            /// <param name="list">Accumulator that receives the normalized addresses.</param>
            /// <remarks>
            /// Best effort: any failure (for example an unreachable host) is reported
            /// on the console and the method returns without modifying the list further.
            /// </remarks>
            private static void GetMails(Uri uri, List<string> list)
            {
                try
                {
                    string input;
                    // The client, the response stream and the reader all hold
                    // resources, so each is disposed as soon as the page text is read.
                    using (WebClient wc = new WebClient())
                    {
                        Console.WriteLine("创建WebClient - [{0}]", uri.ToString());
                        using (Stream stream = wc.OpenRead(uri))
                        using (StreamReader reader = new StreamReader(stream, Encoding.Default))
                        {
                            input = reader.ReadToEnd();
                        }
                    }

                    // Diagnostic output: whether the page contains any address at all.
                    Console.WriteLine(MailRegex.IsMatch(input));
                    ExtractMails(input, list);
                }
                catch (Exception e)
                {
                    Console.WriteLine(e.Message);
                }
            }

            /// <summary>
            /// Finds every address in <paramref name="input"/>, rewrites it to the
            /// canonical <c>user@host</c> form and adds it to <paramref name="list"/>
            /// unless it is already present.
            /// </summary>
            /// <param name="input">Text to scan, typically the HTML of a page.</param>
            /// <param name="list">Accumulator that receives the normalized addresses.</param>
            private static void ExtractMails(string input, List<string> list)
            {
                foreach (Match match in MailRegex.Matches(input))
                {
                    // Rebuilding the address from its two parts replaces "#" and "(at)"
                    // with "@" without touching any "at" or "#" inside the parts themselves.
                    string mail = match.Groups["user"].Value + "@" + match.Groups["host"].Value;
                    if (!list.Contains(mail))
                    {
                        list.Add(mail);
                    }
                }
            }

            /// <summary>
            /// Writes the number of entries in <paramref name="list"/>, then each entry
            /// with its 1-based position, then a separator line, to the console.
            /// </summary>
            /// <param name="list">Strings to print.</param>
            static void cw(List<string> list)
            {
                Console.WriteLine("长度为{0}", list.Count);
                int i = 0;
                foreach (string str in list)
                {
                    i++;
                    Console.WriteLine("{0} - [{1}]", i, str);
                }
                Console.WriteLine("______________________");
            }
        }
    }
    

      

  • 相关阅读:
    洛谷P6218 [USACO06NOV] Round Numbers S 题解 数位DP
    Duilib的双缓冲实现,附带GDI、WTL的双缓冲实现
    关于热键HotKey与WM_KEYDOWN的一点心得
    源码不匹配,找到了xxx的副本。
    SetForegroundWindow的失效问题: 跨进程的窗口前置。
    2021年4月的一份总结
    制作msi文件,exe转msi文件。
    内存限制
    ISPRS Vaihingen 数据集解析
    Linux中sudo、su和su -命令的区别小结
  • 原文地址:https://www.cnblogs.com/liqipeng/p/4576160.html
Copyright © 2011-2022 走看看