zoukankan      html  css  js  c++  java
  • C#收集网页中的EMail实现源码

    C#收集网页中的EMail实现源码:

         // Call site: shows how the e-mail extraction is started for a page.
            /// <summary>
            /// Starts a new thread that runs GetEmailAddress for <paramref name="urlStr"/>.
            /// The remainder of the body is elided in this listing.
            /// </summary>
            /// <param name="urlStr">Page address; handed to the new thread as its argument.</param>
            private void GetAllURL(string urlStr)
            {
                // A new thread is created on every call; urlStr travels as the thread's object argument.
                new Thread(new ParameterizedThreadStart(GetEmailAddress)).Start(urlStr); 
                   ...    // process the links found in the page (omitted in the listing)
                  }
            /**//// <summary>
            /// 提取网页中的Eamil
            /// </summary>
            /// <param name="urlStr">网页地址</param>
            private void GetEmailAddress(object urlStr)
            {
                ArrayList EmailStrs = GetWebInfo((string)urlStr, @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)");   //得到Email
                foreach (object tmp in EmailStrs)
                {
                    Invoke(new AppendTextDelegate(AppendText), new object[] { tmp + "\r\n" });
                }
            }

            private ArrayList GetWebInfo(string URlStr,string RegExpress)
            {
                //打开指定页
                HttpWebRequest webRequest1 = (HttpWebRequest)WebRequest.Create(new Uri(URlStr));
                webRequest1.Method = "GET";
                HttpWebResponse response = (HttpWebResponse)webRequest1.GetResponse();
                String textData = new StreamReader(response.GetResponseStream(), Encoding.Default).ReadToEnd();


                //用正则表达式,提取指定内容,带一个变量
                Regex r;
                Match m;
                r = new Regex(RegExpress,   //@"copyTitle.\'(?<AdInfo>.*)\'",
                    RegexOptions.IgnoreCase | RegexOptions.Compiled);
                int pos1=RegExpress.IndexOf("(?<");
                int pos2=RegExpress.IndexOf(">",pos1);
                string DestionKey = RegExpress.Substring(pos1 + 3, pos2 - pos1 - 3);
                string  AdStr = "";
                ArrayList Result = new ArrayList();
                for (m = r.Match(textData); m.Success; m = m.NextMatch())
                {
                    AdStr = m.Result("${" + DestionKey + "}").Trim();   //地址
                    Result.Add(AdStr);
                }
                return Result;
            }

    上述代码中的关键是书写提取EMail的表达式:
                   @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)"

  • 相关阅读:
    面试题-代码
    面试题-数仓
    烂大街的NginxRedisMqDb架构.md
    Ansible-B站.md
    openstack.md
    rabbitMQ.md
    大数据仓库实战项目-电商数仓3.0版.md
    linux分析工具之top命令详解
    小程序开发中 在 wxml格式化 属性断行
    redhat 7.2更新yum源时踩的坑
  • 原文地址:https://www.cnblogs.com/hfzsjz/p/1656714.html
Copyright © 2011-2022 走看看