zoukankan      html  css  js  c++  java
  • C#收集网页中的EMail实现源码

    C#收集网页中的EMail实现源码:

         // Call
            /// <summary>
            /// Starts a new thread that runs GetEmailAddress with the given URL as its argument.
            /// The remainder of the body is elided ("...") in this listing.
            /// </summary>
            /// <param name="urlStr">Page address handed to the worker thread as its start parameter.</param>
            private void GetAllURL(string urlStr)
            {
                new Thread(new ParameterizedThreadStart(GetEmailAddress)).Start(urlStr); 
                   ...    // process the links in the page (code omitted in the original listing)
                  }
            /**//// <summary>
            /// 提取网页中的Eamil
            /// </summary>
            /// <param name="urlStr">网页地址</param>
            private void GetEmailAddress(object urlStr)
            {
                ArrayList EmailStrs = GetWebInfo((string)urlStr, @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)");   //得到Email
                foreach (object tmp in EmailStrs)
                {
                    Invoke(new AppendTextDelegate(AppendText), new object[] { tmp + "\r\n" });
                }
            }

            private ArrayList GetWebInfo(string URlStr,string RegExpress)
            {
                //打开指定页
                HttpWebRequest webRequest1 = (HttpWebRequest)WebRequest.Create(new Uri(URlStr));
                webRequest1.Method = "GET";
                HttpWebResponse response = (HttpWebResponse)webRequest1.GetResponse();
                String textData = new StreamReader(response.GetResponseStream(), Encoding.Default).ReadToEnd();


                //用正则表达式,提取指定内容,带一个变量
                Regex r;
                Match m;
                r = new Regex(RegExpress,   //@"copyTitle.\'(?<AdInfo>.*)\'",
                    RegexOptions.IgnoreCase | RegexOptions.Compiled);
                int pos1=RegExpress.IndexOf("(?<");
                int pos2=RegExpress.IndexOf(">",pos1);
                string DestionKey = RegExpress.Substring(pos1 + 3, pos2 - pos1 - 3);
                string  AdStr = "";
                ArrayList Result = new ArrayList();
                for (m = r.Match(textData); m.Success; m = m.NextMatch())
                {
                    AdStr = m.Result("${" + DestionKey + "}").Trim();   //地址
                    Result.Add(AdStr);
                }
                return Result;
            }

    上述代码中的关键是书写提取EMail的表达式:
                   @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)"

  • 相关阅读:
    XML 文档的结构
    java 事件机制
    Spring 中的 Resource和ResourceLoader
    Spring PropertyPlaceholderConfigurer
    生产者——消费者模型的java代码实现
    encodeURI() 函数概述
    ECMAScript 6
    node
    AJAX常见面试题
    AJAX(Asynchronous JavaScript and XML)
  • 原文地址:https://www.cnblogs.com/hfzsjz/p/1656714.html
Copyright © 2011-2022 走看看