zoukankan      html  css  js  c++  java
  • 20131127-正则表达式

    [1]从网页上下载图片

    namespace 下载图片
    {
        using System;
        using System.IO;
        using System.Net;
        using System.Text;
        using System.Text.RegularExpressions;

        /// <summary>
        /// Console sample: downloads a web page, extracts the <c>src</c> of every
        /// matching <c>&lt;img&gt;</c> tag with a regular expression, and saves each
        /// image into a local directory.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Entry point. Fetches the gallery page, downloads every image it
            /// references, prints "OK" and waits for a key press.
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                // Images are referenced relative to this folder on the server.
                string baseAddress = "http://localhost:8080/美女图片/";
                string webAddress = baseAddress + "美女们.htm";

                // Verbatim string: "" is a literal quote. [^"]+ stops at the closing
                // quote of src, so several tags on one line are matched separately
                // (a greedy .+ would run to the last quote on the line).
                string strRegex = @"<img alt="""" src=""([^""]+)"" />";

                // Verbatim string so that \t and the trailing \ are not treated as escapes.
                string path = @"E:\test\";

                // DownloadFile does not create missing directories.
                Directory.CreateDirectory(path);

                using (WebClient wc = new WebClient())
                {
                    wc.Encoding = Encoding.Default;

                    MatchCollection mat = GetWebInfo(wc, webAddress, strRegex);
                    foreach (Match item in mat)
                    {
                        string relativeAddress = item.Groups[1].Value;

                        // Build the absolute address of the image on the server.
                        string realImgAddress = baseAddress + relativeAddress;
                        string localpath = Path.Combine(path, Path.GetFileName(relativeAddress));

                        wc.DownloadFile(realImgAddress, localpath);
                    }
                }

                Console.WriteLine("OK");
                Console.ReadKey();
            }

            /// <summary>
            /// Downloads the page at <paramref name="webAddress"/> as a string and
            /// returns every match of <paramref name="strRegex"/> found in it.
            /// </summary>
            /// <param name="wc">Client used to download the page.</param>
            /// <param name="webAddress">Address of the page to download.</param>
            /// <param name="strRegex">Regular expression pattern applied to the page's HTML.</param>
            /// <returns>All matches of the pattern in the downloaded HTML.</returns>
            public static MatchCollection GetWebInfo(WebClient wc, string webAddress, string strRegex)
            {
                // Fetch the HTML of the page that contains the images.
                string html = wc.DownloadString(webAddress);

                // Extract the image link addresses from the page's tags.
                Regex regex = new Regex(strRegex);
                return regex.Matches(html);
            }
        }
    }

     

    [2]从网页提取邮箱

    namespace 网页提取邮箱2
    {
        using System;
        using System.Net;
        using System.Text;
        using System.Text.RegularExpressions;

        /// <summary>
        /// Console sample: downloads a web page and prints every e-mail address
        /// found in it, split into user name and domain.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Entry point. Fetches the page, prints each address with its user
            /// name and domain, then the total count, and waits for a key press.
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                string webAddress = "http://localhost:8080/提取Email.htm";

                // Group 1 = user name, group 2 = domain. The dot before each domain
                // label is escaped; an unescaped '.' would match any character and
                // accept strings such as "foo@barXcom".
                string strRegex = @"([0-9a-zA-Z_.-]+)@([0-9a-zA-Z-]+(\.[a-zA-Z]+){1,2})";

                MatchCollection mat;
                using (WebClient wc = new WebClient())
                {
                    wc.Encoding = Encoding.Default;
                    mat = GetWebInfo(wc, webAddress, strRegex);
                }

                foreach (Match item in mat)
                {
                    Console.WriteLine(item.Value + "===用户名为:" + item.Groups[1].Value + "===域名为:" + item.Groups[2].Value);
                }

                Console.WriteLine("一共有{0}", mat.Count);
                Console.ReadKey();
            }

            /// <summary>
            /// Downloads the page at <paramref name="webAddress"/> as a string and
            /// returns every match of <paramref name="strRegex"/> found in it.
            /// </summary>
            /// <param name="wc">Client used to download the page.</param>
            /// <param name="webAddress">Address of the page to download.</param>
            /// <param name="strRegex">Regular expression pattern applied to the page's HTML.</param>
            /// <returns>All matches of the pattern in the downloaded HTML.</returns>
            public static MatchCollection GetWebInfo(WebClient wc, string webAddress, string strRegex)
            {
                string html = wc.DownloadString(webAddress);

                Regex regex = new Regex(strRegex);
                return regex.Matches(html);
            }
        }
    }

  • 相关阅读:
    OA系统权限管理设计方案【转】
    UML类图几种关系的总结
    在pl/sql中使用exp/imp工具实现oracle数据导出/导入
    page 的范围
    JSP页面跳转的五种方法
    Start with...Connect By
    秒杀系统架构
    对系统负载的理解
    sort(7)
    cat(6)
  • 原文地址:https://www.cnblogs.com/CharlesZHENG/p/3527521.html
Copyright © 2011-2022 走看看