zoukankan      html  css  js  c++  java
  • 20131127-正则表达式

    [1]从网页上下载图片

    namespace 下载图片
    {
        /// <summary>
        /// Console sample: downloads a web page, extracts the <c>src</c> of every
        /// matching <c>&lt;img&gt;</c> tag with a regular expression, and saves each
        /// image into a local folder.
        /// </summary>
        class Program
        {
            static void Main(string[] args)
            {
                // Folder URL that both the page and the (relative) image links live under.
                const string baseAddress = "http://localhost:8080/美女图片/";
                string webAddress = baseAddress + "美女们.htm";

                // Verbatim string so the embedded quotes can be written as "" pairs.
                // [^"]+ (instead of a greedy .+) stops at the closing quote of src, so
                // several <img> tags on one line are captured separately.
                string strRegex = @"<img alt="""" src=""([^""]+)"" />";

                // Verbatim string: in a regular literal "\t" is a tab and a trailing
                // backslash would escape the closing quote.
                string path = @"E:\test\";

                // DownloadFile does not create missing directories.
                Directory.CreateDirectory(path);

                // WebClient is IDisposable; release it deterministically.
                using (WebClient wc = new WebClient())
                {
                    wc.Encoding = Encoding.Default;

                    MatchCollection mat = GetWebInfo(wc, webAddress, strRegex);

                    // Every item yielded by a MatchCollection is a successful match,
                    // so no per-item Success check is needed.
                    foreach (Match item in mat)
                    {
                        string src = item.Groups[1].Value;
                        string fileName = Path.GetFileName(src);

                        if (string.IsNullOrEmpty(fileName))
                        {
                            // src ends with a separator: there is no file name to save under.
                            continue;
                        }

                        // Build the image's absolute address on the server.
                        string realImgAddress = baseAddress + src;
                        string localpath = Path.Combine(path, fileName);

                        wc.DownloadFile(realImgAddress, localpath);
                    }
                }

                Console.WriteLine("OK");
                Console.ReadKey();
            }

            /// <summary>
            /// Downloads the page at <paramref name="webAddress"/> and returns every
            /// match of <paramref name="strRegex"/> found in its HTML.
            /// </summary>
            /// <param name="wc">Client used for the download; its Encoding is used to decode the page.</param>
            /// <param name="webAddress">Address of the page to download.</param>
            /// <param name="strRegex">Regular expression pattern to apply to the HTML.</param>
            /// <returns>All matches of the pattern in the downloaded HTML.</returns>
            public static MatchCollection GetWebInfo(WebClient wc, string webAddress, string strRegex)
            {
                // Fetch the HTML of the page as a string.
                string html = wc.DownloadString(webAddress);

                // Collect every occurrence of the pattern in the page.
                return Regex.Matches(html, strRegex);
            }
        }
    }

     

    [2]从网页提取邮箱

    namespace 网页提取邮箱2
    {
        /// <summary>
        /// Console sample: downloads a web page and prints every e-mail address
        /// found in it, split into user name and domain, followed by the total count.
        /// </summary>
        class Program
        {
            static void Main(string[] args)
            {
                string webAddress = "http://localhost:8080/提取Email.htm";

                // Group 1 = user name, group 2 = domain.
                // The dot before each domain label is escaped (\.) so it matches a
                // literal '.'; unescaped it matches any character and lets the domain
                // group swallow text that follows the address.
                string strRegex = @"([0-9a-zA-Z_.-]+)@([0-9a-zA-Z-]+(\.[a-zA-Z]+){1,2})";

                MatchCollection mat;

                // WebClient is IDisposable; release it once the page has been fetched.
                using (WebClient wc = new WebClient())
                {
                    wc.Encoding = Encoding.Default;
                    mat = GetWebInfo(wc, webAddress, strRegex);
                }

                // Every item yielded by a MatchCollection is a successful match,
                // so no per-item Success check is needed.
                foreach (Match item in mat)
                {
                    Console.WriteLine(
                        "{0}===用户名为:{1}===域名为:{2}",
                        item.Value,
                        item.Groups[1].Value,
                        item.Groups[2].Value);
                }

                Console.WriteLine("一共有{0}", mat.Count);
                Console.ReadKey();
            }

            /// <summary>
            /// Downloads the page at <paramref name="webAddress"/> and returns every
            /// match of <paramref name="strRegex"/> found in its HTML.
            /// </summary>
            /// <param name="wc">Client used for the download; its Encoding is used to decode the page.</param>
            /// <param name="webAddress">Address of the page to download.</param>
            /// <param name="strRegex">Regular expression pattern to apply to the HTML.</param>
            /// <returns>All matches of the pattern in the downloaded HTML.</returns>
            public static MatchCollection GetWebInfo(WebClient wc, string webAddress, string strRegex)
            {
                string html = wc.DownloadString(webAddress);

                return Regex.Matches(html, strRegex);
            }
        }
    }

  • 相关阅读:
    基于FFI模块CAPI与JavaScript的各种类型匹配总结
    在Electron中通过ffi模块实现JavaScript调用C++动态库
    谷歌地图OGC WMTS服务规则
    tiff/tfw, jpg/jpgw坐标文件的格式(6个参数)
    GreenDao 多表事务操作
    Asp.net WebAPI 使用流下载文件注意事项
    mvn 用指定setting.xml 执行指定pom.xml
    Swagger自动生成接口文档
    Windows下控制Nginx的状态
    Android 动态权限申请
  • 原文地址:https://www.cnblogs.com/CharlesZHENG/p/3527521.html
Copyright © 2011-2022 走看看