zoukankan      html  css  js  c++  java
  • Java 正则表达式提取标签中的属性(src 链接地址)等

    /**
     * Regex-based helpers that pull attribute values (image {@code src}, video
     * {@code poster}/{@code src}) out of HTML fragments.
     *
     * <p>This is a lightweight extractor, not an HTML parser: a tag is taken to end at
     * the first {@code >} character, so attribute values that themselves contain
     * {@code >} are not supported.
     */
    public class Test {

        /** One {@code <img ...>} tag; {@code [^>]*} keeps the match inside a single tag. */
        private static final Pattern IMG_TAG =
                Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);

        /** One opening {@code <video ...>} tag. */
        private static final Pattern VIDEO_TAG =
                Pattern.compile("<video\\b[^>]*>", Pattern.CASE_INSENSITIVE);

        /** The {@code src} attribute inside a single tag. */
        private static final Pattern SRC_ATTR = attributePattern("src");

        /** The {@code poster} attribute inside a single tag. */
        private static final Pattern POSTER_ATTR = attributePattern("poster");

        /**
         * Small demo: prints the image sources and the video poster/source found in
         * two sample HTML fragments.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            String source = "<p><img src=\"https://xxxxx/xxx/xxxx/182cd48c587651767921868181f66ca8.jpg\" />sdfasdfasdfsadfasdfasdfasdfasdfasdf</p><img src=\"https://xxxxx/xxxx/182cd48c587651767921868181f66ca8sdf.jpg\" />";
            System.out.println(getImgStr(source));
            String htmlStr = "<p><video controls=\"controls\" durationtime=\"72\" filesize=\"27117469\" height=\"200px\" poster=\"https://xxxxxx/b439b0281450abce7f13b2920da04346.png\" src=\"https://xxxxxxxxxx/b439b0281450abce7f13b2920da04346.mp4\" style=\"\">&nbsp;</video></p>";

            System.out.println(getVideoStr(htmlStr));
        }

        /**
         * Collects the {@code src} value of every {@code <img>} tag in the given HTML.
         *
         * @param htmlStr HTML fragment to scan; may be {@code null}
         * @return the distinct {@code src} values found (unordered); empty when the
         *         input is {@code null} or contains no {@code <img>} tag with a
         *         {@code src} attribute
         */
        public static Set<String> getImgStr(String htmlStr) {
            Set<String> pics = new HashSet<>();
            if (htmlStr == null) {
                return pics;
            }
            // Visit each <img> tag on its own so a neighbouring tag's src is never
            // attributed to an image.
            Matcher imgTags = IMG_TAG.matcher(htmlStr);
            while (imgTags.find()) {
                String src = attributeValue(imgTags.group(), SRC_ATTR);
                if (src != null) {
                    pics.add(src);
                }
            }
            return pics;
        }

        /**
         * Extracts the {@code poster} and {@code src} attributes of a {@code <video>}
         * tag. The attributes may appear in any order.
         *
         * <p>When the input contains several {@code <video>} tags, the values of the
         * last tag carrying at least one of the two attributes are returned.
         *
         * @param htmlStr HTML fragment to scan; may be {@code null}
         * @return a map with the keys {@code "poster"} and {@code "src"} (an attribute
         *         missing from the tag maps to {@code ""}), or an empty map when the
         *         input is {@code null} or no {@code <video>} tag has either attribute
         */
        public static Map<String, String> getVideoStr(String htmlStr) {
            Map<String, String> map = new HashMap<>();
            if (htmlStr == null) {
                return map;
            }
            Matcher videoTags = VIDEO_TAG.matcher(htmlStr);
            while (videoTags.find()) {
                String tag = videoTags.group();
                String poster = attributeValue(tag, POSTER_ATTR);
                String src = attributeValue(tag, SRC_ATTR);
                if (poster == null && src == null) {
                    // Nothing useful in this tag; keep whatever an earlier tag provided.
                    continue;
                }
                map.put("poster", poster == null ? "" : poster);
                map.put("src", src == null ? "" : src);
            }
            return map;
        }

        /**
         * Builds a pattern for {@code name=value} inside a tag. The value may be
         * double-quoted, single-quoted or unquoted, captured in groups 1, 2 and 3
         * respectively. The leading {@code \s} ensures that e.g. {@code data-src}
         * is not mistaken for {@code src}.
         */
        private static Pattern attributePattern(String name) {
            return Pattern.compile(
                    "\\s" + name + "\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))",
                    Pattern.CASE_INSENSITIVE);
        }

        /**
         * Returns the value of the first occurrence of the attribute in {@code tag},
         * or {@code null} when the attribute is absent.
         */
        private static String attributeValue(String tag, Pattern attribute) {
            Matcher m = attribute.matcher(tag);
            if (!m.find()) {
                return null;
            }
            // Exactly one of the three alternative groups participates in a match.
            for (int g = 1; g <= m.groupCount(); g++) {
                String value = m.group(g);
                if (value != null) {
                    return value;
                }
            }
            return null;
        }
    }

    
    
  • 相关阅读:
    我从0开始开发了一个LDAP服务。
    C#开发中常用的小功能
    webapi swagger 报错 路由集合中已存在名为“swagger_docsswagger/docs/{apiVersion}”的路由。路由名称必须唯一
    h5 web vlc 播放rtsp流
    Docker的基础概念与在window10下的安装
    .Net Core JWT 动态设置接口与权限
    .Net Core官方的 JWT 授权验证
    IdentityServer4中文文档
    中介者模式及在NetCore中的使用MediatR来实现
    .Net Core 使用 FluentValidation
  • 原文地址:https://www.cnblogs.com/austinspark-jessylu/p/8252936.html
Copyright © 2011-2022 走看看