zoukankan      html  css  js  c++  java
  • 去除html代码中的标签

    public static String htmlText(String inputString) {
    String htmlStr = inputString; //含html标签的字符串
    String textStr ="";
    java.util.regex.Pattern p_script;
    java.util.regex.Matcher m_script;
    java.util.regex.Pattern p_style;
    java.util.regex.Matcher m_style;
    java.util.regex.Pattern p_html;
    java.util.regex.Matcher m_html;
    java.util.regex.Pattern p_nbsp;
    java.util.regex.Matcher m_nbsp;
    java.util.regex.Pattern p_r;
    java.util.regex.Matcher m_r;
    java.util.regex.Pattern p_n;
    java.util.regex.Matcher m_n;
    try {
    String regEx_script = "<[\s]*?script[^>]*?>[\s\S]*?<[\s]*?\/[\s]*?script[\s]*?>"; //定义script的正则表达式{或<script[^>]*?>[\s\S]*?<\/script> }
    String regEx_style = "<[\s]*?style[^>]*?>[\s\S]*?<[\s]*?\/[\s]*?style[\s]*?>"; //定义style的正则表达式{或<style[^>]*?>[\s\S]*?<\/style> }
    String regEx_html = "<[^>]+>"; //定义HTML标签的正则表达式
    String regEx_nbsp = "&nbsp;"; //定义&nbsp;标签的正则表达式
    String regEx_r = " "; //定义&nbsp;标签的正则表达式
    String regEx_n = " "; //定义&nbsp;标签的正则表达式

    p_script = Pattern.compile(regEx_script,Pattern.CASE_INSENSITIVE);
    m_script = p_script.matcher(htmlStr);
    htmlStr = m_script.replaceAll(""); //过滤script标签

    p_style = Pattern.compile(regEx_style,Pattern.CASE_INSENSITIVE);
    m_style = p_style.matcher(htmlStr);
    htmlStr = m_style.replaceAll(""); //过滤style标签

    p_html = Pattern.compile(regEx_html,Pattern.CASE_INSENSITIVE);
    m_html = p_html.matcher(htmlStr);
    htmlStr = m_html.replaceAll(""); //过滤html标签

    p_nbsp = Pattern.compile(regEx_nbsp,Pattern.CASE_INSENSITIVE);
    m_nbsp = p_nbsp.matcher(htmlStr);
    htmlStr = m_nbsp.replaceAll(""); //过滤&nbsp;

    // p_r = Pattern.compile(regEx_r, Pattern.CASE_INSENSITIVE);
    // m_r = p_r.matcher(htmlStr);
    // htmlStr = m_r.replaceAll("");//过滤
    //
    // p_n = Pattern.compile(regEx_n, Pattern.CASE_INSENSITIVE);
    // m_n = p_n.matcher(htmlStr);
    // htmlStr = m_n.replaceAll("");//过滤

    textStr = htmlStr;

    }catch(Exception e) {
    }
    return textStr;
    }

  • 相关阅读:
    2020 春 学期总结
    计算机科学的咬文嚼字:“并行”与“并发”
    Codeforces 1251E Voting
    Codeforces 1251D Salary Changing
    Asia Jakarta Regional Contest 2019 I
    hdu1007 Quoit Design
    2019春季学期回忆和总结
    bzoj5017 [Snoi2017]炸弹
    我永远讨厌gch文件
    bzoj5102 [POI2018]Prawnicy
  • 原文地址:https://www.cnblogs.com/dead-trap-ramble/p/3477907.html
Copyright © 2011-2022 走看看