zoukankan      html  css  js  c++  java
  • 富文本中文字部分提取

    //富文本编辑器内的内容保存到数据库后是一段html代码,现因某些需求需要去掉其中的样式等内容,只保留文字,代码如下:
    /**
     * Extracts the text content of an HTML fragment (for example the markup
     * produced by a rich-text editor), discarding tags and attributes.
     *
     * <p>The class is a Swing HTML parser callback that appends every text run
     * reported by the parser to an internal buffer. Use the static
     * {@link #getContent(String)} helper for one-shot conversion.
     */
    public class HtmlToText extends HTMLEditorKit.ParserCallback {
        /** Text collected by the most recent {@link #parse(String)} call; never null. */
        StringBuffer stringBuffer = new StringBuffer();

        private HtmlToText() {
        }

        /**
         * Parses the given HTML and collects its text, replacing whatever text
         * was collected by a previous call.
         *
         * @param str the HTML to parse; {@code null} is treated as empty input
         * @throws IOException if the underlying parser reports a read failure
         */
        public void parse(String str) throws IOException {
            stringBuffer = new StringBuffer();
            if (str == null) {
                return;
            }
            // Read the characters straight from the String. Encoding to bytes and
            // decoding again with the platform default charset corrupts any
            // character that charset cannot represent.
            try (Reader in = new java.io.StringReader(str)) {
                // ignoreCharSet = true: the input is already decoded text, so a
                // charset declared inside the markup must not trigger re-decoding.
                new ParserDelegator().parse(in, this, true);
            }
        }

        /**
         * Parser callback: appends one run of text to the buffer.
         *
         * @param text the characters of the text run
         * @param pos  the position reported by the parser (unused)
         */
        @Override
        public void handleText(char[] text, int pos) {
            stringBuffer.append(text);
        }

        /**
         * Returns the text collected so far.
         *
         * @return the collected text, or an empty string if nothing was parsed
         */
        public String getText() {
            return stringBuffer.toString();
        }

        /**
         * Converts an HTML string to its text content.
         *
         * <p>Each call uses its own parser callback instance, so calls do not
         * share a text buffer with one another.
         *
         * @param str the HTML to convert; {@code null} yields an empty string
         * @return the text found in {@code str}; if parsing fails part-way, the
         *         text collected up to that point
         */
        public static String getContent(String str) {
            HtmlToText extractor = new HtmlToText();
            try {
                extractor.parse(str);
            } catch (IOException e) {
                // Best effort: report the failure and fall through to return
                // whatever text was collected before it.
                java.util.logging.Logger.getLogger(HtmlToText.class.getName())
                        .log(java.util.logging.Level.WARNING, "Failed to parse HTML content", e);
            }
            return extractor.getText();
        }

        /** Small demo: prints the text extracted from a sample string. */
        public static void main(String[] args) {
            String text = HtmlToText.getContent("你的富文本字符串");
            System.out.println(text);
        }
    }
    

      

  • 相关阅读:
    Java随笔
    Java随笔
    Java随笔
    CF1271D Portals(反悔贪心)
    CF938D Buy a Ticket(最短路)
    CF1117C Magic Ship(二分)
    HDU6820 Tree(树形dp)
    P2393 美味(主席树+贪心)
    HDU6831 Fragrant numbers(区间dp)
    HDU6832 A Very Easy Graph Problem(生成树)
  • 原文地址:https://www.cnblogs.com/ShouWangYiXin/p/13929885.html
Copyright © 2011-2022 走看看