zoukankan      html  css  js  c++  java
  • nekohtml转换html时标签变大写的问题

    /**
     * Parses an HTML stream with NekoHTML and converts the resulting W3C DOM into
     * the {@code Document} type produced by {@code DOMReader}.
     *
     * <p>Element names are forced to lower case through the NekoHTML
     * {@code names/elems} property. Any DOCTYPE node is removed from the W3C DOM
     * before it is handed to {@code DOMReader}.
     *
     * <p>This method does not explicitly close {@code stream}; the caller remains
     * responsible for it.
     *
     * @param stream  the HTML input; when {@code null} the method returns {@code null}
     * @param charset name of the charset used to decode {@code stream}; when empty
     *                according to {@code StringUtils.isEmpty}, {@code DEFAULT_CHARSET}
     *                is used instead
     * @return the converted document, or {@code null} when {@code stream} is
     *         {@code null} or when parsing fails with a {@code SAXException} or
     *         {@code IOException} (the stack trace is printed in that case)
     */
    public static Document transferByNeko(InputStream stream, String charset) {
        if (stream == null) {
            return null;
        }
        if (StringUtils.isEmpty(charset)) {
            charset = DEFAULT_CHARSET;
        }

        Document doc = null;
        try {
            // Per the original author's note: NekoHTML's own DOMParser upper-cased the
            // HTML tag names whether or not the names/elems property was set, so the
            // Xerces DOMParser is used instead, driven by a NekoHTML HTMLConfiguration.
            HTMLConfiguration htmlConfiguration = new HTMLConfiguration();
            htmlConfiguration.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
            org.apache.xerces.parsers.DOMParser parser =
                    new org.apache.xerces.parsers.DOMParser(htmlConfiguration);

            // Decode the bytes with the requested charset before handing them to the parser.
            InputSource inputSource =
                    new InputSource(new InputStreamReader(stream, Charset.forName(charset)));
            parser.parse(inputSource);

            org.w3c.dom.Document document = parser.getDocument();

            // Strip the DOCTYPE node before conversion.
            // NOTE(review): the reason is not visible here — presumably DOMReader does
            // not need or cannot handle it; confirm before changing.
            DocumentType documentType = document.getDoctype();
            if (documentType != null) {
                document.removeChild(documentType);
            }

            doc = new DOMReader().read(document);
        } catch (SAXException e) {
            // Best effort: report the failure and fall through to return null.
            e.printStackTrace();
        } catch (IOException e) {
            // Best effort: report the failure and fall through to return null.
            e.printStackTrace();
        }
        return doc;
    }
        
  • 相关阅读:
    例子2.5
    例子2.4
    例子2.3
    例2
    例2.1
    快闪PPT 快速入门教程 脑洞大开,特效随你定 ----口袋动画PA(初学易上手001):
    Linux 基础与应用教程 0011(DNS解析)
    课外阅读记录:001
    好的特效模板
    学习记住并且时常要打开去看的VIM
  • 原文地址:https://www.cnblogs.com/yesun/p/8628285.html
Copyright © 2011-2022 走看看