zoukankan      html  css  js  c++  java
  • [工具类] 获取URL编码1

    package com.claw.util.html;
    
    import java.io.BufferedInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Utility for guessing the character encoding of a byte stream or of the
     * body served at a URL, using {@code BytesEncodingDetect}.
     *
     * <p>Both public methods are best-effort: any failure is reported on the
     * console and the default charset {@code "UTF-8"} is returned instead of an
     * exception being thrown.
     *
     * <p>NOTE(review): the simple name {@code Charset} collides with
     * {@code java.nio.charset.Charset}; it is kept for backward compatibility.
     */
    public class Charset {

        /** Charset reported when detection is impossible or fails. */
        private static final String DEFAULT_CHARSET = "UTF-8";

        /** Maximum number of leading bytes handed to the detector. */
        private static final int SAMPLE_SIZE = 1024;

        /** Connect and read timeout for HTTP requests, in milliseconds. */
        private static final int TIMEOUT_MILLIS = 10000;

        /**
         * Manual test entry point. It currently only builds a list of sample
         * URLs; the loop that used them is disabled.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            List<String> list = new ArrayList<String>();

            list.add("http://li200429.iteye.com/blog/1608758");
            list.add("http://blog.csdn.net/vic0228/article/details/49634311");
            list.add("http://www.zhihu.com/");
            list.add("http://www.sohu.com/");
            list.add("http://blog.163.com/wenchangqing_live/blog/static/173722309201182044545864/");

            // NOTE(review): disabled demo loop; getHTML/getTitle are not defined in this class.
    /*        for (String url : list) {
                String html = getHTML(url);
                String title = getTitle(html);
                System.out.println("url:"+url+" ----- title:"+title);
                if(title.equals("")){
                    System.out.println(html);
                }
            }*/
        }

        /**
         * Guesses the charset of the data at the current position of a stream.
         *
         * <p>Up to {@value #SAMPLE_SIZE} bytes are consumed from {@code in}. The
         * stream is not closed and not rewound; callers that need the sampled
         * bytes again must buffer or reopen the source themselves. The call
         * blocks until the sample is full or the stream ends.
         *
         * @param in stream to sample
         * @return the detected charset name, with {@code "GB-2312"} reported as
         *         {@code "GBK"}; {@code "UTF-8"} if the stream is empty or any
         *         error (including a {@code null} stream) occurs
         */
        public static String getCharset(InputStream in) {
            String charset = DEFAULT_CHARSET;
            try {
                charset = normalize(detectRawEncoding(in));
            } catch (Exception e) {
                // Best-effort API: report the problem and fall back to the default.
                e.printStackTrace();
            }
            return charset;
        }

        /**
         * Fetches a URL over HTTP and guesses the charset of the response body.
         *
         * <p>Only a 200 response is inspected; for every other status (the
         * original author noted problems with 404 pages and marked this method
         * as temporarily out of use) the default charset is returned. The
         * status code and the raw detector result are printed to standard
         * output, as before.
         *
         * @param urlStr absolute HTTP(S) URL to fetch
         * @return the detected charset name, with {@code "GB-2312"} reported as
         *         {@code "GBK"}; {@code "UTF-8"} on a non-200 status, an empty
         *         body, a timeout or any other error
         */
        public static String getCharset(String urlStr) {
            String charset = DEFAULT_CHARSET;
            HttpURLConnection conn = null;
            InputStream in = null;
            try {
                conn = (HttpURLConnection) new URL(urlStr).openConnection();
                conn.setConnectTimeout(TIMEOUT_MILLIS);
                // Without a read timeout a stalled server would block forever.
                conn.setReadTimeout(TIMEOUT_MILLIS);
                conn.setRequestProperty("User-Agent",
                        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)");
                conn.connect();

                int status = conn.getResponseCode();
                System.out.println(status);
                if (status == HttpURLConnection.HTTP_OK) {
                    in = new BufferedInputStream(conn.getInputStream());
                    String encode = detectRawEncoding(in);
                    System.out.println("encode:" + encode);
                    charset = normalize(encode);
                }
            } catch (Exception e) {
                System.out.println(urlStr);
                e.printStackTrace();
            } finally {
                if (in != null) {
                    try {
                        in.close();
                    } catch (IOException e) {
                        System.out.println(urlStr);
                        e.printStackTrace();
                    }
                }
                if (conn != null) {
                    // Release the underlying connection; the original never did.
                    conn.disconnect();
                }
            }
            return charset;
        }

        /**
         * Reads up to {@value #SAMPLE_SIZE} bytes and asks the detector for the
         * encoding label of exactly the bytes that were read.
         *
         * @param in stream to sample
         * @return the detector's label, or {@code null} if the stream yielded
         *         no bytes
         * @throws IOException if reading from the stream fails
         */
        private static String detectRawEncoding(InputStream in) throws IOException {
            byte[] buffer = new byte[SAMPLE_SIZE];
            int filled = 0;
            // A single read() may return only a few bytes, so keep reading
            // until the sample is full or the stream ends.
            while (filled < buffer.length) {
                int n = in.read(buffer, filled, buffer.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
            if (filled == 0) {
                return null;
            }

            // Hand the detector only real data, never the zero padding.
            byte[] sample = buffer;
            if (filled < buffer.length) {
                sample = new byte[filled];
                System.arraycopy(buffer, 0, sample, 0, filled);
            }

            BytesEncodingDetect detector = new BytesEncodingDetect();
            return BytesEncodingDetect.nicename[detector.detectEncoding(sample)];
        }

        /**
         * Maps a detector label to the charset name returned to callers.
         *
         * @param encode detector label, may be {@code null}
         * @return {@code "UTF-8"} for {@code null}, {@code "GBK"} for
         *         {@code "GB-2312"}, otherwise {@code encode} unchanged
         */
        private static String normalize(String encode) {
            if (encode == null) {
                return DEFAULT_CHARSET;
            }
            if (encode.equals("GB-2312")) {
                return "GBK";
            }
            return encode;
        }
    }
  • 相关阅读:
    201521123051 《Java程序设计》 第二周学习总结
    201521123001《Java程序设计》第11周学习总结
    201521123001《Java程序设计》第12周学习总结
    201521123001《Java程序设计》第11周学习总结
    201521123001《Java程序设计》第10周学习总结
    201521123001《Java程序设计》第9周学习总结
    201521123001《Java程序设计》第8周学习总结
    201521123001《Java程序设计》第7周学习总结
    201521123001《Java程序设计》第6周学习总结
    201521123001《Java程序设计》第5周学习总结
  • 原文地址:https://www.cnblogs.com/feelgood/p/4992131.html
Copyright © 2011-2022 走看看