zoukankan      html  css  js  c++  java
  • 检查文本文件编码的Java程序

    package checkCoding;

    import java.io.BufferedInputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;

    /**
     * Guesses the character encoding of a text file from its byte-order mark (BOM).
     *
     * <p>Only the first three bytes of the file are inspected. Files that carry no
     * recognised BOM (including UTF-8 text saved without a BOM) are reported with
     * the default charset name {@code "GBK"}.
     */
    public class CheckCoding
    {
        /** Charset name returned when no BOM is recognised or the file cannot be read. */
        private static final String DEFAULT_CHARSET = "GBK";

        /** Longest BOM this class recognises (UTF-8: EF BB BF). */
        private static final int MAX_BOM_LENGTH = 3;

        private final File file;

        /**
         * Creates a checker for the given file.
         *
         * @param file the file whose encoding should be guessed
         */
        public CheckCoding(File file)
        {
            this.file = file;
        }

        /**
         * Creates a checker for the file at the given path.
         *
         * @param path path of the file whose encoding should be guessed
         */
        public CheckCoding(String path)
        {
            file = new File(path);
        }

        /**
         * Reads the leading bytes of the file and maps a recognised BOM to a charset name.
         *
         * @return {@code "UTF-16LE"}, {@code "UTF-16BE"} or {@code "UTF-8"} when the
         *         matching BOM is present; {@code "GBK"} otherwise, including when the
         *         file is empty, missing or unreadable
         */
        public String getCharset()
        {
            if (file == null)
            {
                return DEFAULT_CHARSET;
            }

            byte[] head = new byte[MAX_BOM_LENGTH];
            int length = 0;
            try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(file)))
            {
                // A single read() may deliver fewer bytes than requested, so keep
                // reading until the buffer is full or the stream ends.
                while (length < head.length)
                {
                    int n = in.read(head, length, head.length - length);
                    if (n == -1)
                    {
                        break;
                    }
                    length += n;
                }
            }
            catch (IOException | SecurityException e)
            {
                // Best effort: an unreadable file is reported with the default charset.
                System.err.println("CheckCoding: cannot read " + file + ": " + e);
                return DEFAULT_CHARSET;
            }
            return detectFromBom(head, length);
        }

        /**
         * Maps the leading bytes of a file to a charset name.
         *
         * @param head   buffer holding the first bytes of the file
         * @param length number of valid bytes in {@code head}
         * @return the charset name for a recognised BOM, otherwise {@code "GBK"}
         */
        private static String detectFromBom(byte[] head, int length)
        {
            if (length >= 2)
            {
                if (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE)
                {
                    return "UTF-16LE";
                }
                if (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF)
                {
                    return "UTF-16BE";
                }
            }
            if (length >= 3
                    && head[0] == (byte) 0xEF
                    && head[1] == (byte) 0xBB
                    && head[2] == (byte) 0xBF)
            {
                return "UTF-8";
            }
            return DEFAULT_CHARSET;
        }

        /**
         * Prints the guessed charset of a file.
         *
         * @param args optional: the path of the file to check; when absent, a
         *             built-in sample path is used
         */
        public static void main(String[] args)
        {
            // The backslash must be escaped: "\123" would otherwise be parsed as an
            // octal escape (the character 'S') and silently corrupt the path.
            String path = args.length > 0 ? args[0] : "d:\\1231232.txt";
            CheckCoding fer = new CheckCoding(path);
            System.out.println(fer.getCharset());
        }
    }

  • 相关阅读:
    53. Maximum Subarray
    64. Minimum Path Sum
    28. Implement strStr()
    26. Remove Duplicates from Sorted Array
    21. Merge Two Sorted Lists
    14. Longest Common Prefix
    7. Reverse Integer
    412. Fizz Buzz
    linux_修改域名(centos)
    linux_redis常用数据类型操作
  • 原文地址:https://www.cnblogs.com/alaricblog/p/3278354.html
Copyright © 2011-2022 走看看