zoukankan      html  css  js  c++  java
  • 检查文本文件编码的Java程序

    package checkCoding;

    import java.io.BufferedInputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;

    /**
     * Guesses the character encoding of a text file from its leading byte-order mark (BOM).
     *
     * <p>Only three BOMs are recognised: UTF-16LE ({@code FF FE}), UTF-16BE ({@code FE FF})
     * and UTF-8 ({@code EF BB BF}). Any file without one of these prefixes, including
     * UTF-8 text saved without a BOM, is reported as {@value #DEFAULT_CHARSET}.
     */
    public class CheckCoding
    {
        /** Charset name reported when no BOM is recognised or the file cannot be read. */
        private static final String DEFAULT_CHARSET = "GBK";

        /** Path used by {@link #main(String[])} when no argument is supplied. */
        private static final String DEFAULT_PATH = "d:\\1231232.txt";

        /** The file whose encoding is inspected. */
        private final File file;

        /**
         * Creates a checker for the given file.
         *
         * @param file the file to inspect
         */
        public CheckCoding(File file)
        {
            this.file = file;
        }

        /**
         * Creates a checker for the file at the given path.
         *
         * @param path path of the file to inspect
         */
        public CheckCoding(String path)
        {
            file = new File(path);
        }

        /**
         * Reads the first bytes of the file and maps a recognised BOM to a charset name.
         *
         * <p>This is a best-effort lookup: if the file is missing, unreadable or empty,
         * or carries no recognised BOM, the default {@code "GBK"} is returned instead of
         * an exception being thrown. Read failures are reported on {@code System.err}.
         *
         * @return {@code "UTF-16LE"}, {@code "UTF-16BE"}, {@code "UTF-8"} or {@code "GBK"}
         */
        public String getCharset()
        {
            if (file == null)
            {
                return DEFAULT_CHARSET;
            }

            byte[] head = new byte[3];
            int length;
            try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(file)))
            {
                length = in.read(head, 0, head.length);
            }
            catch (IOException | SecurityException e)
            {
                // Best effort: an unreadable file falls back to the default charset.
                System.err.println("CheckCoding: cannot read " + file + ": " + e);
                return DEFAULT_CHARSET;
            }
            return detectFromBom(head, length);
        }

        /**
         * Maps the leading bytes of a file to a charset name.
         *
         * @param head   buffer holding the first bytes of the file
         * @param length number of valid bytes in {@code head}; {@code -1} for an empty file
         * @return the charset named by the BOM, or the default when none matches
         */
        private static String detectFromBom(byte[] head, int length)
        {
            if (length >= 2)
            {
                if (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE)
                {
                    return "UTF-16LE";
                }
                if (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF)
                {
                    return "UTF-16BE";
                }
            }
            if (length >= 3
                    && head[0] == (byte) 0xEF
                    && head[1] == (byte) 0xBB
                    && head[2] == (byte) 0xBF)
            {
                return "UTF-8";
            }
            return DEFAULT_CHARSET;
        }

        /**
         * Prints the detected charset of a file to standard output.
         *
         * @param args optional; {@code args[0]} is the path to inspect, otherwise a
         *             built-in sample path is used
         */
        public static void main(String[] args)
        {
            String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
            CheckCoding fer = new CheckCoding(path);
            System.out.println(fer.getCharset());
        }
    }

  • 相关阅读:
    Linux安装Nginx
    CentOS7上安装google谷歌浏览器
    解决MYSQL的错误:Got a packet bigger than 'max_allowed_packet' bytes
    Mysql ERROR 1067: Invalid default value for 字段
    Mysql查看版本号的五种方式介绍
    linux下mysql 查看默认端口号与修改端口号方法
    yum 和 rpm安装mysql彻底删除
    SpringBootSecurity学习(06)网页版登录方法级别的权限
    SpringBootSecurity学习(05)网页版登录内存中配置默认用户
    SpringBootSecurity学习(04)网页版登录其它授权和登录处理
  • 原文地址:https://www.cnblogs.com/alaricblog/p/3278354.html
Copyright © 2011-2022 走看看