utf-8编码的txt文件第一行含有不可见字符
原因
文件开头隐藏了 3 个字节的 BOM(字节顺序标记)"EF BB BF",用于标识该文件采用 UTF-8 编码。使用 UltraEdit 以 16 进制方式打开文件,可以很清楚地看到第一行开头的 EF BB BF 这三个字节。
解决方法
1、手动修改编码
使用Notepad++打开txt文件执行如下操作“编码-->以UTF-8无BOM格式编码”,修改后将txt文本进行保存。

2、用代码消除(JAVA)
package com.montnets.email.common; /** * @author : chenlinyan * @version : 2.0 * @date : 2019/8/15 11:00 */ import java.io.*; /** * Generic unicode textreader, which will use BOM mark to identify the encoding * to be used. If BOM is not found then use a given default or system encoding. */ public class UnicodeReader extends Reader { PushbackInputStream internalIn; InputStreamReader internalIn2 = null; String defaultEnc; private static final int BOM_SIZE = 4; /** * * @param in * inputstream to be read * @param defaultEnc * default encoding if stream does not have BOM marker. Give NULL * to use system-level default. */ public UnicodeReader(InputStream in, String defaultEnc) { internalIn = new PushbackInputStream(in, BOM_SIZE); this.defaultEnc = defaultEnc; } public String getDefaultEncoding() { return defaultEnc; } /** * Get stream encoding or NULL if stream is uninitialized. Call init() or * read() method to initialize it. */ public String getEncoding() { if (internalIn2 == null) return null; return internalIn2.getEncoding(); } /** * Read-ahead four bytes and check for BOM marks. Extra bytes are unread * back to the stream, only BOM bytes are skipped. 
*/ protected void init() throws IOException { if (internalIn2 != null) return; String encoding; byte bom[] = new byte[BOM_SIZE]; int n, unread; n = internalIn.read(bom, 0, bom.length); if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) { encoding = "UTF-32BE"; unread = n - 4; } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) { encoding = "UTF-32LE"; unread = n - 4; } else if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) { encoding = "UTF-8"; unread = n - 3; } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) { encoding = "UTF-16BE"; unread = n - 2; } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) { encoding = "UTF-16LE"; unread = n - 2; } else { // Unicode BOM mark not found, unread all bytes encoding = defaultEnc; unread = n; } // System.out.println("read=" + n + ", unread=" + unread); if (unread > 0) internalIn.unread(bom, (n - unread), unread); // Use given encoding if (encoding == null) { internalIn2 = new InputStreamReader(internalIn); } else { internalIn2 = new InputStreamReader(internalIn, encoding); } } public void close() throws IOException { init(); internalIn2.close(); } public int read(char[] cbuf, int off, int len) throws IOException { init(); return internalIn2.read(cbuf, off, len); } }
以上代码会在读取时自动跳过文件开头用于标识编码的 BOM 字节。
用法
/**
 * Usage example: prints every line of {@code sms.txt}, reading it through
 * {@code UnicodeReader} with "utf-8" as the fallback encoding.
 */
public class Main {
    public static void main(String[] args) {
        File txtFile = new File("sms.txt");
        // try-with-resources closes both the file stream and the reader,
        // whether the loop finishes normally or an exception is thrown.
        try (FileInputStream fis = new FileInputStream(txtFile);
             BufferedReader br = new BufferedReader(new UnicodeReader(fis, "utf-8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
参考地址 https://blog.csdn.net/sinat_37356064/article/details/78044929