zoukankan      html  css  js  c++  java
  • 将GB2312的文件转为Unicode格式

    将GB2312的文件转为Unicode格式

    using System;
    using System.Collections.Generic;
    using System.Text;

    namespace CrmTools
    {
    /// <summary>
    /// File helpers for reading text in the system default encoding and for converting
    /// GB2312 files in place to UTF-16 little-endian (what .NET calls "Unicode").
    /// </summary>
    class FileUtil
    {
        // Number of bytes pulled from the source file per read while streaming.
        private const int BufferSize = 1024;

        /// <summary>
        /// Reads the whole file and decodes it with <see cref="Encoding.Default"/>.
        /// </summary>
        /// <param name="fileName">Path of the file to read.</param>
        /// <returns>The decoded file content; an empty string for an empty file.</returns>
        public static string readFile(string fileName)
        {
            // The reader decodes across its internal buffer boundaries, so multi-byte
            // characters are never split and no unread buffer tail leaks into the result.
            // BOM detection is switched off: the bytes are always decoded with Encoding.Default.
            using (System.IO.FileStream fs = System.IO.File.Open(
                fileName, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read))
            using (System.IO.StreamReader reader = new System.IO.StreamReader(fs, Encoding.Default, false))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Converts a GB2312 file in place to UTF-16 little-endian with a leading
        /// FF FE byte-order mark. Files that already start with that mark are left untouched.
        /// </summary>
        /// <param name="srcFileName">Path of the file to convert; it is overwritten on success.</param>
        /// <param name="destEncode">
        /// Currently ignored: the output is always UTF-16 little-endian. Kept so existing
        /// callers continue to compile.
        /// </param>
        /// <returns>
        /// <c>true</c> if the file already carried the byte-order mark or was converted;
        /// <c>false</c> if the conversion failed for any reason.
        /// </returns>
        /// <remarks>
        /// The initial byte-order-mark check runs outside the error handling, so a missing or
        /// unreadable source file surfaces as an exception rather than <c>false</c>.
        /// NOTE(review): on .NET Core / .NET 5+ the "gb2312" encoding presumably needs the
        /// code-pages encoding provider to be registered by the application - confirm for the
        /// target runtime.
        /// </remarks>
        public static bool convertFileEncode(string srcFileName, string destEncode)
        {
            if (isUTF8File(srcFileName))
            {
                return true;
            }

            string destFileName = null;
            try
            {
                destFileName = System.IO.Path.GetTempFileName();
                Encoding gb2312 = Encoding.GetEncoding("gb2312");

                // A stateful decoder remembers a lead byte that ends one chunk and pairs it
                // with the trail byte that starts the next one; converting each chunk on its
                // own would corrupt every double-byte character that straddles a boundary.
                Decoder decoder = gb2312.GetDecoder();

                using (System.IO.FileStream source = System.IO.File.Open(
                    srcFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read))
                using (System.IO.FileStream target = System.IO.File.Create(destFileName))
                {
                    // An empty source stays empty: the byte-order mark is only written
                    // when there is content to follow it.
                    if (source.Length > 0)
                    {
                        target.WriteByte(0xFF);
                        target.WriteByte(0xFE);
                    }

                    byte[] input = new byte[BufferSize];
                    char[] chars = new char[gb2312.GetMaxCharCount(BufferSize)];
                    int bytesRead;
                    while ((bytesRead = source.Read(input, 0, input.Length)) > 0)
                    {
                        int charCount = decoder.GetChars(input, 0, bytesRead, chars, 0, false);
                        WriteUtf16(target, chars, charCount);
                    }

                    // Flush the decoder so a dangling lead byte at end of file still
                    // produces its fallback character instead of being dropped silently.
                    int tailCount = decoder.GetChars(input, 0, 0, chars, 0, true);
                    WriteUtf16(target, chars, tailCount);
                }

                // Both streams are closed here, so the source can be overwritten.
                System.IO.File.Copy(destFileName, srcFileName, true);
            }
            catch (Exception)
            {
                // Deliberate best-effort contract: every failure is reported as false.
                return false;
            }
            finally
            {
                if (destFileName != null)
                {
                    try
                    {
                        System.IO.File.Delete(destFileName);
                    }
                    catch (Exception)
                    {
                        // Cleanup only: a leftover temp file must not change the result
                        // or turn this method into a throwing one.
                    }
                }
            }
            return true;
        }

        /// <summary>
        /// Reports whether the file starts with the two bytes FF FE.
        /// </summary>
        /// <remarks>
        /// Despite the name, FF FE is the UTF-16 little-endian byte-order mark (the preamble
        /// of <see cref="Encoding.Unicode"/>), not the UTF-8 one. The name is kept for
        /// compatibility with existing callers.
        /// </remarks>
        /// <param name="srcFileName">Path of the file to inspect.</param>
        /// <returns><c>true</c> if the first two bytes are FF FE; otherwise <c>false</c>.</returns>
        public static bool isUTF8File(string srcFileName)
        {
            using (System.IO.FileStream fs = System.IO.File.Open(
                srcFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read))
            {
                byte[] header = new byte[2];
                int bytesRead = fs.Read(header, 0, header.Length);
                return bytesRead == header.Length && header[0] == 0xFF && header[1] == 0xFE;
            }
        }

        /// <summary>
        /// Converts a complete GB2312 byte sequence to UTF-16 little-endian bytes
        /// (no byte-order mark is added).
        /// </summary>
        /// <param name="gb2312Bytes">
        /// GB2312-encoded bytes. The array must hold whole characters; a buffer cut in the
        /// middle of a double-byte character cannot be converted correctly on its own.
        /// </param>
        /// <returns>The same text encoded as UTF-16 little-endian.</returns>
        public static byte[] GB2312toUnicodeBytes(byte[] gb2312Bytes)
        {
            Encoding gb2312 = Encoding.GetEncoding("gb2312");
            Encoding utf16 = Encoding.Unicode;
            return Encoding.Convert(gb2312, utf16, gb2312Bytes);
        }

        // Encodes the first charCount characters as UTF-16 LE and appends them to the stream.
        private static void WriteUtf16(System.IO.Stream target, char[] chars, int charCount)
        {
            if (charCount <= 0)
            {
                return;
            }
            byte[] encoded = Encoding.Unicode.GetBytes(chars, 0, charCount);
            target.Write(encoded, 0, encoded.Length);
        }
    }
    }

    antony
    :antony1029@163.com
    :http://antony1029.cnblogs.com
  • 相关阅读:
    SQL Server游标的使用
    SQL函数说明大全
    基本数据结构:链表(list)
    QT+C++实现连连看
    printf按8进制、16进制输出
    关于数据库优化问题收集
    SQL Server 查询处理中的各个阶段(SQL执行顺序) 转
    MFC消息机制
    _T() 和_L() _TEXT __T,L区别与联系详解
    Windows 各种控件使用心得
  • 原文地址:https://www.cnblogs.com/antony1029/p/1295421.html
Copyright © 2011-2022 走看看