zoukankan      html  css  js  c++  java
  • 将GB2312的文件转为Unicode格式

    将GB2312的文件转为Unicode格式

    using System;
    using System.Collections.Generic;
    using System.Text;

    namespace CrmTools
    {
    /// <summary>
    /// File helpers for reading text in the system default encoding and for converting
    /// GB2312 files in place to UTF-16 little-endian ("Unicode" in .NET terminology).
    /// </summary>
    /// <remarks>
    /// The GB2312 code page is looked up with <c>Encoding.GetEncoding("gb2312")</c>. On runtimes
    /// where code-page encodings are not built in, the host application has to register a
    /// code-page encoding provider before calling the conversion methods.
    /// </remarks>
    class FileUtil
    {
        // The two bytes of the UTF-16 little-endian byte order mark.
        private const byte BomFirstByte = 0xFF;
        private const byte BomSecondByte = 0xFE;

        // Number of characters moved per read/write pass while transcoding.
        private const int CopyBufferSize = 1024;

        /// <summary>
        /// Reads a whole file and decodes it with <see cref="Encoding.Default"/>.
        /// </summary>
        /// <param name="fileName">Path of the file to read.</param>
        /// <returns>The decoded content of the file.</returns>
        public static string readFile(string fileName)
        {
            // Decode the file in one piece: decoding fixed-size chunks independently appends
            // stale buffer bytes after a short read and breaks multi-byte characters that
            // straddle a chunk boundary.
            byte[] content = System.IO.File.ReadAllBytes(fileName);
            return Encoding.Default.GetString(content);
        }

        /// <summary>
        /// Converts a GB2312 file in place to UTF-16 little-endian with a byte order mark.
        /// </summary>
        /// <param name="srcFileName">Path of the file to convert; it is overwritten on success.</param>
        /// <param name="destEncode">
        /// Not used: the output is always UTF-16 little-endian. Kept so existing callers compile.
        /// </param>
        /// <returns>
        /// <c>true</c> when the file already starts with the UTF-16 LE byte order mark or was
        /// converted; <c>false</c> when the conversion failed.
        /// </returns>
        /// <remarks>
        /// The initial byte-order-mark check runs outside the error handling, so a failure to open
        /// <paramref name="srcFileName"/> for that check is thrown to the caller.
        /// </remarks>
        public static bool convertFileEncode(string srcFileName, string destEncode)
        {
            if (isUTF8File(srcFileName))
            {
                return true;
            }

            string tempFileName = null;
            try
            {
                tempFileName = System.IO.Path.GetTempFileName();
                Encoding gb2312 = Encoding.GetEncoding("gb2312");

                // The byte order mark is written by hand so that an empty source file stays
                // empty; the writer's encoding therefore must not emit one itself.
                Encoding utf16WithoutBom = new UnicodeEncoding(false, false);

                using (System.IO.FileStream input = System.IO.File.Open(
                    srcFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read))
                using (System.IO.FileStream output = System.IO.File.Create(tempFileName))
                {
                    if (input.Length > 0)
                    {
                        output.WriteByte(BomFirstByte);
                        output.WriteByte(BomSecondByte);
                    }

                    // The reader keeps decoder state between reads, so a double-byte GB2312
                    // character split across two reads is still decoded as one character.
                    using (System.IO.StreamReader reader = new System.IO.StreamReader(input, gb2312, false))
                    using (System.IO.StreamWriter writer = new System.IO.StreamWriter(output, utf16WithoutBom))
                    {
                        char[] buffer = new char[CopyBufferSize];
                        int charsRead;
                        while ((charsRead = reader.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            writer.Write(buffer, 0, charsRead);
                        }
                    }
                }

                System.IO.File.Copy(tempFileName, srcFileName, true);
            }
            catch (Exception)
            {
                // Callers rely on a boolean result rather than exceptions for conversion failures.
                return false;
            }
            finally
            {
                deleteQuietly(tempFileName);
            }

            return true;
        }

        /// <summary>
        /// Tells whether a file starts with the UTF-16 little-endian byte order mark (FF FE).
        /// </summary>
        /// <remarks>
        /// Despite its name this method does not detect UTF-8; it only recognises the mark that
        /// <see cref="convertFileEncode"/> writes.
        /// </remarks>
        /// <param name="srcFileName">Path of the file to inspect.</param>
        /// <returns><c>true</c> when the first two bytes are FF FE; otherwise <c>false</c>.</returns>
        public static bool isUTF8File(string srcFileName)
        {
            using (System.IO.FileStream stream = System.IO.File.Open(
                srcFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read))
            {
                // ReadByte returns -1 at end of file, which never equals a mark byte.
                int first = stream.ReadByte();
                int second = stream.ReadByte();
                return first == BomFirstByte && second == BomSecondByte;
            }
        }

        /// <summary>
        /// Converts GB2312-encoded bytes to UTF-16 little-endian bytes (no byte order mark).
        /// </summary>
        /// <param name="gb2312Bytes">
        /// Complete GB2312 text. Passing a fragment that ends in the middle of a double-byte
        /// character loses that character.
        /// </param>
        /// <returns>The same text encoded as UTF-16 little-endian.</returns>
        public static byte[] GB2312toUnicodeBytes(byte[] gb2312Bytes)
        {
            Encoding gb2312 = Encoding.GetEncoding("gb2312");
            return Encoding.Convert(gb2312, Encoding.Unicode, gb2312Bytes);
        }

        // Removes the temporary file if one was created; cleanup failures are ignored because
        // they must not change the result of the conversion.
        private static void deleteQuietly(string path)
        {
            if (path == null)
            {
                return;
            }

            try
            {
                System.IO.File.Delete(path);
            }
            catch (System.IO.IOException)
            {
            }
            catch (UnauthorizedAccessException)
            {
            }
        }
    }
    }

    antony
    :antony1029@163.com
    :http://antony1029.cnblogs.com
  • 相关阅读:
    访问日志不记录静态文件、访问日志切割、静态元素(文件)过期时间
    apache用户认证、域名跳转、Apache访问日志(两种格式)
    apache 和 php 整合、apache配制虚拟机
    PHP5和PHP7的安装、PHP和apache的整合!
    mariaDB的安装 apache的安装
    lamp 相关mysql的安装
    rsync命令 续集 、linux系统日志、screen工具
    rsync命令
    linux cron计划任务、chkconfig 命令、systemd命令、unit 相关、target 相关
    spark_to_es
  • 原文地址:https://www.cnblogs.com/antony1029/p/1295421.html
Copyright © 2011-2022 走看看