zoukankan      html  css  js  c++  java
  • 使用C#.NET调用ICU进行编码检测和编码转换

    ICU的C/C++版本:ICU4C

    相关API的用法可查阅官方文档,本例只演示使用P/Invoke调用。

    DLL文件需要注意区分32位和64位。

    官方API文档:ICU-docs

    P/Invoke相关文档:Native interoperability、Interop Marshaling

    非常有用的P/Invoke函数签名查询工具:PINVOKE.NET

    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Runtime.InteropServices;
    using System.Text;
    
    namespace EncodingConverter.Console
    {
        /// <summary>
        /// Console demo that drives ICU4C through P/Invoke: converts TEST.TXT (UTF-16) to GBK,
        /// converts the GBK file back to UTF-16, and finally runs ICU's charset detector on the GBK file.
        /// </summary>
        class Program
        {
            // Numeric value of ICU's U_BUFFER_OVERFLOW_ERROR. A preflight call (destCapacity == 0)
            // reports this status when the result is non-empty, so it is expected there.
            private const int BufferOverflowErrorCode = 15;

            /// <summary>
            /// Runs the three demos in order and then waits for a key press.
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                Convert_UCS2_To_GBK();
                Convert_GBK_To_UCS2();
                Detect_Encoding();

                System.Console.ReadKey();
            }

            /// <summary>
            /// Mirrors ICU's U_FAILURE macro: positive codes are errors, zero and negative codes
            /// are success or warnings.
            /// </summary>
            private static bool IsFailure(ICU4C.UErrorCode status)
            {
                return (int)status > 0;
            }

            /// <summary>
            /// Throws when <paramref name="status"/> is an ICU error (not a warning).
            /// </summary>
            /// <param name="status">Status written by the native call.</param>
            /// <param name="operation">Name of the native function, used in the exception message.</param>
            /// <exception cref="InvalidOperationException">The status is an ICU error code.</exception>
            private static void ThrowIfFailure(ICU4C.UErrorCode status, string operation)
            {
                if (IsFailure(status))
                {
                    throw new InvalidOperationException(
                        $"{operation} failed with ICU error code {(int)status}.");
                }
            }

            /// <summary>
            /// Same as <see cref="ThrowIfFailure"/>, but tolerates the buffer-overflow status that a
            /// preflight (length-only) call is expected to report.
            /// </summary>
            private static void ThrowIfPreflightFailure(ICU4C.UErrorCode status, string operation)
            {
                if ((int)status != BufferOverflowErrorCode)
                {
                    ThrowIfFailure(status, operation);
                }
            }

            /// <summary>
            /// Reads TEST.GBK.TXT and prints the charset name ICU's detector reports for it.
            /// </summary>
            /// <exception cref="InvalidOperationException">An ICU call reported an error status.</exception>
            static void Detect_Encoding()
            {
                ICU4C.UErrorCode status;

                byte[] input = File.ReadAllBytes(@"TEST.GBK.TXT");

                // ucsdet_setText does not copy the text; the detector keeps the raw pointer and reads
                // it later in ucsdet_detect. The P/Invoke marshaller pins a byte[] only for the
                // duration of a single call, so pin it explicitly for the detector's whole lifetime.
                GCHandle pinnedInput = GCHandle.Alloc(input, GCHandleType.Pinned);
                IntPtr ucsd = IntPtr.Zero;
                string str = null;

                try
                {
                    // Open the detector.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    ucsd = ICU4C.NativeMethods.ucsdet_open(ref status);
                    ThrowIfFailure(status, "ucsdet_open");

                    // Set the text to inspect.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    ICU4C.NativeMethods.ucsdet_setText(ucsd, input, input.Length, ref status);
                    ThrowIfFailure(status, "ucsdet_setText");

                    // Run the detection.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    IntPtr ucsm = ICU4C.NativeMethods.ucsdet_detect(ucsd, ref status);
                    ThrowIfFailure(status, "ucsdet_detect");

                    // A null match means no charset fits the input; leave the result null.
                    if (ucsm != IntPtr.Zero)
                    {
                        status = ICU4C.UErrorCode.U_ZERO_ERROR;
                        IntPtr lpstr = ICU4C.NativeMethods.ucsdet_getName(ucsm, ref status);
                        ThrowIfFailure(status, "ucsdet_getName");

                        // Copy the name before the detector is closed: the native string is
                        // owned by the detector.
                        str = Marshal.PtrToStringAnsi(lpstr);
                    }
                }
                finally
                {
                    // Close the detector first, then release the buffer it was pointing at.
                    if (ucsd != IntPtr.Zero)
                    {
                        ICU4C.NativeMethods.ucsdet_close(ucsd);
                    }

                    pinnedInput.Free();
                }

                System.Console.WriteLine("Detected Encoding");
                System.Console.WriteLine($"  Result = {str}");
            }

            /// <summary>
            /// Reads TEST.TXT as UTF-16, converts it to GBK with an ICU converter and writes TEST.GBK.TXT.
            /// </summary>
            /// <exception cref="InvalidOperationException">An ICU call reported an error status.</exception>
            static void Convert_UCS2_To_GBK()
            {
                string input = File.ReadAllText(@"TEST.TXT", Encoding.Unicode);

                ICU4C.UErrorCode status;
                IntPtr cnv = IntPtr.Zero;
                byte[] output;

                try
                {
                    // Open the converter.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    cnv = ICU4C.NativeMethods.ucnv_open("GBK", ref status);
                    ThrowIfFailure(status, "ucnv_open");

                    // Preflight: a zero-capacity call only computes the required output length.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    int outputLength = ICU4C.NativeMethods.ucnv_fromUChars(cnv, null, 0, input, input.Length, ref status);
                    ThrowIfPreflightFailure(status, "ucnv_fromUChars (preflight)");

                    // Output buffer.
                    output = new byte[outputLength];

                    // Convert into the buffer. An exactly-sized buffer yields a "not terminated"
                    // warning, which is harmless because the array length is already known.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    ICU4C.NativeMethods.ucnv_fromUChars(cnv, output, output.Length, input, input.Length, ref status);
                    ThrowIfFailure(status, "ucnv_fromUChars");
                }
                finally
                {
                    // Close the converter even when a step above failed.
                    if (cnv != IntPtr.Zero)
                    {
                        ICU4C.NativeMethods.ucnv_close(cnv);
                    }
                }

                // Write the result file.
                File.WriteAllBytes(@"TEST.GBK.TXT", output);

                System.Console.WriteLine("Convert UCS2 to GBK");
                System.Console.WriteLine($"  Input Length = {input.Length} characters");
                System.Console.WriteLine($"  Output Length = {output.Length} bytes");
            }

            /// <summary>
            /// Reads TEST.GBK.TXT, converts it from GBK to UTF-16 with an ICU converter and writes TEST.UCS2.TXT.
            /// </summary>
            /// <exception cref="InvalidOperationException">An ICU call reported an error status.</exception>
            static void Convert_GBK_To_UCS2()
            {
                byte[] input = File.ReadAllBytes(@"TEST.GBK.TXT");

                ICU4C.UErrorCode status;
                IntPtr cnv = IntPtr.Zero;
                StringBuilder output;

                try
                {
                    // Open the converter.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    cnv = ICU4C.NativeMethods.ucnv_open("GBK", ref status);
                    ThrowIfFailure(status, "ucnv_open");

                    // Preflight: a zero-capacity call only computes the required output length.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    int outputLength = ICU4C.NativeMethods.ucnv_toUChars(cnv, null, 0, input, input.Length, ref status);
                    ThrowIfPreflightFailure(status, "ucnv_toUChars (preflight)");

                    // Output buffer with one extra slot: the StringBuilder marshaller finds the end
                    // of the text by its NUL terminator, and ICU only writes the terminator when
                    // the capacity is larger than the converted length.
                    output = new StringBuilder(outputLength + 1);

                    // Convert into the buffer.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    ICU4C.NativeMethods.ucnv_toUChars(cnv, output, output.Capacity, input, input.Length, ref status);
                    ThrowIfFailure(status, "ucnv_toUChars");
                }
                finally
                {
                    // Close the converter even when a step above failed.
                    if (cnv != IntPtr.Zero)
                    {
                        ICU4C.NativeMethods.ucnv_close(cnv);
                    }
                }

                // Write the result file.
                File.WriteAllText(@"TEST.UCS2.TXT", output.ToString(), Encoding.Unicode);

                System.Console.WriteLine("Convert GBK to UCS2");
                System.Console.WriteLine($"  Input Length = {input.Length} bytes");
                System.Console.WriteLine($"  Output Length = {output.Length} characters");
            }
        }
    }

    P/Invoke API 定义

    using System;
    using System.Collections.Generic;
    using System.Runtime.InteropServices;
    using System.Text;
    
    #pragma warning disable IDE1006 // Naming Styles
    
    namespace EncodingConverter.ICU4C
    {
        /// <summary>
        /// Subset of ICU's <c>UErrorCode</c>. Following ICU's convention, negative values are
        /// warnings, zero is success and positive values are errors. Values not listed here can
        /// still be returned by native calls and will show up as their raw integer value.
        /// </summary>
        enum UErrorCode
        {
            /// <summary>The output filled the buffer exactly, so no NUL terminator was written.</summary>
            U_STRING_NOT_TERMINATED_WARNING = -124,

            /// <summary>No error, no warning.</summary>
            U_ZERO_ERROR = 0,

            /// <summary>An argument passed to the function was invalid.</summary>
            U_ILLEGAL_ARGUMENT_ERROR = 1,

            /// <summary>A requested resource (for example converter data) could not be found.</summary>
            U_MISSING_RESOURCE_ERROR = 2,

            /// <summary>A data file could not be opened.</summary>
            U_FILE_ACCESS_ERROR = 4,

            /// <summary>Memory allocation failed.</summary>
            U_MEMORY_ALLOCATION_ERROR = 7,

            /// <summary>Conversion met a character with no mapping in the target charset.</summary>
            U_INVALID_CHAR_FOUND = 10,

            /// <summary>The input ended in the middle of a multi-unit character.</summary>
            U_TRUNCATED_CHAR_FOUND = 11,

            /// <summary>Conversion met an illegal input sequence.</summary>
            U_ILLEGAL_CHAR_FOUND = 12,

            /// <summary>The destination buffer was too small; also the expected status of a preflight call.</summary>
            U_BUFFER_OVERFLOW_ERROR = 15,

            /// <summary>The requested operation is not supported.</summary>
            U_UNSUPPORTED_ERROR = 16
        }
    
        /// <summary>
        /// P/Invoke declarations for the ICU4C 67 functions used by this sample. Every entry point
        /// carries ICU's version suffix (<c>_67</c>) and uses the cdecl calling convention.
        /// </summary>
        static class NativeMethods
        {
            // DLL that exports the charset-detection (ucsdet_*) entry points bound below.
            private const string DetectorLibrary = "icuin67.dll";

            // DLL that exports the converter (ucnv_*) entry points bound below.
            private const string ConverterLibrary = "icuuc67.dll";

            /// <summary>Binds <c>ucsdet_open_67</c>; returns the native detector handle.</summary>
            [DllImport(DetectorLibrary, EntryPoint = "ucsdet_open_67", CallingConvention = CallingConvention.Cdecl)]
            public static extern IntPtr ucsdet_open(ref UErrorCode status);

            /// <summary>Binds <c>ucsdet_close_67</c>; takes a handle obtained from <see cref="ucsdet_open"/>.</summary>
            [DllImport(DetectorLibrary, EntryPoint = "ucsdet_close_67", CallingConvention = CallingConvention.Cdecl)]
            public static extern void ucsdet_close(IntPtr ucsd);

            /// <summary>
            /// Binds <c>ucsdet_setText_67</c>; passes the bytes to inspect and their length.
            /// NOTE(review): per the ICU documentation the detector keeps the pointer instead of
            /// copying the bytes, so the caller should keep the array pinned until detection is done.
            /// </summary>
            [DllImport(DetectorLibrary, EntryPoint = "ucsdet_setText_67", CallingConvention = CallingConvention.Cdecl)]
            public static extern void ucsdet_setText(IntPtr ucsd, byte[] textIn, int len, ref UErrorCode status);

            /// <summary>Binds <c>ucsdet_detect_67</c>; returns a native match handle.</summary>
            [DllImport(DetectorLibrary, EntryPoint = "ucsdet_detect_67", CallingConvention = CallingConvention.Cdecl)]
            public static extern IntPtr ucsdet_detect(IntPtr ucsd, ref UErrorCode status);

            /// <summary>
            /// Binds <c>ucsdet_getName_67</c>; returns a pointer to a native string for the given
            /// match handle (the sample reads it with <c>Marshal.PtrToStringAnsi</c>).
            /// </summary>
            [DllImport(DetectorLibrary, EntryPoint = "ucsdet_getName_67", CallingConvention = CallingConvention.Cdecl)]
            public static extern IntPtr ucsdet_getName(IntPtr ucsm, ref UErrorCode status);

            /// <summary>
            /// Binds <c>ucnv_open_67</c>; the converter name is marshalled as an ANSI string and a
            /// native converter handle is returned.
            /// </summary>
            [DllImport(ConverterLibrary, EntryPoint = "ucnv_open_67", CallingConvention = CallingConvention.Cdecl)]
            public static extern IntPtr ucnv_open([MarshalAs(UnmanagedType.LPStr)] string converterName, ref UErrorCode err);

            /// <summary>Binds <c>ucnv_close_67</c>; takes a handle obtained from <see cref="ucnv_open"/>.</summary>
            [DllImport(ConverterLibrary, EntryPoint = "ucnv_close_67", CallingConvention = CallingConvention.Cdecl)]
            public static extern void ucnv_close(IntPtr converter);

            /// <summary>
            /// Binds <c>ucnv_fromUChars_67</c>; converts UTF-16 text (<paramref name="src"/>) into
            /// bytes written to <paramref name="dest"/> and returns a length. The sample passes a
            /// null destination with capacity 0 to obtain the required size first.
            /// </summary>
            [DllImport(ConverterLibrary, EntryPoint = "ucnv_fromUChars_67", CallingConvention = CallingConvention.Cdecl)]
            public static extern int ucnv_fromUChars(IntPtr cnv, [Out] byte[] dest, int destCapacity, [MarshalAs(UnmanagedType.LPWStr)] string src, int srcLength, ref UErrorCode pErrorCode);

            /// <summary>
            /// Binds <c>ucnv_toUChars_67</c>; converts bytes (<paramref name="src"/>) into UTF-16
            /// text written to <paramref name="dest"/> and returns a length. The sample passes a
            /// null destination with capacity 0 to obtain the required size first.
            /// </summary>
            [DllImport(ConverterLibrary, EntryPoint = "ucnv_toUChars_67", CallingConvention = CallingConvention.Cdecl)]
            public static extern int ucnv_toUChars(IntPtr cnv, [MarshalAs(UnmanagedType.LPWStr)] StringBuilder dest, int destCapacity, byte[] src, int srcLength, ref UErrorCode pErrorCode);
        }
    }
  • 相关阅读:
    activeMQ功能Demo
    业务逻辑:shiro框架的功能实现
    业务逻辑: Quartz的整合应用
    思考题
    业务逻辑:七、完成系统菜单根据登录人动态展示
    业务逻辑:五、完成认证用户的动态授权功能 六、完成Shiro整合Ehcache缓存权限数据
    业务逻辑:三、完成用户数据列表显示功能 四、完成用户添加功能
    业务逻辑:完成角色列表显示功能,二、完成角色添加功能 三、完成用户数据列表显示功能
    Struts2 看1
    Spring2 看1
  • 原文地址:https://www.cnblogs.com/crsky/p/13785729.html
Copyright © 2011-2022 走看看