zoukankan      html  css  js  c++  java
  • C# 根据传入的字节流(byte[])判断其编码格式是否为 UTF-8(转自网络,原文地址已忘)

    /// <summary>
    /// Determines whether a byte buffer is well-formed UTF-8 that contains at
    /// least one multi-byte (non-ASCII) sequence.
    /// </summary>
    /// <param name="inputStream">The raw bytes to inspect. Must not be null.</param>
    /// <returns>
    /// <c>true</c> when every byte belongs to a well-formed UTF-8 sequence and at
    /// least one sequence is longer than one byte; <c>false</c> when the buffer is
    /// empty, is pure ASCII (treated as "default encoding" rather than UTF-8), or
    /// contains any ill-formed sequence.
    /// </returns>
    /// <remarks>
    /// Validation follows the well-formed byte ranges of the Unicode Standard /
    /// RFC 3629: lead bytes 0xC0, 0xC1 and 0xF5-0xFF are rejected, as are overlong
    /// encodings, encoded UTF-16 surrogates (U+D800-U+DFFF) and code points above
    /// U+10FFFF. A leading byte-order mark (EF BB BF) is itself a valid three-byte
    /// sequence and therefore counts as non-ASCII content.
    /// </remarks>
    public static bool IsTextUTF8(byte[] inputStream)
    {
        // Number of continuation bytes still expected for the current sequence.
        int pendingContinuationBytes = 0;

        // Allowed range for the NEXT continuation byte. Normally 0x80-0xBF, but
        // the first continuation byte after E0/ED/F0/F4 is restricted further to
        // exclude overlong forms, surrogates and values beyond U+10FFFF.
        byte lowerBound = 0x80;
        byte upperBound = 0xBF;

        bool sawMultiByteSequence = false;

        for (int i = 0; i < inputStream.Length; i++)
        {
            byte current = inputStream[i];

            if (pendingContinuationBytes > 0)
            {
                if (current < lowerBound || current > upperBound)
                {
                    // Not a continuation byte, or outside the range permitted
                    // by the lead byte.
                    return false;
                }

                // Only the first continuation byte carries a narrowed range.
                lowerBound = 0x80;
                upperBound = 0xBF;
                pendingContinuationBytes--;
                continue;
            }

            if (current <= 0x7F)
            {
                // Single-byte ASCII, 0x00-0x7F.
                continue;
            }

            sawMultiByteSequence = true;

            if (current >= 0xC2 && current <= 0xDF)
            {
                // Two-byte sequence (0xC0/0xC1 would be overlong ASCII).
                pendingContinuationBytes = 1;
            }
            else if (current >= 0xE0 && current <= 0xEF)
            {
                // Three-byte sequence.
                pendingContinuationBytes = 2;

                if (current == 0xE0)
                {
                    lowerBound = 0xA0; // below this is an overlong encoding
                }
                else if (current == 0xED)
                {
                    upperBound = 0x9F; // above this is a UTF-16 surrogate
                }
            }
            else if (current >= 0xF0 && current <= 0xF4)
            {
                // Four-byte sequence.
                pendingContinuationBytes = 3;

                if (current == 0xF0)
                {
                    lowerBound = 0x90; // below this is an overlong encoding
                }
                else if (current == 0xF4)
                {
                    upperBound = 0x8F; // above this exceeds U+10FFFF
                }
            }
            else
            {
                // Stray continuation byte (0x80-0xBF) or a byte that can never
                // start a UTF-8 sequence (0xC0, 0xC1, 0xF5-0xFF).
                return false;
            }
        }

        if (pendingContinuationBytes != 0)
        {
            // Input ended in the middle of a multi-byte sequence.
            return false;
        }

        // Pure-ASCII input is valid UTF-8, but is deliberately reported as
        // "not UTF-8" so callers fall back to their default encoding.
        return sawMultiByteSequence;
    }
  • 相关阅读:
    C#操作符??和?:
    使用Windows8开发Metro风格应用一
    使用Windows8开发Metro风格应用二
    Win8使用技巧
    详解 xls xlst xml 一
    SqlDataAdapter DataSet DataTable 详解
    DataSet 与 xml
    FileTracker : error FTK1011编译错误
    我的CHROME插件
    Komodo升级错误
  • 原文地址:https://www.cnblogs.com/jasonlwings/p/3092207.html
Copyright © 2011-2022 走看看