HuffmanTree构造和编码
1
2 using System.Collections.Generic;
3
4
5 namespace HuffmanCode
6 {
/// <summary>
/// A node of the Huffman tree.
/// </summary>
public class Node
{
    /// <summary>Character held by this node; '\0' until one is assigned.</summary>
    public char Char = '\0';

    /// <summary>Occurrence count of this node.</summary>
    public int Count = 0;

    /// <summary>Binary code string of this node; empty until one is assigned.</summary>
    public string Code = "";

    /// <summary>Left child, or null when there is none.</summary>
    public Node Lchild = null;

    /// <summary>Right child, or null when there is none.</summary>
    public Node Rchild = null;

    /// <summary>Parent node, or null when there is none.</summary>
    public Node Parent = null;

    /// <summary>
    /// Creates a node with an empty code, the '\0' character, a zero count and
    /// no parent or children (all supplied by the field initialisers).
    /// </summary>
    public Node()
    {
    }
}
/// <summary>
/// Builds a Huffman binary tree from a list of counted nodes and derives the
/// binary code of each node from its position in the tree.
/// </summary>
public class HuffmanTree
{
    /// <summary>Root node of the Huffman tree.</summary>
    public Node Root = new Node();

    /// <summary>
    /// Builds the Huffman tree under <see cref="Root"/>.
    /// </summary>
    /// <param name="chlist">
    /// Nodes to combine. The list is consumed: merged nodes are removed from it
    /// and their parent nodes are added, so callers that still need the leaves
    /// must keep their own list of them. If the list is not already in
    /// ascending Count order it is sorted first.
    /// </param>
    /// <exception cref="System.ArgumentNullException">chlist is null.</exception>
    /// <remarks>
    /// An empty list leaves Root without children. A single node becomes the
    /// left child of Root with code "0".
    /// </remarks>
    public void CreateHuffmanTree(List<Node> chlist)
    {
        if (chlist == null)
        {
            throw new System.ArgumentNullException("chlist");
        }

        if (chlist.Count == 0)
        {
            // Nothing to encode: make sure Root carries no stale links.
            Root.Lchild = null;
            Root.Rchild = null;
            Root.Count = 0;
            return;
        }

        if (chlist.Count == 1)
        {
            // A lone symbol still needs a one-bit code, so hang it on the left.
            Node only = chlist[0];
            Root.Lchild = only;
            Root.Rchild = null;
            only.Parent = Root;
            only.Code = "0";
            Root.Count = only.Count;
            return;
        }

        // The merge loop takes the first two entries as the two smallest, so
        // the list must be in ascending Count order. Already-sorted input is
        // left untouched so that the order of equal counts is not disturbed.
        if (!IsSortedByCount(chlist))
        {
            StaticMethod.Sort(chlist);
        }

        while (chlist.Count > 2)
        {
            Node node1 = chlist[0]; // node with the smallest Count
            Node node2 = chlist[1]; // node with the second smallest Count
            Node node3 = new Node();

            node3.Count = node1.Count + node2.Count;

            node3.Lchild = node1;
            node1.Parent = node3;
            node1.Code = "0"; // a left child is marked with 0

            node3.Rchild = node2;
            node2.Parent = node3;
            node2.Code = "1"; // a right child is marked with 1

            // Remove the two smallest nodes and insert their parent, whose
            // Count is the sum of both.
            chlist.RemoveAt(0);
            chlist.RemoveAt(0);
            chlist.Add(node3);

            // Restore ascending Count order after the change.
            StaticMethod.Sort(chlist);
        }

        // Two nodes remain: they become the left and right child of Root.
        Root.Lchild = chlist[0];
        chlist[0].Parent = Root;
        chlist[0].Code = "0";
        Root.Rchild = chlist[1];
        chlist[1].Parent = Root;
        chlist[1].Code = "1";
        Root.Count = chlist[0].Count + chlist[1].Count;
    }

    /// <summary>
    /// Assigns every node in the list its full code by walking from the node
    /// up to the root.
    /// </summary>
    /// <param name="list">Nodes to encode.</param>
    /// <returns>The same list instance, with each node's Code updated.</returns>
    /// <exception cref="System.ArgumentNullException">list is null.</exception>
    /// <remarks>
    /// Each bit is taken from the tree links ("0" when the node is its parent's
    /// left child, otherwise "1") rather than from the Code values stored on
    /// the way up, so calling the method again yields the same codes. A node
    /// without a parent keeps its current Code.
    /// </remarks>
    public List<Node> GoToRoot(List<Node> list)
    {
        if (list == null)
        {
            throw new System.ArgumentNullException("list");
        }

        foreach (Node node in list)
        {
            if (node.Parent == null)
            {
                continue;
            }

            string code = "";
            for (Node current = node; current.Parent != null; current = current.Parent)
            {
                // Walking upwards, so every new bit goes in front.
                string bit = current.Parent.Lchild == current ? "0" : "1";
                code = bit + code;
            }
            node.Code = code;
        }
        return list;
    }

    // Returns true when the Count values never decrease from one entry to the next.
    private static bool IsSortedByCount(List<Node> nodes)
    {
        for (int i = 1; i < nodes.Count; i++)
        {
            if (nodes[i - 1].Count > nodes[i].Count)
            {
                return false;
            }
        }
        return true;
    }
}//class HuffmanTree
94 }
95
对文本中各字符出现频率进行统计
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
namespace HuffmanCode
{
/// <summary>
/// Counts the characters of a text file and stores one node per distinct
/// character in <see cref="ChList"/>.
/// </summary>
public class CharList
{
    /// <summary>
    /// Shared character table. It is static, so every CharList instance sees
    /// the same list, and constructing a new CharList replaces it.
    /// </summary>
    public static List<Node> ChList;

    /// <summary>Instance-level access to the shared <see cref="ChList"/>.</summary>
    public List<Node> List
    {
        get
        {
            return ChList;
        }
        set
        {
            ChList = value;
        }
    }

    /// <summary>Replaces the shared table with a new, empty list.</summary>
    public CharList()
    {
        ChList = new List<Node>();
    }

    /// <summary>
    /// Counts the characters of "text.txt", resolved against the current directory.
    /// </summary>
    public void Read()
    {
        Read("text.txt");
    }

    /// <summary>
    /// Counts the characters of the given text file into <see cref="ChList"/>.
    /// </summary>
    /// <param name="path">Path of the text file to read.</param>
    /// <remarks>
    /// The encoding reported by TxtFileEncoding.GetEncoding is written to the
    /// console and used to open the file. A character already present in the
    /// table has its Count incremented. A new character is appended with
    /// Count 1, except space, line feed, carriage return and tab, which are
    /// never added.
    /// </remarks>
    public void Read(string path)
    {
        Encoding encoding = TxtFileEncoding.GetEncoding(path);
        Console.WriteLine(encoding);

        if (ChList == null)
        {
            ChList = new List<Node>();
        }

        // Index the entries already in the table so each character read costs
        // one lookup instead of a scan. The first entry wins on duplicates,
        // which is the entry a front-to-back scan would have found.
        Dictionary<char, Node> known = new Dictionary<char, Node>();
        foreach (Node existing in ChList)
        {
            if (!known.ContainsKey(existing.Char))
            {
                known.Add(existing.Char, existing);
            }
        }

        // The using block closes the reader even when reading throws.
        using (StreamReader sr = new StreamReader(path, encoding))
        {
            int next;
            while ((next = sr.Read()) != -1)
            {
                char ch = (char)next;

                Node entry;
                if (known.TryGetValue(ch, out entry))
                {
                    entry.Count += 1; // character already in the table: count it
                    continue;
                }

                // Whitespace never enters the table.
                if (ch == '\x20' || ch == '\n' || ch == '\r' || ch == '\t')
                {
                    continue;
                }

                Node node = new Node();
                node.Char = ch;
                node.Count = 1;
                ChList.Add(node);
                known.Add(ch, node);
            }
        }
    }
}
/// <summary>
/// Static helpers that sort a node list in ascending Count order and print it.
/// </summary>
public class StaticMethod
{
    /// <summary>
    /// Orders two nodes by Count, ascending.
    /// </summary>
    /// <returns>-1 when a has the smaller Count, 0 when both are equal, otherwise 1.</returns>
    public static int Compare(Node a, Node b)
    {
        if (a.Count < b.Count)
        {
            return -1;
        }
        return a.Count == b.Count ? 0 : 1;
    }

    /// <summary>Sorts the list in place using <see cref="Compare"/>.</summary>
    public static void Sort(List<Node> list)
    {
        list.Sort(Compare);
    }

    /// <summary>
    /// Writes one console line per node: its character, its Count right-aligned
    /// in 4 columns and its Code left-aligned in 20 columns.
    /// </summary>
    public static void Display(List<Node> list)
    {
        foreach (Node entry in list)
        {
            Console.Write("字符:" + entry.Char + " 计数:");
            Console.Write("{0,4}", entry.Count);
            Console.Write(" 编码: ");
            Console.Write("{0,-20}", entry.Code);
            Console.WriteLine();
        }
    }
}
}
StreamReader
1 using System;
2 using System.Text;
3 using System.IO;
4
5 namespace HuffmanCode
6 {
/// <summary>
/// Determines the encoding of a text file from the byte-order mark at its start.
/// </summary>
public class TxtFileEncoding
{
    /// <summary>
    /// Gets the encoding of a text file. Encoding.Default is returned when no
    /// recognised byte-order mark is found at the start of the file.
    /// </summary>
    /// <param name="fileName">Path of the file.</param>
    /// <returns>The detected encoding, or Encoding.Default.</returns>
    public static Encoding GetEncoding(string fileName)
    {
        return GetEncoding(fileName, Encoding.Default);
    }

    /// <summary>
    /// Gets the encoding of a text file stream. Encoding.Default is returned
    /// when no recognised byte-order mark is found.
    /// </summary>
    /// <param name="stream">The file stream to inspect.</param>
    /// <returns>The detected encoding, or Encoding.Default.</returns>
    public static Encoding GetEncoding(FileStream stream)
    {
        return GetEncoding(stream, Encoding.Default);
    }

    /// <summary>
    /// Gets the encoding of a text file.
    /// </summary>
    /// <param name="fileName">Path of the file.</param>
    /// <param name="defaultEncoding">
    /// Encoding returned when no recognised byte-order mark is found.
    /// </param>
    /// <returns>The detected encoding, or defaultEncoding.</returns>
    public static Encoding GetEncoding(string fileName, Encoding defaultEncoding)
    {
        // Only read access is requested, so read-only files can be inspected.
        // The using block closes the handle even when detection throws.
        using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            return GetEncoding(fs, defaultEncoding);
        }
    }

    /// <summary>
    /// Gets the encoding of a text file stream by reading up to its first four
    /// bytes. The stream position is restored before returning.
    /// </summary>
    /// <param name="stream">The file stream to inspect.</param>
    /// <param name="defaultEncoding">
    /// Encoding returned when the stream is null, cannot seek, is shorter than
    /// two bytes, or carries no recognised byte-order mark.
    /// </param>
    /// <returns>The detected encoding, or defaultEncoding.</returns>
    public static Encoding GetEncoding(FileStream stream, Encoding defaultEncoding)
    {
        if (stream == null || !stream.CanSeek || stream.Length < 2)
        {
            return defaultEncoding;
        }

        // Remember where the caller was. Seek returns the NEW position, so the
        // original position has to be read before moving.
        long origPos = stream.Position;

        // Leading bytes of the file; entries stay 0 when the file is shorter.
        byte[] head = new byte[4];
        try
        {
            stream.Seek(0, SeekOrigin.Begin);

            int available = (int)Math.Min(stream.Length, (long)head.Length);
            for (int i = 0; i < available; i++)
            {
                int value = stream.ReadByte();
                if (value < 0)
                {
                    break; // end of stream reached earlier than Length promised
                }
                head[i] = (byte)value;
            }
        }
        finally
        {
            // Put the stream back where the caller left it.
            stream.Seek(origPos, SeekOrigin.Begin);
        }

        // Byte-order marks checked:
        //   big-endian Unicode  FE FF
        //   Unicode             FF FE
        //   UTF-8               EF BB BF
        if (head[0] == 0xFE && head[1] == 0xFF)
        {
            return Encoding.BigEndianUnicode;
        }

        // NOTE(review): the third-byte test is carried over unchanged; its
        // purpose is not evident from this code — confirm before removing.
        if (head[0] == 0xFF && head[1] == 0xFE && head[2] != 0xFF)
        {
            return Encoding.Unicode;
        }

        if (head[0] == 0xEF && head[1] == 0xBB && head[2] == 0xBF)
        {
            return Encoding.UTF8;
        }

        return defaultEncoding;
    }
}
99
100 }
101