哈夫曼编码 —— 二进制前缀编码,以 n 个字符的出现频率作为权重,以求得字长最短的二进制编码
哈夫曼树 —— 采用二叉树的形式储存编码格式
前缀编码:字符集中任一字符的编码都不是其它字符的编码的前缀
数据结构课代码实现 之 哈夫曼树
简要理解 在已知每个字符结点 及其 权重的情况下,每次选择两个权重最小的字符结点,创建一个新的结点
将新结点的左右孩子结点设置为选择的两个字符结点,设置新结点权重为两子结点权重之和
设置两个字符结点的双亲结点为新结点
标记两个子结点已被选择过,并将在下次选择时将新结点设为可选择
有 n
个字符的哈夫曼树 即 有 n
个叶子结点的哈夫曼树共有 2n - 1
个结点
所以在 n - 1
次选择过后哈夫曼树即被建立
具体理解 与 代码实现 声明 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 #include <iostream> #include <cstdio> #include <cstring> #include <algorithm> #include <cstdlib> #include <map> #include <utility> #define swap(a, b) {int c = a; a = b; b = c;} using namespace std ;typedef struct { unsigned int weight; unsigned int parent, lchild, rchild; char data; } HTNode, *HuffmanTree; typedef char **HuffmanCode;bool *visit; map <char , unsigned int > x; struct w { unsigned int w; int pos; }; bool cmp (w a, w b) { if (a.w < b.w) return true ; else return false ; }
Select 函数 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 void Select (HuffmanTree HT, int i, int &s1, int &s2) { w we[i]; for (int j = 1 ; j <= i; j++) { we[j].w = HT[j].weight; we[j].pos = j; } sort(we + 1 , we + i + 1 , cmp); for (int j = 1 ; j <= i; j++) { if (!visit[we[j].pos]) { if (s1 == 0 ) s1 = we[j].pos; else if (s2 == 0 ) { s2 = we[j].pos; break ; } } } if (s1 > s2) swap(s1, s2); visit[s1] = true ; visit[s2] = true ; }
哈夫曼建树编码的过程 读取模式1 (预先设置好每个字符的 权重 手动输入对应字符) 方法一 (由 叶子结点 到 根 编码) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 void HuffmanCoding (HuffmanTree &HT, HuffmanCode &HC, unsigned int *w, int n) { if (n < 1 ) return ; int m = 2 * n - 1 ; HT = (HuffmanTree)malloc ((m + 1 ) * sizeof (HTNode)); HuffmanTree p = HT + 1 ; int i; cout << "请输入各字符:(不需要空格分开,回车结束)" << endl ; for (i = 1 ; i <= n; ++i, ++p, ++w) { char x; x = getchar(); *p = {*w, 0 , 0 , 0 , x}; } for (; i <= m; ++i) { int s1 = 0 ; int s2 = 0 ; Select(HT, i - 1 , s1, s2); HT[s1].parent = i; HT[s2].parent = i; HT[i].lchild = s1; HT[i].rchild = s2; HT[i].weight = HT[s1].weight + HT[s2].weight; } HT[m].parent = 0 ; HC = (HuffmanCode)malloc ((n + 1 ) * sizeof (char *)); char *cd = (char *)malloc (n * sizeof (char )); cd[n - 1 ] = '\0' ; for (i = 1 ; i <= n; ++i) { int start = n - 1 ; int f, c; for (c = i, f = HT[i].parent; f != 0 ; c = f, f = HT[f].parent) { if (HT[f].lchild == c) cd[--start] = '0' ; else cd[--start] = '1' ; } HC[i] = (char *)malloc ((n - start) * sizeof (char )); strcpy (HC[i], &cd[start]); } free (cd); }
方法二 (由 根 向 叶子结点 编码) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 void HuffmanCoding2 (HuffmanTree &HT, HuffmanCode &HC, unsigned int * w, int n) { char *cd = (char *)malloc (n * sizeof (char )); HC = (HuffmanCode)malloc ((n + 1 ) * sizeof (char *)); int p = 2 * n - 1 ; int cdlen = 0 ; HT[p].parent = 0 ; for (int i = 1 ; i <= p; i++) HT[i].weight = 0 ; while (p) { if (HT[p].weight == 0 ) { HT[p].weight = 1 ; if (HT[p].lchild != 0 ) { p = HT[p].lchild; cd[cdlen++] = '0' ; } else if (HT[p].rchild == 0 ) { HC[p] = (char *)malloc ((cdlen + 1 ) * sizeof (char )); cd[cdlen] = '\0' ; strcpy (HC[p], cd); } } else if (HT[p].weight == 1 ) { HT[p].weight = 2 ; if (HT[p].rchild != 0 ) { p = HT[p].rchild; cd[cdlen++] = '1' ; } } else { HT[p].weight = 0 ; p = HT[p].parent; --cdlen; } } }
读取模式2 (输入一段话来计算 字符 与 相应权重) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 void HuffmanCoding3 (HuffmanTree &HT, HuffmanCode &HC,int n) { int i; int m = 2 * n - 1 ; HT = (HuffmanTree)malloc ((m + 1 ) * sizeof (HTNode)); HuffmanTree p = HT + 1 ; map <char , unsigned int >::iterator it; cout << "字符" << " " << "权重" << endl ; for (it = x.begin(); it != x.end(); it++) { *p = {it->second, 0 , 0 , 0 , it->first}; p++; cout << it->first << " " << it->second << endl ; } }
输入原字段输出哈夫曼编码 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 void prtHuf (HuffmanTree HT, HuffmanCode HC, int n) { char *read; read = (char *)malloc (1000 * sizeof (char )); char a; a = getchar(); int cnt = 0 ; while (a != '#' ) { for (int i = 1 ; i <= n; i++) { if (a == HT[i].data) { strcpy (read + cnt, HC[i]); cnt += strlen (HC[i]); } } a = getchar(); } getchar(); cout << "Huffman编码:" << endl ; cout << read << endl ; }
输入哈夫曼编码输出原字段 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 void prtData (HuffmanTree HT, int n) { char a; int pos = 2 * n - 1 ; char *ans = (char *)malloc (1000 * sizeof (char )); int cnt = 0 ; a = getchar(); while (a != '#' ) { if (a == '0' ) pos = HT[pos].lchild; if (a == '1' ) pos = HT[pos].rchild; if (HT[pos].lchild == 0 || HT[pos].rchild == 0 ) { ans[cnt++] = HT[pos].data; pos = 2 * n - 1 ; } a = getchar(); } getchar(); ans[++cnt] = '\0' ; cout << "原字段:" << endl ; cout << ans; }
main函数 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 int main () { cout << "1.手动输入字符与权重" << endl << "2.字段自动识别" << endl << "请输入相应序号:" << endl ; int mode; cin >> mode; getchar(); if (mode == 1 ) { int n; cout << "请输入字符个数:" << endl ; cin >> n; unsigned int *w = (unsigned int *)malloc (n * sizeof (unsigned int )); visit = (bool *)malloc ((2 * n + 1 ) * sizeof (bool )); for (int i = 0 ; i <= 2 * n + 1 ; i++) visit[i] = false ; cout << "请输入各字符权重:(空格分开回车结束)" << endl ; for (int i = 0 ; i < n; i++) cin >> w[i]; getchar(); HuffmanCode Code, Code1; HuffmanTree Tree; HuffmanCoding(Tree, Code, w, n); cout << "请输入原字段,以#结束: " << endl ; prtHuf(Tree, Code, n); cout << "请输入Huffman编码,以#结束:" << endl ; prtData(Tree, n); } else if (mode == 2 ) { cout << "请输入一段话:(以#结束)" << endl ; char a; a = getchar(); while (a != '#' ) { if (!x.count(a)) { x[a] = 1 ; } else { x[a]++; } a = getchar(); } int n = x.size(); unsigned int *w = (unsigned int *)malloc (n * sizeof (unsigned int )); visit = (bool *)malloc ((2 * n + 1 ) * sizeof (bool )); for (int i = 0 ; i <= 2 * n + 1 ; i++) visit[i] = false ; HuffmanCode Code; HuffmanTree Tree; HuffmanCoding3(Tree, Code, n); cout << "请输入原字段,以#结束: " << endl ; prtHuf(Tree, Code, n); cout << "请输入Huffman编码,以#结束:" << endl ; prtData(Tree, n); } return 0 ; }
给个数据 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 1.手动输入字符与权重 2.字段自动识别 请输入相应序号: 2 请输入一段话:(以#结束) Youth is not a time of life; it is a state of mind. It is not a matter of rosy cheeks, red lips and supple knees. It is a matter of the will, a quality of the imagination, vigor of the emotions; it is the freshness of the deep spring of life.# 字符 权重 51 , 3 . 3 ; 2 I 2 Y 1 a 12 c 1 d 4 e 22 f 11 g 3 h 8 i 20 k 2 l 7 m 6 n 10 o 16 p 5 q 1 r 7 s 16 t 22 u 3 v 1 w 1 y 2 请输入原字段,以#结束: I think i am a pop man.# Huffman编码: 1001101000101001011001101010011100011000001100111000011000110111101011011100011100110110101111111 请输入Huffman编码,以#结束: 1001101000101001011001101010011100011000001100111000011000110111101011011100011100110110101111111# 原字段: I think i am a pop man.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 1.手动输入字符与权重 2.字段自动识别 请输入相应序号: 1 请输入字符个数: 8 请输入各字符权重:(空格分开回车结束) 15 7 5 4 9 2 1 6 请输入各字符:(不需要空格分开,回车结束) ILOVEYU // U 后面有个空格 请输入原字段,以#结束: I LOVE YOU# Huffman编码: 100111100101110000111111001011111 请输入Huffman编码,以#结束: 100111100101110000111111001011111# 原字段: I LOVE YOU