// Á¦ ¸ñ: Huffman Code Encoder
// ÀÛ¼ºÀÚ: ±è¿µ´ë ( http://www.howto.pe.kr )
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <math.h>
main(int argc, char **argv) {
int ifd, ofd, fsize;
unsigned char *src_data;
int i, j;
int Min, Max, Range, Pos, MinProb;
int Buffer, BufLen;
int *Prob, *Parent;
char *Bit, *BitCode;
int Len, TotalLen=0;
char outfile[255];
int debug = 0;
if (argc != 2) {
fprintf(stderr, "»ç¿ë¹ý: %s ÆÄÀϸín", argv[0]);
exit(1);
}
if ((ifd = open(argv[1], O_RDONLY)) < 0) {
fprintf(stderr, "%s ÆÄÀÏÀ» ÀÐÀ» ¼ö ¾ø½À´Ï´Ùn", argv[1]);
exit(1);
}
// source file ÀÇ Å©±â¸¦ ±¸ÇÏ¿© ÀúÀåÇÒ ¹öÆÛ¸¦ ÇÒ´ç ¹Þ´Â´Ù
fsize = lseek(ifd, 0L, 2); // ÆÄÀÏÅ©±â
lseek(ifd, 0L, 0); // ÆÄÀÏ Æ÷ÀÎÅ͸¦ ¸Ç ¾ÕÀ¸·Î À̵¿
src_data = (char *)malloc(fsize);
// sourceÆÄÀÏÀ» Àд´Ù
if (read(ifd, src_data, fsize) < fsize)
exit(1);
// targetÆÄÀÏÀ» »ý¼ºÇÑ´Ù
strcpy(outfile, argv[1]);
strcat(outfile, ".huf"); // È®ÀåÀÚ´Â .huf
if ((ofd = creat(outfile, 0644)) < 0) {
fprintf(stderr, "%s ÆÄÀÏÀ» ¸¸µé ¼ö ¾ø½À´Ï´Ùn", outfile);
exit(1);
}
// min,max byte °ªÀ» ±¸ÇÑ´Ù
Min = Max = src_data[0];
for (i = 1; i < fsize; i++) {
if (Min > src_data[i])
Min = src_data[i];
else if (Max < src_data[i])
Max = src_data[i];
}
Range = Max - Min + 1; // ºÐÆ÷µÈ byte °ªµéÀÇ ¹üÀ§
// ÃʱâÈ
Bit = (char *)malloc(2*Range-1); // ÄÚµå·Î ÇÒ´çÇÒ bit¸¦ ÀúÀå(0 or 1)
BitCode = (char *)malloc(2*Range-1); // ÄÚµå·Î ÇÒ´çÇÒ bitÀÇ ASCII ÀúÀå('0' or '1')
Prob = (int *)malloc(sizeof(int)*2*Range-1); // È®·ü(0..Range-1), ¸ðµç Prob¸¦ ´õÇϸé Max °¡ µÈ´Ù
Parent = (int *)malloc(sizeof(int)*2*Range-1);
memset(Bit, 0, 2*Range-1);
memset(BitCode, 0, 2*Range-1);
memset(Prob, 0, sizeof(int)*2*Range-1);
memset(Parent, -1, sizeof(int)*2*Range-1);
/* Example
source data: 3 4 4 5 6 6 6 7 7 10
src_data: 0 1 1 2 3 3 3 4 4 7
Min: 3
Max: 10
Range: 10-3+1 = 8
Probability:
index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
Prob: 1 2 1 3 2 0 0 1 0 1 2 3 4 6 10
parent: 9 11 10 13 12 8 8 10 9 11 12 13 14 14 -1
*/
for (i = 0; i < fsize; i++) {
// byte °ª¿¡¼ minÀ» »©¼ 0..Max-Min(= Range-1) ·Î ȯ»êÇÑ´Ù
// ±× ÀÌÀ¯´Â Prob[]ÀÇ index¸¦ 0 ºÎÅÍ ¾²±â À§Çؼ
src_data[i] -= Min;
Prob[src_data[i]]++; // byte °ªÀÌ ³ªÅ¸³ Ƚ¼ö(È®·ü)À» ±¸ÇÑ´Ù
}
// Huffman code tree ¸¦ ¸¸µç´Ù
for (i = 0; i < (Range-1); i++) {
/* °¡Àå ÀÛÀº ù¹øÂ° È®·üÀ» ±¸ÇÑ´Ù */
Pos = -1;
MinProb = fsize; // ´ÜÁö °¡Àå ÀÛÀº¼ö¸¦ ±¸Çϱâ À§ÇÑ Å«¼ö
// ÀÌÀü¿¡ ±¸ÇÑ µÎ byte ÀÇ È®·üµµ loop ¿¡ Æ÷ÇÔ½ÃŲ´Ù
for (j = 0; j < Range+i; j++)
if ((Prob[j] < MinProb) && (Parent[j] == -1)) {
MinProb = Prob[j];
Pos = j;
}
Bit[Pos] = 0; // ºñÆ®ÇÒ´ç
Parent[Pos] = Range + i; // tree·Î °¡Á¤ÇÒ°æ¿ì »óÀ§(µÎ byte ÀÇ È®·üÀ» °¡Áö°í ÀÖ´Â) ³ëµåÀÇ À§Ä¡
Prob[Parent[Pos]] += MinProb; // ù¹øÂ° °¡Àå ÀÛÀº°Í°ú µÎ¹øÂ° °¡Àå ÀÛÀº µÎ byte ÀÇ È®·üÀ» ´õÇÑ´Ù
/* °¡Àå ÀÛÀº µÎ¹øÂ° È®·üÀ» ±¸ÇÑ´Ù */
Pos = -1;
MinProb = fsize;
for (j = 0; j < Range+i; j++)
if ((Prob[j] < MinProb) && (Parent[j] == -1)) {
MinProb = Prob[j];
Pos = j;
}
Bit[Pos] = 1;
Parent[Pos] = Range + i;
Prob[Parent[Pos]] += MinProb;
}
/* Print Huffman code tree */
if (debug) {
for (i = 0; i < Range; i++) {
// È®·üÀÌ 0 Àΰ͵µ Äڵ尡 ºÎ¿©µÇÁö¸¸ Àǹ̰¡ ¾øÀ¸¹Ç·Î Á¦¿Ü
if (Prob[i] == 0)
continue;
// leaf ³ëµåºÎÅÍ parent ·Î ¿Ã¶ó°¡¸é¼ huffman code¸¦ ±¸ÇÑ´Ù
Len = 0;
Pos = i;
printf("°ª %.2X, È®·ü %d/%d, ÄÚµå ", i+Min, Prob[i], fsize);
while (Parent[Pos] >= 0) {
BitCode[Len++] = Bit[Pos]; // leafÀÇ bitºÎÅÍ µé¾î°¡Áö¸¸ ½ÇÁ¦ »ç¿ë½Ã´Â ¹Ý´ë
Pos = Parent[Pos]; // »óÀ§ ³ëµå·Î À̵¿
}
// Huffman code·Î encoding ÇÏ¿´À»¶§ÀÇ ¼Ò¿äµÇ´Â bit¼ö ÇÕ»ê
TotalLen += Len * Prob[i];
// leafÀÇ bitºÎÅÍ µé¾î°¡ ÀÖÀ¸¹Ç·Î ¿ªÀ¸·Î Ãâ·Â
while (Len > 0)
printf("%c", '0' + BitCode[--Len]);
printf("n");
}
printf("ÄÚµåÀÇ Æò±Õ±æÀÌ %f bitsn", TotalLen / (float)(fsize));
}
// Huffman encoded ÆÄÀÏÀÇ Çì´õ¸¦ Ãâ·ÂÇÑ´Ù
write(ofd, "HM", 2); // ASCII "HM"
write(ofd, &Min, sizeof(Min)); // min Byte °ª
write(ofd, &Max, sizeof(Max)); // max Byte °ª, Range ´Â Max-Min+1 ·Î °è»êÇÏ¿© ±¸ÇÑ´Ù
write(ofd, Prob, Range*sizeof(Prob[0])); // È®·ü
// Huffman codes ¸¦ ÀÌ¿ëÇÏ¿© ½ÇÁ¦·Î Encoding ÇÏ¿© Ãâ·ÂÇÑ´Ù
Buffer = 0;
BufLen = 0;
for (i = 0; i < fsize; i++) {
// leaf ³ëµåºÎÅÍ parent ·Î ¿Ã¶ó°¡¸é¼ huffman code¸¦ ±¸ÇÑ´Ù
Len = 0;
Pos = src_data[i];
while (Parent[Pos] >= 0) {
BitCode[Len++] = Bit[Pos];
Pos = Parent[Pos];
}
// Write Huffman code one bit at a time
while (Len > 0) {
// ÀÌÀü bit¸¦ left shift(»óÀ§ bit·Î À̵¿) ½Ã۰í ÇöÀç bit¸¦ or ½ÃŲ´Ù
Buffer = (Buffer << 1) | BitCode[--Len];
BufLen++;
// Buffer full = 4 byte boundary
if (BufLen == sizeof(Buffer)) {
write(ofd, &Buffer, 1, sizeof(Buffer));
Buffer = 0;
BufLen = 0;
}
}
}
// Buffer ¿¡ ³²¾ÆÀÖÀ¸¸é 4 byte boundary ½ÃÄÑ Ãâ·Â
if (BufLen != 0) {
// left most shift(»óÀ§ bit·Î À̵¿)
Buffer = Buffer << (32 - BufLen);
write(ofd, &Buffer, 1, sizeof(Buffer));
}
free(src_data);
free(Bit);
free(BitCode);
free(Prob);
free(Parent);
close(ifd);
close(ofd);
} |
|