by Bar Zohan
10. June 2010 01:28
Huffman coding is the algorithm used for string compression. These days I really have lack of time so there is no description for code, sorry :) (i know there never was but this time not even a short paragraph)
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <string>
#include <vector>
#include <algorithm>
#include <map>
using namespace std;
class Node
{
public:
char ch;
int freq;
Node* leftNode;
Node* rightNode;
Node(){
this->ch;
this->freq = 0;
this->leftNode = NULL;
this->rightNode = NULL;
}
Node(char charachter){
this->ch=charachter;
this->freq = 0;
this->leftNode = NULL;
this->rightNode = NULL;
}
void increaseFreq()
{
this->freq++;
}
};
bool NodeSort(const Node* d1, const Node* d2)
{
if(d1->freq!=d2->freq)
return d1->freq < d2->freq;
else
return d1->ch < d2->ch;
}
bool NodeSort2(const Node* d1, const Node* d2)
{
return d1->ch < d2->ch;
}
bool MapSort(map<char, string>::const_iterator map_iter, map<char, string>::const_iterator map_iter2)
{
return map_iter->second.size() > map_iter2->second.size();
}
map <char, string> character_map;
vector<char> text;
vector<Node*> nodeQueue;
vector<char> char_array;
vector<Node*> node_array;
string first_input;
void buildCode(Node *x, string s);
void getCharacters()
{
string input;
cin >> first_input;
input.append(first_input);
input.append(".txt");*/
ifstream infile("input.txt",ios_base::in);
while(infile.good())
{
char cha = infile.get();
text.push_back(cha);
}
text.pop_back();
for(int i =0; i<text.size();i++)
{
if(count(char_array.begin(), char_array.end(), text[i])<=0)
{
int cou;
cou = count(text.begin(), text.end(), text[i]);
char_array.push_back(text[i]);
Node *node = new Node(text[i]);
node->freq = cou;
nodeQueue.push_back(node);
node_array.push_back(node);
}
}
sort(node_array.begin(), node_array.end(), NodeSort2);
}
void buildCode(Node *x, string s)
{
if (x->leftNode!=NULL && x->rightNode!=NULL) {
buildCode( x->leftNode, s + '0');
buildCode( x->rightNode, s + '1');
}
else {
character_map[x->ch] = s;
}
}
void huffman_coding()
{
sort(nodeQueue.begin(), nodeQueue.end(), NodeSort);
while(nodeQueue.size()>1)
{
Node *newNode = new Node();
newNode->leftNode = nodeQueue[0];
newNode->rightNode= nodeQueue[1];
newNode->freq = nodeQueue[0]->freq + nodeQueue[1]->freq;
int index;
bool indexSet = false;
for(int i = 0 ; i<nodeQueue.size(); i++)
{
if(nodeQueue[i]->freq > newNode->freq)
{
index = i;
indexSet = true;
break;
}
}
if(indexSet == true)
{
vector<Node*>::iterator it;
it = nodeQueue.begin() + index;
nodeQueue.insert(it,newNode);
}
else
{
nodeQueue.push_back(newNode);
}
nodeQueue.erase(nodeQueue.begin());
nodeQueue.erase(nodeQueue.begin());
}
string s;
buildCode(nodeQueue.front(), s);
}
void output_hcodes()
{
string input;
ofstream outline_hcode("hcodes.txt",ios_base::out);
map<char, string>::const_iterator map_iter;
for (map_iter=character_map.begin(); map_iter != character_map.end(); ++map_iter)
{
outline_hcode << map_iter->first << " " << map_iter->second << endl;
}
outline_hcode.close();
}
string text_bit_size;
void output_encoded()
{
string input;
ofstream out_encode("encoded.txt",ios_base::out);
for(int i = 0; i<text.size(); i++)
{
string s = character_map[text[i]];
out_encode<<character_map[text[i]];
text_bit_size.append(character_map[text[i]]);
}
out_encode.close();
}
void output_freq()
{
string input;
ofstream outline("freq.txt",ios_base::out);
for(int i = 0; i < node_array.size(); i++)
{
int fr ;
fr = node_array[i]->freq;
char ch;
ch = node_array[i]->ch;
outline << ch<<" "<<fr<<endl;
}
outline.close();
}
void ratio_calc()
{
int len_of_enc_txt= text_bit_size.size();
sort(node_array.begin(), node_array.end(), NodeSort);
string longest_char = character_map[node_array[0]->ch];
int maximum_bit = longest_char.size();
int text_normal_size =text.size();
float compress_ratio =(float) len_of_enc_txt/(8*text_normal_size);
string input;
ofstream output_ratio("stats.txt",ios_base::out);
output_ratio<<maximum_bit<<endl;
output_ratio<<len_of_enc_txt<<endl;
output_ratio<<compress_ratio;
output_ratio.close();
}
int main()
{
getCharacters();
huffman_coding();
output_freq();
output_hcodes();
output_encoded();
ratio_calc();
return 0;
}