!DOCTYPE html>
Huffman Compression is a type of lossless prefix compression developed by Huffman in the 1940s. I did a project on this which I've copied snippets of below.
A note on the rosetta txt file at the bottom: It's trained based off of a large database of text to determine the most common characters to have the prefix code attached to it. The specific text I used was about half of the Great Gatsby, which was the book I was reading at the time.
// Java code for the Huffman Programs - 3 classes plus the rosetta txt file based on the trained text.
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.PriorityQueue;
public class HuffmanCodeGenerator
{
int[] arr = new int[255];
public HuffmanCodeGenerator(String inputFile) throws IOException
{
File input = new File (inputFile);
BufferedReader buff = new BufferedReader(new FileReader(input));
while (buff.ready())
{
int place = buff.read();
arr[place]++;
}
buff.close();
}
public int getFrequency(char c)
{
int place = (int) c;
return arr[place];
}
public PriorityQueue makeHeap ()
{
PriorityQueue heap = new PriorityQueue();
for (int i = 0; i < arr.length; i++)
{
if (getFrequency((char)i) > 0)
{
OrphanNode leafy = new OrphanNode ((char)i, getFrequency ((char)i));
heap.add(leafy);
}
}
return heap;
}
public OrphanNode makeTree ()
{
PriorityQueue heap = makeHeap();
while (heap.size() > 1)
{
OrphanNode lefty = heap.remove();
OrphanNode righty = heap.remove();
OrphanNode par = new OrphanNode (lefty.getFrequency()+ righty.getFrequency(), lefty, righty);
heap.add(par);
}
return heap.peek();
}
public String getCode (char c)
{
PriorityQueue list = new PriorityQueue();
list.add(makeTree());
while (list.size() > 0)
{
OrphanNode current = list.remove();
if (current.hasName())
{
if (current.getName() == c)
{
return current.getCode();
}
}
if (current.hasLeftChild())
{
OrphanNode left = current.getLeftChild();
left.setCode(current.getCode() + "0");
list.add(left);
}
if (current.hasRightChild())
{
OrphanNode right = current.getRightChild();
right.setCode(current.getCode() + "1");
list.add(right);
}
}
return "";
}
public void makeCodeFile (String codeFile) throws FileNotFoundException
{
PrintWriter pw = new PrintWriter (codeFile);
for (int i = 0; i < 128; i++)
{
pw.println(getCode((char)i));
}
pw.close();
}
}
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
public class HuffmanDecoder
{
private String[] rosetta = new String[128];
public HuffmanDecoder (String codeFile) throws IOException
{
File input = new File (codeFile);
BufferedReader buff = new BufferedReader(new FileReader(input));
for (int i = 0; i < 128; i++)
{
rosetta[i] = buff.readLine();
}
buff.close();
}
public boolean isCode (String binary)
{
for (int i = 0; i < 128; i++)
{
if (rosetta[i].equals(binary))
{
return true;
}
}
return false;
}
public char decodeChar (String binary)
{
if (!isCode(binary))
{
throw new NullPointerException();
}
else
{
char ans = 0;
for (int i = 0; i < 128; i++)
{
if (rosetta[i].equals(binary))
{
ans = (char) i;
}
}
return ans;
}
}
public void decodeLong (String encodedFile, String decodedFile) throws IOException
{
PrintWriter pw = new PrintWriter (decodedFile);
BufferedReader buff = new BufferedReader(new FileReader(encodedFile));
String str = "";
while (buff.ready())
{
str += (char) buff.read();
if (isCode(str))
{
pw.print(decodeChar(str));
str = "";
}
}
buff.close();
pw.close();
}
}
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
public class HuffmanEncoder
{
private String[] rosetta = new String[128];
public HuffmanEncoder (String codeFile) throws IOException
{
File input = new File (codeFile);
BufferedReader buff = new BufferedReader(new FileReader(input));
for (int i = 0; i < 128; i++)
{
rosetta[i] = buff.readLine();
}
buff.close();
}
public String encodeChar (char input)
{
return rosetta[(int)input];
}
public void encodeLong (String fileToCompress, String encodedFile) throws IOException
{
PrintWriter pw = new PrintWriter (encodedFile);
BufferedReader buff = new BufferedReader(new FileReader(fileToCompress));
while (buff.ready())
{
pw.print(encodeChar((char)buff.read()));
}
buff.close();
pw.close();
}
public void encodeFile (String encodedFile, String newEncoded) throws IOException
{
PrintWriter pw = new PrintWriter (newEncoded);
BufferedReader buff = new BufferedReader(new FileReader(encodedFile));
buff.close();
pw.close();
}
}
public class OrphanNode implements Comparable
{
private char name;
private int frequency;
private OrphanNode leftChild;
private OrphanNode rightChild;
private boolean hasName;
private String code;
public OrphanNode (char n, int f)
{
name = n;
frequency = f;
leftChild = null;
rightChild = null;
hasName = true;
code = "";
}
public OrphanNode (int f, OrphanNode left, OrphanNode right)
{
this.frequency = f;
leftChild = left;
rightChild = right;
hasName = false;
code = "";
}
public boolean hasName ()
{
return hasName;
}
public char getName()
{
return name;
}
public int compareTo(OrphanNode other)
{
return this.frequency - other.frequency;
}
public int getFrequency ()
{
return frequency;
}
public boolean hasLeftChild ()
{
return leftChild !=null;
}
public boolean hasRightChild ()
{
return rightChild !=null;
}
public OrphanNode getLeftChild ()
{
return leftChild;
}
public OrphanNode getRightChild ()
{
return rightChild;
}
public void setCode (String str)
{
code = str;
}
public String getCode ()
{
return code;
}
}
1101111
111
10110100101
1101001
10110101000010011
11010000
101101001000100
1101000111001111
1101101
10110110
011001
11010001110010
10110100100011
1011010100001100
101101001000101
10110101000010000
101101010000101
1011010100001101
11010001110011100
11010001110011101
110100011100110
101101001001
101101010001
1011011100
1011011111
0110000101
11010001100
1011010111
0110000011
110100011101
011000000
011000011
0110001
10110101001
1101000111000
10110100110
1011011101
0110000100
11010001101
101101011000
1011010100001110
101101011001
1011011110
110100010
1011010100000
1011010010000
101101000
11010001111
1011010100001111
101101010000100010
101101010000100101
1000
1101100
100111
10111
001
011011
101100
0000
0100
0110000010
0110101
10010
110001
0101
0111
1101110
10110101101
11001
0001
1010
110101
0110100
110000
1011010101
100110
10110100111
101101010000100011
101101010000100100