!DOCTYPE html> Asher's Personal Website - Recursive Programs

  • Home Page
  • Huffman Compression

    Huffman Compression is a type of lossless prefix compression developed by Huffman in the 1940s. I did a project on this which I've copied snippets of below.

    A note on the rosetta txt file at the bottom: It's trained based off of a large database of text to determine the most common characters to have the prefix code attached to it. The specific text I used was about half of the Great Gatsby, which was the book I was reading at the time.

    Learn more about Huffman
              
    // Java code for the Huffman Programs - 3 classes plus the rosetta txt file based on the trained text.
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.io.PrintWriter;
    import java.util.PriorityQueue;
    
    public class HuffmanCodeGenerator 
    {
    	int[] arr = new int[255];
    	public HuffmanCodeGenerator(String inputFile) throws IOException
    	{
    		File input = new File (inputFile);
    		BufferedReader buff = new BufferedReader(new FileReader(input));
    
    		while (buff.ready())
    		{
    			int place = buff.read();
    			arr[place]++;
    		}
    
    		buff.close();
    
    	}
    	public int getFrequency(char c)
    	{
    		int place = (int) c;
    		return arr[place];
    	}
    
    	public PriorityQueue makeHeap ()
    	{
    		PriorityQueue heap = new PriorityQueue();
    
    		for (int i = 0; i < arr.length; i++)
    		{
    			if (getFrequency((char)i) > 0)
    			{
    				OrphanNode leafy = new OrphanNode ((char)i, getFrequency ((char)i));
    				heap.add(leafy);
    			}
    		}
    		return heap;
    	}
    
    
    	public OrphanNode makeTree ()
    	{
    		PriorityQueue heap = makeHeap();
    		while (heap.size() > 1)
    		{
    			OrphanNode lefty = heap.remove();
    			OrphanNode righty = heap.remove();
    			OrphanNode par = new OrphanNode (lefty.getFrequency()+ righty.getFrequency(), lefty, righty);
    			heap.add(par);
    		}
    		return heap.peek();
    	}
    	public String getCode (char c)
    	{
    		PriorityQueue list = new PriorityQueue();
    		list.add(makeTree());
    		while (list.size() > 0)
    		{
    			OrphanNode current = list.remove();
    			if (current.hasName())
    			{
    				if (current.getName() == c)
    				{
    					return current.getCode();
    				}
    			}
    			if (current.hasLeftChild())
    			{
    				OrphanNode left = current.getLeftChild();
    				left.setCode(current.getCode() + "0");
    				list.add(left);
    			}
    			if (current.hasRightChild())
    			{
    				OrphanNode right = current.getRightChild();
    				right.setCode(current.getCode() + "1");
    				list.add(right);
    			}
    		}
    
    		return "";
    	}
    
    	public void makeCodeFile (String codeFile) throws FileNotFoundException
    	{
    		PrintWriter pw = new PrintWriter (codeFile);
    
    		for (int i = 0; i < 128; i++)
    		{
    			pw.println(getCode((char)i));
    
    		}
    
    		pw.close();
    	}
    
    }
    
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileReader;
    import java.io.IOException;
    import java.io.PrintWriter;
    
    public class HuffmanDecoder 
    {
    	private String[] rosetta = new String[128];
    	public HuffmanDecoder (String codeFile) throws IOException
    	{
    		File input = new File (codeFile);
    		BufferedReader buff = new BufferedReader(new FileReader(input));
    
    		for (int i = 0; i < 128; i++)
    		{
    			rosetta[i] = buff.readLine();
    		}
    		buff.close();
    	}
    	public boolean isCode (String binary)
    	{
    		for (int i = 0; i < 128; i++)
    		{
    			if  (rosetta[i].equals(binary))
    			{
    				return true;
    			}
    		}
    		return false;
    	}
    	public char decodeChar (String binary)
    	{
    		if (!isCode(binary))
    		{
    			throw new NullPointerException();
    		}
    		else
    		{
    			char ans = 0;
    			for (int i = 0; i < 128; i++)
    			{
    				if  (rosetta[i].equals(binary))
    				{
    					ans = (char) i;
    				}
    			}
    			return ans;
    		}
    	}
    	public void decodeLong (String encodedFile, String decodedFile) throws IOException
    	{
    		PrintWriter pw = new PrintWriter (decodedFile);
    		
    		BufferedReader buff = new BufferedReader(new FileReader(encodedFile));
    
    		String str = "";
    		while (buff.ready())
    		{
    			str += (char) buff.read();
    			if (isCode(str))
    			{
    				pw.print(decodeChar(str));
    				str = "";
    			}
    		}
    		
    		buff.close();
    		pw.close();
    	}
    }
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileReader;
    import java.io.IOException;
    import java.io.PrintWriter;
    
    public class HuffmanEncoder 
    {
    	private String[] rosetta = new String[128];
    	public HuffmanEncoder (String codeFile) throws IOException
    	{
    		File input = new File (codeFile);
    		BufferedReader buff = new BufferedReader(new FileReader(input));
    
    		for (int i = 0; i < 128; i++)
    		{
    			rosetta[i] = buff.readLine();
    		}
    		buff.close();
    	}
    
    	public String encodeChar (char input)
    	{
    		return rosetta[(int)input];
    	}
    
    	public void encodeLong (String fileToCompress, String encodedFile) throws IOException
    	{
    		PrintWriter pw = new PrintWriter (encodedFile);
    		BufferedReader buff = new BufferedReader(new FileReader(fileToCompress));
    
    		while (buff.ready())
    		{
    			pw.print(encodeChar((char)buff.read()));
    
    		}
    		buff.close();
    		pw.close();
    
    	}
    
    	public void encodeFile (String encodedFile, String newEncoded) throws IOException
    	{
    		PrintWriter pw = new PrintWriter (newEncoded);
    		BufferedReader buff = new BufferedReader(new FileReader(encodedFile));
    
    		
    		
    		
    		
    		buff.close();
    		pw.close();
    	}
    }
    public class OrphanNode implements Comparable
        {
            private char name;
            private int frequency;
            private OrphanNode leftChild;
            private OrphanNode rightChild;
            private boolean hasName;
            private String code;
            
            public OrphanNode (char n, int f)
            {
                name = n;
                frequency = f;
                leftChild = null;
                rightChild = null;
                hasName = true;
                code = "";
            }
            public OrphanNode (int f, OrphanNode left, OrphanNode right)
            {
                this.frequency = f;
                leftChild = left;
                rightChild = right;	
                hasName = false;
                code = "";
            }
            
            public boolean hasName ()
            {
                return hasName;
            }
            public char getName()
            {
                return name;
            }
            public int compareTo(OrphanNode other) 
            {
                return this.frequency - other.frequency;
            }
            public int getFrequency ()
            {
                return frequency;
            }
            public boolean hasLeftChild ()
            {
                return leftChild !=null;
            }
            public boolean hasRightChild ()
            {
                return rightChild !=null;
            }
            public OrphanNode getLeftChild ()
            {
                return leftChild;
            }
            public OrphanNode getRightChild ()
            {
                return rightChild;
            }
            public void setCode (String str)
            {
                code = str;
            }
            public String getCode ()
            {
                return code;
            }
        }
    
    
    
    
    
    
    
    
    
    
        1101111
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
        111
        10110100101
        1101001
        
        10110101000010011
        
        
        11010000
        101101001000100
        1101000111001111
        
        
        1101101
        10110110
        011001
        
        11010001110010
        10110100100011
        1011010100001100
        101101001000101
        10110101000010000
        101101010000101
        1011010100001101
        11010001110011100
        11010001110011101
        110100011100110
        101101001001
        101101010001
        
        
        
        1011011100
        
        1011011111
        0110000101
        11010001100
        1011010111
        0110000011
        110100011101
        011000000
        011000011
        0110001
        10110101001
        1101000111000
        10110100110
        1011011101
        0110000100
        11010001101
        101101011000
        1011010100001110
        101101011001
        1011011110
        110100010
        1011010100000
        1011010010000
        101101000
        
        11010001111
        1011010100001111
        101101010000100010
        
        101101010000100101
        
        
        
        1000
        1101100
        100111
        10111
        001
        011011
        101100
        0000
        0100
        0110000010
        0110101
        10010
        110001
        0101
        0111
        1101110
        10110101101
        11001
        0001
        1010
        110101
        0110100
        110000
        1011010101
        100110
        10110100111
        101101010000100011
        
        101101010000100100