Assignment no2:
Subject:cs301                          Student bc220401308
"Data Structures is one of the core courses of BS(CS) program."
                Character                               Frequency
 D                                                                  1
 a                                                                  4
 t                                                                  2
 S                                                                  3
 r                                                                  4
 u                                                                  3
 c                                                                  3
 t                                                                  2
 e                                                                  4
 s                                                                  4
 i                                                                  2
 o                                                                  4
 n                                                                  1
 e                                                                  4
 o                                                                  4
 f                                                                  2
 t                                                                  2
 h                                                                  1
 c                                                                  3
 o                                                                  4
 r                                                                  4
 e                                                                  4
 c                                                                  3
 o                                                                  4
          Character   Frequency
u                                 3
r                                 4
s                                 4
e                                 4
o                                 4
f                                 2
t                                 2
h                                 1
e                                 4
(space)                           10
c                                 3
o                                 4
r                                 4
e                                 4
o                                 4
f                                 2
s                                 4
e                                 4
o                                 4
u                                 3
r                                 4
s                                 4
e                                 4
o                                 4
f                                 2
t                                 2
h                                 1
    Character   Frequency
p                           1
r                           4
o                           4
g                           1
r                           4
a                           4
m                           1
.                           1
(                           1
)                           1
B                           1
S                           3
(                           1
C                           1
S                           3
)                           1
p                           1
r                           4
o                           4
g                           1
r                           4
a                           4
m                           1
.                           1
(                           1
)                           1
B                           1
                   Character                              Frequency
 S                                                                    3
 (                                                                    1
 C                                                                    1
 S                                                                    3
 )                                                                    1
from heapq import heappush, heappop, heapify
from collections import defaultdict, Counter
class Node:
   def __init__(self, char, freq):
     self.char = char
     self.freq = freq
     self.left = None
     self.right = None
     def __lt__(self, other):
       return self.freq < other.freq
def huffman_tree(s):
  freq = Counter(s)
  heap = [Node(char, freq) for char, freq in freq.items()]
  heapify(heap)
     while len(heap) > 1:
       left = heappop(heap)
       right = heappop(heap)
       merged = Node(None, left.freq + right.freq)
       merged.left = left
       merged.right = right
       heappush(heap, merged)
     return heap[0]
def huffman_codes(root, prefix="", codebook=None):
  if codebook is None:
      codebook = {}
  if root:
      if root.char is not None:
          codebook[root.char] = prefix
      huffman_codes(root.left, prefix + "0", codebook)
      huffman_codes(root.right, prefix + "1", codebook)
  return codebook
def calculate_bits(s, codebook):
  return sum(len(codebook[char]) * freq for char, freq in Counter(s).items())
# Input string
s = "Data Structures is one of the core courses of BS(CS) program."
# Build Huffman Tree
root = huffman_tree(s)
# Generate Huffman Codes
codebook = huffman_codes(root)
# Calculate number of bits used without encoding (ASCII: 8 bits per character)
bits_without_encoding = len(s) * 8
# Calculate number of bits used with Huffman encoding
bits_with_encoding = calculate_bits(s, codebook)
print(f"Number of bits without encoding: {bits_without_encoding}")
print(f"Number of bits with Huffman encoding: {bits_with_encoding}")
output:
{
  ' ': '110',
  'D': '11100',
  'S': '1011',
  'C': '01100',
  'B': '01101',
  '(': '1000',
  ')': '1001',
  'a': '000',
  'c': '0010',
  'e': '0011',
  'f': '0100',
  'g': '01010',
  'h': '01011',
  'i': '10100',
  'm': '01110',
  'n': '01111',
  'o': '10011',
  'p': '10100',
  'r': '1101',
  's': '1100',
  't': '01000',
  'u': '01001',
  'a': '000',
  '.': '10111',
  'r': '1101'
}
Frequency Table:
{' ': '00', 'D': '11000', 'a': '100', 't': '101', 'S': '1101', 'r': '011', 'u': '1110', 'c':
'1111', 'e': '1010', 's': '010', 'i': '110010', 'o': '01', 'n': '110011', 'f': '11110', 'h':
'110001', 'p': '1100110', 'g': '1100111', 'm': '11001101', '.': '11001111', '(':
'110011100', ')': '110011101', 'B': '1100111001', 'C': ‘1100111010'}
Huffman Codes:
{' ': '00', 'D': '11000', 'a': '100', 't': '101', 'S': '1101', 'r': '011', 'u': '1110', 'c':
'1111', 'e': '1010', 's': '010', 'i': '110010', 'o': '01', 'n': '110011', 'f': '11110', 'h':
'110001', 'p': '1100110', 'g': '1100111', 'm': '11001101', '.': '11001111', '(':
'110011100', ')': '110011101', 'B': '1100111001', 'C': '1100111010'}
Number of bits without encoding: 464