4/16/25, 12:28 AM handwriting-recognition
Handwriting recognition model: by
Shashank(28083) and Aftab(28052)
In [1]: import os
import cv2
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Bidirectional, LS
from keras.optimizers import Adam
Variables
# In [24]:
# Experiment configuration.
# train_size / valid_size: number of cleaned samples loaded from each split.
# num_epochs / batch_size_value: passed to model_final.fit as epochs / batch_size.
train_size, valid_size = 30000, 3000
num_epochs, batch_size_value = 60, 128
Load and view data
In [3]: train = pd.read_csv('/kaggle/input/handwriting-recognition/written_name_train_v2
valid = pd.read_csv('/kaggle/input/handwriting-recognition/written_name_validati
In [4]: plt.figure(figsize=(15, 10))
for i in range(6):
ax = plt.subplot(2, 3, i+1)
img_dir = '/kaggle/input/handwriting-recognition/train_v2/train/'+train.loc[
image = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)
plt.imshow(image, cmap = 'gray')
plt.title(train.loc[i, 'IDENTITY'], fontsize=12)
plt.axis('off')
plt.subplots_adjust(wspace=0.2, hspace=-0.8)
file:///C:/Users/hp/Downloads/handwriting-recognition (1).html 1/12
4/16/25, 12:28 AM handwriting-recognition
Cleaning the data by removing rows with
missing or unreadable labels
# In [5]:
# Count the rows whose IDENTITY label is missing in each split.
train_nan_count = train['IDENTITY'].isnull().sum()
valid_nan_count = valid['IDENTITY'].isnull().sum()
print("Number of NaNs in train set : ", train_nan_count)
print("Number of NaNs in validation set : ", valid_nan_count)
Number of NaNs in train set : 565
Number of NaNs in validation set : 78
# In [6]:
# Remove every row that contains a missing value. The names are rebound to the
# filtered frames rather than mutating the frames in place.
train = train.dropna(axis=0)
valid = valid.dropna(axis=0)
In [7]: unreadable = train[train['IDENTITY'] == 'UNREADABLE']
unreadable.reset_index(inplace = True, drop=True)
plt.figure(figsize=(15, 10))
for i in range(6):
ax = plt.subplot(2, 3, i+1)
img_dir = '/kaggle/input/handwriting-recognition/train_v2/train/'+unreadable
image = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)
plt.imshow(image, cmap = 'gray')
plt.title(unreadable.loc[i, 'IDENTITY'], fontsize=12)
plt.axis('off')
plt.subplots_adjust(wspace=0.2, hspace=-0.8)
# In [8]:
# Keep only the rows whose label is not the literal marker 'UNREADABLE'.
train = train.loc[train['IDENTITY'].ne('UNREADABLE')]
valid = valid.loc[valid['IDENTITY'].ne('UNREADABLE')]
# In [9]:
# Normalise every label to upper case; `alphabets` only contains upper-case
# letters, so lower-case characters could not be encoded otherwise.
# `assign` returns a new frame, which avoids writing a column into a frame that
# was produced by boolean filtering (the pattern behind pandas'
# SettingWithCopyWarning).
train = train.assign(IDENTITY=train['IDENTITY'].str.upper())
valid = valid.assign(IDENTITY=valid['IDENTITY'].str.upper())
# In [10]:
# Renumber the rows 0..n-1 after filtering so that later cells can address
# samples with train.loc[i, ...] / valid.loc[i, ...] for i in range(size).
train = train.reset_index(drop=True)
valid = valid.reset_index(drop=True)
Data Preprocessing
# In [11]:
def preprocess(img, target_height=64, target_width=256):
    """Place a grayscale image on a white canvas and rotate it 90 degrees clockwise.

    The image is cropped to at most ``target_height`` x ``target_width``
    (keeping its top-left corner), copied into the top-left corner of a canvas
    filled with 255, and the canvas is then rotated clockwise.

    Parameters
    ----------
    img : 2-D numpy array
        Grayscale image as returned by ``cv2.imread(path, cv2.IMREAD_GRAYSCALE)``.
    target_height, target_width : int
        Canvas size before rotation. The defaults (64, 256) reproduce the
        fixed size the notebook trains on.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(target_width, target_height)``; with the
        defaults this is ``(256, 64)``.

    Raises
    ------
    ValueError
        If ``img`` is None (``cv2.imread`` returns None when a file cannot be
        read) or if ``img`` is not 2-D.
    """
    if img is None:
        raise ValueError(
            "preprocess() received None; cv2.imread returns None when the "
            "image path is missing or unreadable"
        )

    (h, w) = img.shape
    final_img = np.ones([target_height, target_width]) * 255

    # crop: clamp the copied region to the canvas so the slice bounds and the
    # source region always describe the same rectangle
    h = min(h, target_height)
    w = min(w, target_width)
    final_img[:h, :w] = img[:h, :w]

    # k=-1 rotates clockwise (same result as cv2.ROTATE_90_CLOCKWISE); the copy
    # makes the result an independent, contiguous array rather than a view.
    return np.ascontiguousarray(np.rot90(final_img, k=-1))
In [12]: train_x = []
for i in range(train_size):
img_dir = '/kaggle/input/handwriting-recognition/train_v2/train/'+train.loc[
image = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)
image = preprocess(image)
image = image/255.
train_x.append(image)
In [13]: valid_x = []
for i in range(valid_size):
img_dir = '/kaggle/input/handwriting-recognition/validation_v2/validation/'+
image = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)
image = preprocess(image)
image = image/255.
valid_x.append(image)
# In [14]:
# Turn the lists of preprocessed images into 4-D arrays with a trailing channel
# axis, matching the model's Input(shape=(256, 64, 1)).
batch_shape = (-1, 256, 64, 1)
train_x = np.reshape(np.array(train_x), batch_shape)
valid_x = np.reshape(np.array(valid_x), batch_shape)
Convert labels to numerical format and
back
Prepares the data for training a model with CTC loss, which requires numerical labels and
their lengths for alignment during training.
# In [15]:
alphabets = u"ABCDEFGHIJKLMNOPQRSTUVWXYZ-' "
max_str_len = 24 # max length of input labels
num_of_characters = len(alphabets) + 1 # +1 for ctc pseudo blank
num_of_timestamps = 64 # max length of predicted labels


def label_to_num(label):
    """Encode a label string as an array of indices into ``alphabets``.

    Each character is mapped to its position in ``alphabets``. A character that
    is not in ``alphabets`` maps to -1, exactly as before; because -1 is also
    the value used to pad the label arrays, a warning is emitted so that such
    labels do not slip into training unnoticed.

    Parameters
    ----------
    label : str
        Text label to encode.

    Returns
    -------
    numpy.ndarray
        One index per character of ``label``.
    """
    import warnings

    label_num = [alphabets.find(ch) for ch in label]
    if -1 in label_num:
        unknown = sorted({ch for ch in label if ch not in alphabets})
        warnings.warn(
            "label %r contains characters outside the alphabet %r; "
            "they are encoded as -1, which is also the padding value"
            % (label, unknown)
        )
    return np.array(label_num)


def num_to_label(num):
    """Decode a sequence of alphabet indices back into a string.

    Decoding stops at the first -1, the value that marks the end of the
    decoded/padded sequence; everything after it is ignored.

    Parameters
    ----------
    num : iterable of int
        Indices into ``alphabets``, optionally terminated by -1.

    Returns
    -------
    str
        The decoded text.
    """
    chars = []
    for ch in num:
        if ch == -1:  # CTC Blank
            break
        chars.append(alphabets[ch])
    return "".join(chars)
file:///C:/Users/hp/Downloads/handwriting-recognition (1).html 3/12
4/16/25, 12:28 AM handwriting-recognition
# In [16]:
# Sanity check: encode one sample name and print the resulting indices.
name = 'OUSSAMA'
encoded_name = label_to_num(name)
print(name, '\n', encoded_name)
OUSSAMA
[14 20 18 18 0 12 0]
In [17]: train_y = np.ones([train_size, max_str_len]) * -1
train_label_len = np.zeros([train_size, 1])
train_input_len = np.ones([train_size, 1]) * (num_of_timestamps-2)
train_output = np.zeros([train_size])
for i in range(train_size):
train_label_len[i] = len(train.loc[i, 'IDENTITY'])
train_y[i, 0:len(train.loc[i, 'IDENTITY'])]= label_to_num(train.loc[i, 'IDEN
In [18]: valid_y = np.ones([valid_size, max_str_len]) * -1
valid_label_len = np.zeros([valid_size, 1])
valid_input_len = np.ones([valid_size, 1]) * (num_of_timestamps-2)
valid_output = np.zeros([valid_size])
for i in range(valid_size):
valid_label_len[i] = len(valid.loc[i, 'IDENTITY'])
valid_y[i, 0:len(valid.loc[i, 'IDENTITY'])]= label_to_num(valid.loc[i, 'IDEN
In [19]: print('True label : ',train.loc[100, 'IDENTITY'] , '\ntrain_y : ',train_y[100],'
'\ntrain_input_len : ', train_input_len[100])
True label : NOUR
train_y : [13. 14. 20. 17. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -
1.
-1. -1. -1. -1. -1. -1.]
train_label_len : [4.]
train_input_len : [62.]
Building the model
This model leverages the strengths of both CNNs for feature extraction and RNNs for
sequence modeling, making it highly suitable for handwriting recognition tasks.
In [20]: # Input layer: expects grayscale images of shape (256, 64, 1)
input_data = Input(shape=(256, 64, 1), name='input')
# First Convolutional Block
inner = Conv2D(32, (3, 3), padding='same', name='conv1', kernel_initializer='he_
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(2, 2), name='max1')(inner)
# Second Convolutional Block
inner = Conv2D(64, (3, 3), padding='same', name='conv2', kernel_initializer='he_
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(2, 2), name='max2')(inner)
inner = Dropout(0.3)(inner)
# Third Convolutional Block
inner = Conv2D(128, (3, 3), padding='same', name='conv3', kernel_initializer='he
file:///C:/Users/hp/Downloads/handwriting-recognition (1).html 4/12
4/16/25, 12:28 AM handwriting-recognition
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)
inner = Dropout(0.3)(inner)
# Reshape layer to prepare for RNN input
inner = Reshape(target_shape=((64, 1024)), name='reshape')(inner) # Reshape to
inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense
# Recurrent Neural Network (RNN) Layers
inner = Bidirectional(LSTM(256, return_sequences=True), name='lstm1')(inner) #
inner = Bidirectional(LSTM(256, return_sequences=True), name='lstm2')(inner) #
# Output layer
inner = Dense(num_of_characters, kernel_initializer='he_normal', name='dense2')(
y_pred = Activation('softmax', name='softmax')(inner)
# Model definition
model = Model(inputs=input_data, outputs=y_pred)
model.summary()
file:///C:/Users/hp/Downloads/handwriting-recognition (1).html 5/12
4/16/25, 12:28 AM handwriting-recognition
Model: "functional_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input (InputLayer) [(None, 256, 64, 1)] 0
_________________________________________________________________
conv1 (Conv2D) (None, 256, 64, 32) 320
_________________________________________________________________
batch_normalization (BatchNo (None, 256, 64, 32) 128
_________________________________________________________________
activation (Activation) (None, 256, 64, 32) 0
_________________________________________________________________
max1 (MaxPooling2D) (None, 128, 32, 32) 0
_________________________________________________________________
conv2 (Conv2D) (None, 128, 32, 64) 18496
_________________________________________________________________
batch_normalization_1 (Batch (None, 128, 32, 64) 256
_________________________________________________________________
activation_1 (Activation) (None, 128, 32, 64) 0
_________________________________________________________________
max2 (MaxPooling2D) (None, 64, 16, 64) 0
_________________________________________________________________
dropout (Dropout) (None, 64, 16, 64) 0
_________________________________________________________________
conv3 (Conv2D) (None, 64, 16, 128) 73856
_________________________________________________________________
batch_normalization_2 (Batch (None, 64, 16, 128) 512
_________________________________________________________________
activation_2 (Activation) (None, 64, 16, 128) 0
_________________________________________________________________
max3 (MaxPooling2D) (None, 64, 8, 128) 0
_________________________________________________________________
dropout_1 (Dropout) (None, 64, 8, 128) 0
_________________________________________________________________
reshape (Reshape) (None, 64, 1024) 0
_________________________________________________________________
dense1 (Dense) (None, 64, 64) 65600
_________________________________________________________________
lstm1 (Bidirectional) (None, 64, 512) 657408
_________________________________________________________________
lstm2 (Bidirectional) (None, 64, 512) 1574912
_________________________________________________________________
dense2 (Dense) (None, 64, 30) 15390
_________________________________________________________________
softmax (Activation) (None, 64, 30) 0
=================================================================
Total params: 2,406,878
Trainable params: 2,406,430
Non-trainable params: 448
_________________________________________________________________
The CTC loss function
# In [21]:
def ctc_lambda_func(args):
    """Compute the CTC batch cost; intended to be wrapped in a Keras Lambda layer.

    ``args`` is a 4-item sequence unpacked, in order, as
    (y_pred, labels, input_length, label_length). The first two time steps of
    y_pred are dropped before the cost is computed, and the result of
    ``K.ctc_batch_cost`` is returned.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    # Dropping the first 2 time steps is critical here since the first couple
    # outputs of the RNN tend to be garbage.
    trimmed_predictions = predictions[:, 2:, :]
    return K.ctc_batch_cost(
        targets, trimmed_predictions, prediction_lengths, target_lengths
    )
In [22]: labels = Input(name='gtruth_labels', shape=[max_str_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
ctc_loss = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, label
model_final = Model(inputs=[input_data, labels, input_length, label_length], out
Compile the model
In [23]: # the loss calculation occurs elsewhere, so we use a dummy lambda function for t
model_final.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=Adam(
Train our model
In [25]: model_final.fit(x=[train_x, train_y, train_input_len, train_label_len], y=train_
validation_data=([valid_x, valid_y, valid_input_len, valid_label
epochs=num_epochs, batch_size=batch_size_value)
file:///C:/Users/hp/Downloads/handwriting-recognition (1).html 7/12
4/16/25, 12:28 AM handwriting-recognition
Epoch 1/60
235/235 [==============================] - 35s 149ms/step - loss: 24.2538 - val_l
oss: 20.9804
Epoch 2/60
235/235 [==============================] - 33s 140ms/step - loss: 20.1466 - val_l
oss: 20.0750
Epoch 3/60
235/235 [==============================] - 33s 140ms/step - loss: 19.7909 - val_l
oss: 19.6560
Epoch 4/60
235/235 [==============================] - 33s 140ms/step - loss: 19.2599 - val_l
oss: 18.8176
Epoch 5/60
235/235 [==============================] - 33s 140ms/step - loss: 17.9369 - val_l
oss: 17.3873
Epoch 6/60
235/235 [==============================] - 33s 140ms/step - loss: 15.8385 - val_l
oss: 16.1034
Epoch 7/60
235/235 [==============================] - 33s 139ms/step - loss: 13.3372 - val_l
oss: 12.4269
Epoch 8/60
235/235 [==============================] - 33s 139ms/step - loss: 10.4938 - val_l
oss: 9.3627
Epoch 9/60
235/235 [==============================] - 33s 140ms/step - loss: 8.0767 - val_lo
ss: 8.6478
Epoch 10/60
235/235 [==============================] - 33s 140ms/step - loss: 6.6766 - val_lo
ss: 6.4901
Epoch 11/60
235/235 [==============================] - 33s 139ms/step - loss: 5.7721 - val_lo
ss: 5.6327
Epoch 12/60
235/235 [==============================] - 33s 139ms/step - loss: 5.1255 - val_lo
ss: 4.7117
Epoch 13/60
235/235 [==============================] - 33s 139ms/step - loss: 4.6275 - val_lo
ss: 4.0240
Epoch 14/60
235/235 [==============================] - 33s 139ms/step - loss: 4.2499 - val_lo
ss: 3.8764
Epoch 15/60
235/235 [==============================] - 33s 139ms/step - loss: 3.9470 - val_lo
ss: 3.5540
Epoch 16/60
235/235 [==============================] - 33s 139ms/step - loss: 3.6851 - val_lo
ss: 3.3619
Epoch 17/60
235/235 [==============================] - 33s 139ms/step - loss: 3.4803 - val_lo
ss: 3.1234
Epoch 18/60
235/235 [==============================] - 33s 139ms/step - loss: 3.2980 - val_lo
ss: 3.0201
Epoch 19/60
235/235 [==============================] - 33s 140ms/step - loss: 3.1289 - val_lo
ss: 2.8721
Epoch 20/60
235/235 [==============================] - 32s 138ms/step - loss: 2.9974 - val_lo
ss: 2.8220
file:///C:/Users/hp/Downloads/handwriting-recognition (1).html 8/12
4/16/25, 12:28 AM handwriting-recognition
Epoch 21/60
235/235 [==============================] - 33s 138ms/step - loss: 2.8737 - val_lo
ss: 2.7791
Epoch 22/60
235/235 [==============================] - 33s 138ms/step - loss: 2.7688 - val_lo
ss: 2.6779
Epoch 23/60
235/235 [==============================] - 32s 138ms/step - loss: 2.6599 - val_lo
ss: 2.5977
Epoch 24/60
235/235 [==============================] - 33s 139ms/step - loss: 2.5765 - val_lo
ss: 2.5042
Epoch 25/60
235/235 [==============================] - 32s 138ms/step - loss: 2.4775 - val_lo
ss: 2.4803
Epoch 26/60
235/235 [==============================] - 33s 138ms/step - loss: 2.4019 - val_lo
ss: 2.3765
Epoch 27/60
235/235 [==============================] - 33s 139ms/step - loss: 2.3354 - val_lo
ss: 2.3629
Epoch 28/60
235/235 [==============================] - 33s 139ms/step - loss: 2.2755 - val_lo
ss: 2.3669
Epoch 29/60
235/235 [==============================] - 33s 139ms/step - loss: 2.2168 - val_lo
ss: 2.3507
Epoch 30/60
235/235 [==============================] - 32s 138ms/step - loss: 2.1544 - val_lo
ss: 2.2798
Epoch 31/60
235/235 [==============================] - 33s 138ms/step - loss: 2.0973 - val_lo
ss: 2.2251
Epoch 32/60
235/235 [==============================] - 32s 138ms/step - loss: 2.0523 - val_lo
ss: 2.1829
Epoch 33/60
235/235 [==============================] - 33s 138ms/step - loss: 2.0090 - val_lo
ss: 2.1933
Epoch 34/60
235/235 [==============================] - 32s 138ms/step - loss: 1.9518 - val_lo
ss: 2.2380
Epoch 35/60
235/235 [==============================] - 33s 138ms/step - loss: 1.9124 - val_lo
ss: 2.1247
Epoch 36/60
235/235 [==============================] - 33s 139ms/step - loss: 1.8700 - val_lo
ss: 2.1631
Epoch 37/60
235/235 [==============================] - 32s 138ms/step - loss: 1.8235 - val_lo
ss: 2.1261
Epoch 38/60
235/235 [==============================] - 33s 139ms/step - loss: 1.7984 - val_lo
ss: 2.1171
Epoch 39/60
235/235 [==============================] - 32s 138ms/step - loss: 1.7503 - val_lo
ss: 2.1015
Epoch 40/60
235/235 [==============================] - 32s 138ms/step - loss: 1.6974 - val_lo
ss: 2.0820
file:///C:/Users/hp/Downloads/handwriting-recognition (1).html 9/12
4/16/25, 12:28 AM handwriting-recognition
Epoch 41/60
235/235 [==============================] - 32s 138ms/step - loss: 1.6743 - val_lo
ss: 2.0836
Epoch 42/60
235/235 [==============================] - 32s 138ms/step - loss: 1.6519 - val_lo
ss: 2.0550
Epoch 43/60
235/235 [==============================] - 32s 138ms/step - loss: 1.6135 - val_lo
ss: 2.0475
Epoch 44/60
235/235 [==============================] - 32s 138ms/step - loss: 1.5787 - val_lo
ss: 2.0467
Epoch 45/60
235/235 [==============================] - 32s 138ms/step - loss: 1.5445 - val_lo
ss: 2.0261
Epoch 46/60
235/235 [==============================] - 32s 138ms/step - loss: 1.5123 - val_lo
ss: 2.0527
Epoch 47/60
235/235 [==============================] - 33s 138ms/step - loss: 1.4843 - val_lo
ss: 2.0099
Epoch 48/60
235/235 [==============================] - 33s 139ms/step - loss: 1.4491 - val_lo
ss: 2.0523
Epoch 49/60
235/235 [==============================] - 32s 138ms/step - loss: 1.4254 - val_lo
ss: 2.0279
Epoch 50/60
235/235 [==============================] - 32s 138ms/step - loss: 1.3980 - val_lo
ss: 2.0014
Epoch 51/60
235/235 [==============================] - 32s 137ms/step - loss: 1.3571 - val_lo
ss: 2.0160
Epoch 52/60
235/235 [==============================] - 33s 138ms/step - loss: 1.3361 - val_lo
ss: 2.0246
Epoch 53/60
235/235 [==============================] - 32s 138ms/step - loss: 1.3080 - val_lo
ss: 2.0253
Epoch 54/60
235/235 [==============================] - 32s 138ms/step - loss: 1.2794 - val_lo
ss: 2.0305
Epoch 55/60
235/235 [==============================] - 32s 138ms/step - loss: 1.2518 - val_lo
ss: 2.0469
Epoch 56/60
235/235 [==============================] - 32s 138ms/step - loss: 1.2313 - val_lo
ss: 2.0653
Epoch 57/60
235/235 [==============================] - 33s 139ms/step - loss: 1.2018 - val_lo
ss: 2.0503
Epoch 58/60
235/235 [==============================] - 32s 138ms/step - loss: 1.1733 - val_lo
ss: 2.0275
Epoch 59/60
235/235 [==============================] - 32s 137ms/step - loss: 1.1540 - val_lo
ss: 2.0384
Epoch 60/60
235/235 [==============================] - 32s 138ms/step - loss: 1.1233 - val_lo
ss: 2.0683
file:///C:/Users/hp/Downloads/handwriting-recognition (1).html 10/12
4/16/25, 12:28 AM handwriting-recognition
Out[25]: <tensorflow.python.keras.callbacks.History at 0x7b47340dded0>
Model performance
In [24]: preds = model.predict(valid_x)
decoded = K.get_value(K.ctc_decode(preds, input_length=np.ones(preds.shape[0])*p
greedy=True)[0][0])
prediction = []
for i in range(valid_size):
prediction.append(num_to_label(decoded[i]))
# In [25]:
# Score the decoded predictions against the ground-truth labels of the first
# `valid_size` validation rows.
# .iloc is end-exclusive, so exactly `valid_size` labels are taken (a
# label-based .loc[0:valid_size] slice is end-inclusive and would include one
# extra row).
y_true = valid['IDENTITY'].iloc[:valid_size].tolist()
correct_char = 0
total_char = 0
correct = 0
for pr, tr in zip(prediction, y_true):
    total_char += len(tr)
    # Position-wise comparison over the overlapping prefix of the two strings;
    # characters beyond the shorter string count as wrong because total_char
    # is based on the true label length.
    correct_char += sum(t == p for t, p in zip(tr, pr))
    if pr == tr:
        correct += 1
print('Correct characters predicted : %.2f%%' %(correct_char*100/total_char))
print('Correct words predicted : %.2f%%' %(correct*100/valid_size))
Correct characters predicted : 87.86%
Correct words predicted : 75.00%
Some predictions
In [26]: test = pd.read_csv('/kaggle/input/handwriting-recognition/written_name_test_v2.c
plt.figure(figsize=(15, 10))
for i in range(6):
ax = plt.subplot(2, 3, i+1)
img_dir = '/kaggle/input/handwriting-recognition/test_v2/test/'+test.loc[i,
image = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)
plt.imshow(image, cmap='gray')
image = preprocess(image)
image = image/255.
pred = model.predict(image.reshape(1, 256, 64, 1))
decoded = K.get_value(K.ctc_decode(pred, input_length=np.ones(pred.shape[0])
greedy=True)[0][0])
plt.title(num_to_label(decoded[0]), fontsize=12)
plt.axis('off')
plt.subplots_adjust(wspace=0.2, hspace=-0.8)
file:///C:/Users/hp/Downloads/handwriting-recognition (1).html 11/12
4/16/25, 12:28 AM handwriting-recognition
# In [27]:
# Save the model
# model_final is the training graph: besides the image it takes the labels and
# both length inputs, and it was compiled with a lambda loss.
# NOTE(review): reloading it will likely need custom_objects and/or
# compile=False because of the Lambda layer and the lambda loss - confirm.
model_final.save('handwriting_recognition_model.h5')
# Also save the inference model (image in, softmax out). This is the model the
# notebook calls predict() on, so this file is the one to load for recognition.
model.save('handwriting_recognition_inference_model.h5')
file:///C:/Users/hp/Downloads/handwriting-recognition (1).html 12/12