TYBSc(CS) USCSP6032: Information Retrieval Roll No: 3
Practical – 3
Aim: Spelling Correction in IR Systems
• Develop a spelling correction module using edit distance algorithms.
• Integrate the spelling correction module into an information retrieval system.
Solution:
Step 1: Type the following code into a Python file and run it:
def levenshtein_dist(str1, str2):
    """Return the Levenshtein (edit) distance between ``str1`` and ``str2``.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``str1`` into ``str2``.
    It is computed with the classic dynamic-programming table where
    ``matrix[i][j]`` holds the distance between the first ``i`` characters
    of ``str1`` and the first ``j`` characters of ``str2``.

    Args:
        str1: Source string.
        str2: Target string.

    Returns:
        The edit distance as a non-negative integer. Either argument may be
        the empty string, in which case the result is the length of the other.
    """
    len_str1 = len(str1) + 1
    len_str2 = len(str2) + 1
    matrix = [[0] * len_str2 for _ in range(len_str1)]

    # Base cases: transforming a prefix to/from the empty string costs its length.
    for i in range(len_str1):
        matrix[i][0] = i
    for j in range(len_str2):
        matrix[0][j] = j

    for i in range(1, len_str1):
        # Start at column 1: column 0 is a base case and must not be
        # recomputed (j == 0 would also index str2[-1] / matrix[i][-1]).
        for j in range(1, len_str2):
            cost = 0 if str1[i - 1] == str2[j - 1] else 1
            matrix[i][j] = min(
                matrix[i - 1][j] + 1,  # delete a character of str1
                matrix[i][j - 1] + 1,  # insert a character of str2
                matrix[i - 1][j - 1] + cost,  # match or substitute
            )
    return matrix[len_str1 - 1][len_str2 - 1]
def suggest_correction(word, word_list):
    """Return the entry of ``word_list`` closest to ``word`` by edit distance.

    Args:
        word: The (possibly misspelled) word to correct.
        word_list: Iterable of candidate words.

    Returns:
        The candidate with the smallest ``levenshtein_dist`` to ``word``.
        When several candidates tie, the one that appears first in
        ``word_list`` wins. If ``word_list`` is empty there is nothing to
        correct against, so ``word`` is returned unchanged.
    """
    # min() does a single pass and keeps the first minimum it sees, so no
    # full sort is needed; ``default`` covers the empty candidate list.
    return min(
        word_list,
        key=lambda candidate: levenshtein_dist(word, candidate),
        default=word,
    )
def retrieve_info(query, dictionary):
    """Spell-correct every word of ``query`` and report the corrected query.

    The query is split on whitespace, each token is replaced by the result
    of ``suggest_correction(token, dictionary)``, and the corrected tokens
    are re-joined with single spaces.

    Args:
        query: Raw user query string.
        dictionary: Collection of known-good words to correct against.

    Returns:
        The corrected query string (also printed to stdout), so that callers
        can pass it on to a retrieval step instead of only seeing it printed.
    """
    corrected_words = [
        suggest_correction(token, dictionary) for token in query.split()
    ]
    correct_query = " ".join(corrected_words)
    print(f"Retrieving information for '{query}': {correct_query}")
    return correct_query
# Demo 1: correct a single word against a small vocabulary.
input_word = "helo"
dictionary = ["hello", "world", "python", "spell", "correct", "algorithm"]
suggested_correction = suggest_correction(input_word, dictionary)
print(f"Suggested correction for '{input_word}': {suggested_correction}")
# Demo 2: correct every word of a multi-word query.
user_query = "seplinng corecton algothm"
# NOTE: `dictionary` is rebound here to a different vocabulary for the query demo.
dictionary = ["spelling", "correction", "algorithm", "information", "retrieval"]
retrieve_info(user_query, dictionary)
Output:
Department of CS (2024-25) VKKM 2