NLP PRACTICALS
Experiment No. 01
Code:
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
nltk.download('punkt')
text = "Natural Language Processing with Python. It's
amazing!"
sentences = sent_tokenize(text)
print("Sentence Tokenization:")
print(sentences)
words = word_tokenize(text)
print("\nWord Tokenization:")
print(words)
lower_text = text.lower()
print("\nLowercase Conversion:")
print(lower_text)
upper_text = text.upper()
print("\nUppercase Conversion:")
print(upper_text)
Output:
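Note: on newer NLTK releases (roughly 3.9 and later), word_tokenize may ask for the 'punkt_tab' resource instead of 'punkt'. A minimal compatibility sketch, assuming such a version might be installed:
import nltk
# Downloading both resource names is harmless; nltk.download() simply
# reports a failure for any name the installed version does not recognize.
nltk.download('punkt')
nltk.download('punkt_tab')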
Experiment No. 02
Code (Stop word Removal):
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('punkt')
nltk.download('stopwords')
text = "This is a simple text processing example using
Python."
words = word_tokenize(text)
stop_words = set(stopwords.words('english'))
stop_words_in_text = [word for word in words if word.lower() in stop_words]
filtered_text = [word for word in words if word.lower() not in stop_words]
print("Stop Words Found in Text:")
print(stop_words_in_text)
print("\nText after Stop Word Removal:")
print(filtered_text)
Output:
Code (Filtering):
import string
from nltk.tokenize import word_tokenize
text = "Hello there! NLP with Python is fun, isn't it?"
words = word_tokenize(text)
filtered_text = [word for word in words if word not in string.punctuation and len(word) > 2]
unfiltered_text = [word for word in words if word in string.punctuation or len(word) <= 2]
print("Filtered Words:")
print(filtered_text)
print("\nUnfiltered Words:")
print(unfiltered_text)
Output:
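The stop-word removal and the punctuation/length filtering above can also be combined into a single pass; a small sketch, assuming the 'punkt' and 'stopwords' resources downloaded earlier:
import string
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

text = "This is a simple text processing example using Python."
stop_words = set(stopwords.words('english'))

# Keep tokens that are neither stop words nor punctuation
cleaned = [word for word in word_tokenize(text)
           if word.lower() not in stop_words and word not in string.punctuation]
print(cleaned)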
Experiment No. 03
Code (Stemming):
from nltk.stem import PorterStemmer
words = ["running", "jumps", "easily", "fairly", "processed"]
ps = PorterStemmer()
stemmed_words = [ps.stem(word) for word in words]
print("Stemmed Words:")
print(stemmed_words)
Output:
Code (Lemmatization):
from nltk.stem import WordNetLemmatizer
import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')
words = ["running", "geese", "better", "children", "processed"]
lemmatizer = WordNetLemmatizer()
lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
print("Lemmatized Words:")
print(lemmatized_words)
Output:
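Note: WordNetLemmatizer treats every word as a noun unless a part of speech is passed, which is why verb forms such as "running" come back unchanged. A small sketch with an explicit POS tag:
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
# pos='v' tells the lemmatizer to treat the word as a verb
print(lemmatizer.lemmatize("running", pos="v"))   # -> run
print(lemmatizer.lemmatize("geese"))              # default pos='n' -> goose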
Code (Comparison):
from nltk.stem import PorterStemmer, WordNetLemmatizer
import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')
ps = PorterStemmer()
lemmatizer = WordNetLemmatizer()
unique_words = ["geese", "children", "better", "running", "happily",
"mice", "flying", "brought", "cacti"]
print("Word Comparison:")
for word in unique_words:
stemmed = ps.stem(word)
lemmatized = lemmatizer.lemmatize(word)
print(f"Word: {word} | Stemmed: {stemmed} | Lemmatized:
{lemmatized}")
Output:
Experiment No. 04
Code:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('brown') # Download the Brown corpus
text = "Natural Language Processing enables computers to understand
human language."
# Tokenize the text
words = nltk.word_tokenize(text)
# Using the default POS tagger
default_tagged = nltk.pos_tag(words)
print("Default POS Tagging:")
print(default_tagged)
# Using the Unigram POS tagger (trained on the Brown news sentences)
train_sents = nltk.corpus.brown.tagged_sents(categories='news')
unigram_tagger = nltk.UnigramTagger(train_sents)
unigram_tagged = unigram_tagger.tag(words)
print("\nUnigram POS Tagging:")
print(unigram_tagged)
# Using the Bigram POS tagger
bigram_tagger = nltk.BigramTagger(train_sents)
bigram_tagged = bigram_tagger.tag(words)
print("\nBigram POS Tagging:")
print(bigram_tagged)
# Using the Trigram POS tagger
trigram_tagger = nltk.TrigramTagger(train_sents)
trigram_tagged = trigram_tagger.tag(words)
print("\nTrigram POS Tagging:")
print(trigram_tagged)
Output:
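Note: trained on their own, the bigram and trigram taggers return None for any word whose context did not occur in the Brown news sentences. A minimal sketch chaining the taggers with backoff so unseen contexts fall through to a simpler tagger and finally to a default 'NN' tag:
import nltk

train_sents = nltk.corpus.brown.tagged_sents(categories='news')
words = nltk.word_tokenize("Natural Language Processing enables computers to understand human language.")

# Backoff chain: trigram -> bigram -> unigram -> default noun tag
default_tagger = nltk.DefaultTagger('NN')
unigram_backoff = nltk.UnigramTagger(train_sents, backoff=default_tagger)
bigram_backoff = nltk.BigramTagger(train_sents, backoff=unigram_backoff)
trigram_backoff = nltk.TrigramTagger(train_sents, backoff=bigram_backoff)

print("Trigram POS Tagging with backoff:")
print(trigram_backoff.tag(words))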
Code (Chunking):
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
text = "Natural Language Processing enables computers to understand human
language."
words = nltk.word_tokenize(text)
tagged_words = nltk.pos_tag(words)
grammar = "NP: {<DT>?<JJ>*<NN>}"
chunk_parser = nltk.RegexpParser(grammar)
chunked = chunk_parser.parse(tagged_words)
print("Chunked Output:")
for subtree in chunked.subtrees():
    print(subtree)
Output:
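To print only the noun-phrase chunks rather than every subtree, the iterator can be filtered by label; a small sketch reusing the chunked tree produced above:
# Keep only the subtrees labelled 'NP' by the grammar
for subtree in chunked.subtrees(filter=lambda t: t.label() == 'NP'):
    print(" ".join(word for word, tag in subtree.leaves()))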
Experiment No. 05
Code:
import nltk
from nltk import ngrams
nltk.download('punkt')
text = "Natural Language Processing is a fascinating field of
study."
n = 2
tokens = nltk.word_tokenize(text)
n_grams = list(ngrams(tokens, n))
print("N-grams:")
for gram in n_grams:
    print(gram)
Output:
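As a small extension, the n-grams can be counted with collections.Counter, which becomes useful once the input is longer than a single sentence; a sketch using the same text and n value:
from collections import Counter
import nltk
from nltk import ngrams

text = "Natural Language Processing is a fascinating field of study."
tokens = nltk.word_tokenize(text)

# Frequency of each bigram (every bigram occurs once in this short sentence)
bigram_counts = Counter(ngrams(tokens, 2))
print(bigram_counts.most_common(3))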
Experiment No. 06
Code & Output:
Experiment No. 07
Code:
import nltk
from nltk.corpus import wordnet as wn
nltk.download('punkt')
nltk.download('wordnet')

def lesk(word, sentence):
    # Tokenize the sentence into a set of context words
    context = set(nltk.word_tokenize(sentence))
    best_sense = None
    max_overlap = 0
    # Iterate over each sense (synset) of the word
    for sense in wn.synsets(word):
        # Combine the sense's definition and example sentences
        gloss = sense.definition() + ' ' + ' '.join(sense.examples())
        # Compute the overlap between the gloss and the sentence context
        overlap = len(set(nltk.word_tokenize(gloss)).intersection(context))
        # Keep the sense with the largest overlap so far
        if overlap > max_overlap:
            max_overlap = overlap
            best_sense = sense
    return best_sense

# Example usage
sentence = "I went to the bank to deposit money."
word = "bank"

# Run the Lesk algorithm
best_sense = lesk(word, sentence)
if best_sense:
    print(f"The best sense for the word '{word}' in the sentence is:")
    print(f"Synset: {best_sense}")
    print(f"Definition: {best_sense.definition()}")
else:
    print(f"No sense found for the word '{word}'.")
Output:
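For comparison, NLTK ships a ready-made implementation of the same algorithm in nltk.wsd; a minimal sketch applying it to the same sentence:
import nltk
from nltk.wsd import lesk as nltk_lesk

sentence = "I went to the bank to deposit money."
# The built-in version takes the tokenized context and the ambiguous word
sense = nltk_lesk(nltk.word_tokenize(sentence), "bank")
if sense:
    print(sense, "-", sense.definition())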
Code:
import nltk
from nltk.corpus import wordnet as wn
def get_synonyms(word):
    """Return a list of synonyms for a given word."""
    synonyms = set()
    for syn in wn.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
    return list(synonyms)

def get_hyponyms(word):
    """Return a list of hyponyms for a given word."""
    hyponyms = []
    for syn in wn.synsets(word):
        hyponyms.extend(syn.hyponyms())
    return [h.name().split('.')[0] for h in hyponyms]

def get_hypernyms(word):
    """Return a list of hypernyms for a given word."""
    hypernyms = []
    for syn in wn.synsets(word):
        hypernyms.extend(syn.hypernyms())
    return [h.name().split('.')[0] for h in hypernyms]

def get_meronyms(word):
    """Return a list of part meronyms for a given word."""
    meronyms = []
    for syn in wn.synsets(word):
        meronyms.extend(syn.part_meronyms())
    return [m.name().split('.')[0] for m in meronyms]

def get_holonyms(word):
    """Return a list of member holonyms for a given word."""
    holonyms = []
    for syn in wn.synsets(word):
        holonyms.extend(syn.member_holonyms())
    return [h.name().split('.')[0] for h in holonyms]
def word_similarity(word1, word2):
    """Return the Wu-Palmer similarity score between two words."""
    synsets1 = wn.synsets(word1)
    synsets2 = wn.synsets(word2)
    if not synsets1 or not synsets2:
        return None
    # Compare the first (most frequent) synset of each word
    syn1 = synsets1[0]
    syn2 = synsets2[0]
    return syn1.wup_similarity(syn2)

def display_wordnet_info(word1, word2):
    """Display WordNet information for the given words."""
    print(f"\nWordNet Information for '{word1}':")
    print("-----------------------------------")
    print(f"Synonyms: {get_synonyms(word1)}")
    print(f"Hyponyms: {get_hyponyms(word1)}")
    print(f"Hypernyms: {get_hypernyms(word1)}")
    print(f"Meronyms: {get_meronyms(word1)}")
    print(f"Holonyms: {get_holonyms(word1)}")
    similarity = word_similarity(word1, word2)
    if similarity is not None:
        print(f"\nSimilarity between '{word1}' and '{word2}': {similarity:.2f}")
    else:
        print(f"\nSimilarity between '{word1}' and '{word2}': Not Available")
# Example usage
word = "dog"
word2 = "cat"
display_wordnet_info(word, word2)
Output:
WordNet Information for 'dog':
-----------------------------------
Synonyms: ['firedog', 'give_chase', 'wiener', 'frump', 'weenie',
'track', 'trail', 'dog', 'tail', 'frank', 'chase_after', 'heel',
'blackguard', 'cad', 'click', 'dog-iron', 'chase', 'tag', 'hound',
'bounder', 'pawl', 'hotdog', 'detent', 'go_after', 'domestic_dog',
'Canis_familiaris', 'hot_dog', 'andiron', 'frankfurter', 'wienerwurst']
Hyponyms: ['basenji', 'corgi', 'cur', 'dalmatian', 'great_pyrenees',
'griffon', 'hunting_dog', 'lapdog', 'leonberg', 'mexican_hairless',
'newfoundland', 'pooch', 'poodle', 'pug', 'puppy', 'spitz', 'toy_dog',
'working_dog', 'perisher', 'vienna_sausage', 'hound', 'quest',
'run_down', 'tree']
Hypernyms: ['canine', 'domestic_animal', 'unpleasant_woman', 'chap',
'villain', 'sausage', 'catch', 'support', 'pursue']
Meronyms: ['flag']
Holonyms: ['canis', 'pack']
Similarity between 'dog' and 'cat': 0.86
Experiment No. 08
CASE STUDY
Code & Output
!pip install -q transformers
from transformers import pipeline
sentiment_pipeline = pipeline("sentiment-analysis")
data = ["I Love You", "I Hate You"]
sentiment_pipeline(data)
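Note: outside a notebook cell the pipeline's return value has to be printed explicitly; it is a list of dicts with 'label' and 'score' keys. A small sketch reusing data and sentiment_pipeline from above:
# Each result looks like {'label': 'POSITIVE', 'score': 0.99...}
for sentence, result in zip(data, sentiment_pipeline(data)):
    print(f"{sentence} -> {result['label']} ({result['score']:.4f})")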
import pandas as pd
import numpy as np
from textblob import TextBlob
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
text1="Food made at the restaurant was very good"
blob1=TextBlob(text1)
print(blob1.sentiment)
text2="Food made at the restaurant was very bad"
blob2=TextBlob(text2)
print(blob2.sentiment)
text3="earth revolves around sun"
blob3=TextBlob(text3)
print(blob3.sentiment)
text2="flight was horrible and filled with turbulence"
blob2=TextBlob(text2)
print(blob2.sentiment)
text3="earth revolves around sun"
blob3=TextBlob(text3)
print(blob3.sentiment)
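Since pandas and matplotlib are already imported above, the TextBlob polarity scores can also be collected into a DataFrame and plotted; a small sketch using the same example sentences:
texts = [
    "Food made at the restaurant was very good",
    "Food made at the restaurant was very bad",
    "earth revolves around sun",
    "flight was horrible and filled with turbulence",
]

# Polarity and subjectivity for each sentence
df = pd.DataFrame({
    "text": texts,
    "polarity": [TextBlob(t).sentiment.polarity for t in texts],
    "subjectivity": [TextBlob(t).sentiment.subjectivity for t in texts],
})
print(df)

# Bar chart of polarity per sentence
df.plot.bar(x="text", y="polarity", legend=False)
plt.ylabel("polarity")
plt.tight_layout()
plt.show()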