PRACTICAL NO: 5
Q. WRITE A PROGRAM TO IMPLEMENT A NAIVE BAYES CLASSIFIER
In [1]: import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_files
In [2]: import nltk
from nltk.corpus import movie_reviews
# Fetch the movie_reviews corpus used below. The call logs where the data is
# stored and evaluates to True in this run, which the cell shows as Out[2].
nltk.download('movie_reviews')
[nltk_data] Downloading package movie_reviews to
[nltk_data] C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data] Package movie_reviews is already up-to-date!
Out[2]: True
In [3]: X = [" ".join(movie_reviews.words(fileid)) for fileid in movie_reviews.fileids()]
y = [1 if fileid.startswith("pos") else 0 for fileid in movie_reviews.fileids()]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42
model = make_pipeline(CountVectorizer(stop_words='english'), MultinomialNB())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
new_review = ["The movie was fantastic! I really enjoyed it."]
prediction = model.predict(new_review)
print(f"Predicted Sentiment: {'Positive' if prediction[0] == 1 else 'Negative'}")
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred))
Predicted Sentiment: Positive
Accuracy: 0.81
precision recall f1-score support
0 0.78 0.85 0.81 199
1 0.84 0.76 0.80 201
accuracy 0.81 400
macro avg 0.81 0.81 0.80 400
weighted avg 0.81 0.81 0.80 400