import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_files
import nltk
from nltk.corpus import movie_reviews
# Fetch the NLTK movie_reviews corpus that the dataset below is built from.
nltk.download('movie_reviews')

# Build the dataset: one whitespace-joined token string per review file, and
# a binary label (1 when the file id starts with "pos", otherwise 0).
review_ids = movie_reviews.fileids()
X = [" ".join(movie_reviews.words(review_id)) for review_id in review_ids]
y = [int(review_id.startswith("pos")) for review_id in review_ids]
# Hold out 20% of the reviews for evaluation; random_state is fixed at 42.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Bag-of-words counts (English stop words removed) feeding a multinomial
# Naive Bayes classifier, bundled into a single pipeline.
bow_vectorizer = CountVectorizer(stop_words='english')
nb_classifier = MultinomialNB()
model = make_pipeline(bow_vectorizer, nb_classifier)

# Train on the training split, then label the held-out reviews.
y_pred = model.fit(X_train, y_train).predict(X_test)
# Classify a single hand-written review with the fitted pipeline.
new_review = ["The movie was fantastic! I really enjoyed it."]
prediction = model.predict(new_review)
if prediction[0] == 1:
    sentiment_label = 'Positive'
else:
    sentiment_label = 'Negative'
print(f"Predicted Sentiment: {sentiment_label}")

# Report accuracy and the classification report for the held-out
# predictions.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
report_text = classification_report(y_test, y_pred)
print(report_text)