3) Product review dataset
In [ ]: import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import (accuracy_score, confusion_matrix, classification_report,
                             precision_score, recall_score, f1_score)
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]: nltk.download('punkt')
nltk.download('stopwords')
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data] Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data] Package stopwords is already up-to-date!
Out[ ]: True
Load the dataset
In [ ]: with open('/content/dataset.txt', 'r', encoding='utf-8') as file:
    lines = file.readlines()
In [ ]: # Initialize lists to store labels and reviews
labels = []
reviews = []
In [ ]: # Process each line in the dataset
for line in lines:
    # Split the line by '__label__'
    parts = line.split('__label__')
    # Check if there are two parts
    if len(parts) == 2:
        # Extract label and review
        label = '__label__' + parts[1].strip().split()[0]   # Extracting the label
        review = ' '.join(parts[1].strip().split()[1:])      # Extracting the review
        labels.append(label)
        reviews.append(review)
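Before building the DataFrame, a quick sanity check that parsing produced matching label/review pairs; the snippet below assumes the fastText-style line format '__label__<n> <review text>' handled above.
In [ ]: # Sanity check: the counts should match and the first pair should look sensible
print(len(labels), len(reviews))
print(labels[0], '->', reviews[0][:80])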
Creating a dataframe
In [ ]: # Create a DataFrame
df = pd.DataFrame({'label': labels, 'review': reviews})
# Map labels to sentiments (in this dataset __label__1 = negative, __label__2 = positive)
sentiment_map = {
    '__label__1': 'negative',
    '__label__2': 'positive'
}
df['sentiment'] = df['label'].map(sentiment_map)
# Drop the 'label' column
df.drop(columns=['label'], inplace=True)
# Display the DataFrame
print(df.head())
print(df.tail())
review sentiment
0 Great CD: My lovely Pat has one of the GREAT v... positive
1 One of the best game music soundtracks - for a... positive
2 Batteries died within a year ...: I bought thi... negative
3 works fine, but Maha Energy is better: Check o... positive
4 Great for the non-audiophile: Reviewed quite a... positive
review sentiment
399995 Unbelievable- In a Bad Way: We bought this Tho... negative
399996 Almost Great, Until it Broke...: My son reciev... negative
399997 Disappointed !!!: I bought this toy for my son... negative
399998 Classic Jessica Mitford: This is a compilation... positive
399999 Comedy Scene, and Not Heard: This DVD will be ... negative
Preprocessing
In [ ]: def preprocess_text(text):
    text = text.lower()                            # Convert to lowercase
    tokenizer = nltk.RegexpTokenizer(r'\w+')
    tokens = tokenizer.tokenize(text)              # Tokenize on word characters
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]   # Remove stopwords
    stemmer = PorterStemmer()
    tokens = [stemmer.stem(word) for word in tokens]               # Stemming
    return ' '.join(tokens)
In [ ]: df['review'] = df['review'].apply(preprocess_text)
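To see what the cleaning produces, here is a check on a made-up sentence; the exact tokens depend on the NLTK stopword list and the Porter stemmer.
In [ ]: # Hypothetical example: lowercased, punctuation and stopwords dropped, remaining words stemmed
preprocess_text("The batteries died after a year, which was very disappointing!")
# Expected to look roughly like: 'batteri die year disappoint'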
Sentiment Distribution
In [ ]: df['sentiment'].value_counts().plot(kind='bar')
plt.title('Product Review Data Sentiment Distribution')
plt.xlabel('Sentiment')
plt.ylabel('Count')
plt.show()
Vectorization using TF-IDF
In [ ]: tfidf = TfidfVectorizer(max_features=1500, min_df=5, max_df=0.7)
X = tfidf.fit_transform(df['review']).toarray()
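One caveat: .toarray() materialises a dense 400,000 x 1,500 float matrix, which costs several gigabytes of RAM. Both LogisticRegression and MultinomialNB accept scipy sparse input, so a leaner variant (a sketch, not a required change) keeps the TF-IDF matrix sparse.
In [ ]: # Optional alternative: skip .toarray() and keep the scipy sparse matrix
X = tfidf.fit_transform(df['review'])
print(type(X), X.shape)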
Split the data into training and testing sets
In [ ]: y = df['sentiment'].map({'positive': 1, 'negative': 0}).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
In [ ]: y
Out[ ]: array([1, 1, 0, ..., 0, 1, 0])
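The two classes are roughly balanced here, so a plain random split is fine; if they were not, passing stratify=y would keep the class ratio identical in the train and test sets. A sketch of that variation:
In [ ]: # Optional: stratified split preserves the positive/negative ratio in both sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y)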
Logistic Regression classifier
In [ ]: lr_classifier = LogisticRegression()
lr_classifier.fit(X_train, y_train)
lr_pred = lr_classifier.predict(X_test)
lr_accuracy = accuracy_score(y_test, lr_pred)
print("Logistic Regression Accuracy:", lr_accuracy)
Logistic Regression Accuracy: 0.86795
Naive Bayes classifier
In [ ]: nb_classifier = MultinomialNB()
nb_classifier.fit(X_train, y_train)
nb_pred = nb_classifier.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_pred)
print("Naive Bayes Accuracy:", nb_accuracy)
Naive Bayes Accuracy: 0.827925
In [ ]: def plot_confusion_matrix(y_true, y_pred, model_name):
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Negative', 'Positive'],
                yticklabels=['Negative', 'Positive'])
    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted labels')
    plt.ylabel('True labels')
    plt.show()
Confusion matrix for Logistic Regression
In [ ]: plot_confusion_matrix(y_test, lr_pred, "Logistic Regression")
Confusion matrix for Naive Bayes
In [ ]: plot_confusion_matrix(y_test, nb_pred, "Naive Bayes")
Accuracy, Precision, Recall, and F1 score for Logistic Regression and Naive Bayes
In [ ]: def print_evaluation_metrics(y_true, y_pred, model_name):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    print(f"----------- {model_name} Evaluation Metrics -----------")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
In [ ]: print_evaluation_metrics(y_test, lr_pred, "Logistic Regression")
print_evaluation_metrics(y_test, nb_pred, "Naive Bayes")
----------- Logistic Regression Evaluation Metrics -----------
Accuracy: 0.8679
Precision: 0.8680
Recall: 0.8679
F1 Score: 0.8679
----------- Naive Bayes Evaluation Metrics -----------
Accuracy: 0.8279
Precision: 0.8280
Recall: 0.8279
F1 Score: 0.8279
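classification_report was imported above but not used; it prints a per-class breakdown (precision, recall, and F1 for each sentiment) instead of the weighted averages, for example:
In [ ]: # Per-class metrics for both models (class 0 = negative, class 1 = positive)
print(classification_report(y_test, lr_pred, target_names=['negative', 'positive']))
print(classification_report(y_test, nb_pred, target_names=['negative', 'positive']))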