import pandas as pd
import re
import os
import google.generativeai as genai
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from chromadb import Client
# from dotenv import load_dotenv # No longer needed if using Colab secrets
from google.colab import userdata
# Load environment variables (using Colab secrets now)
API_KEY = userdata.get("GOOGLE_API_KEY")  # returns None if the secret is not set
if not API_KEY:
    raise ValueError("GOOGLE_API_KEY not found. Please set it in Colab secrets.")
genai.configure(api_key=API_KEY)
# Use the working model
model = genai.GenerativeModel('gemini-1.5-flash-latest')
# Initialize components (assuming these are already initialized from previous steps)
# Initialize the text splitter
# NOTE(review): 100-char chunks with 20-char overlap are quite small — presumably
# tuned for short emails; confirm against the indexing step that filled the DB.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)
# Initialize the embedding model
# Must match the model used when the "email_chunks" collection was populated,
# otherwise query embeddings live in a different vector space.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Initialize an in-memory ChromaDB client and get the collection
# In-memory client: the collection is empty unless earlier cells added chunks.
client = Client()
collection = client.get_or_create_collection(name="email_chunks")
# Define cleaning, extraction, and append functions (re-defining for clarity in this block)
def clean_email_content(email_content):
  """
  Cleans and formats an email body for downstream processing.

  Strips HTML tags, removes every character outside a small allowed set
  (alphanumerics, whitespace, and basic punctuation), and collapses runs
  of whitespace into single spaces.

  Args:
    email_content: Raw email body, possibly containing HTML.

  Returns:
    The cleaned, single-spaced text with no leading/trailing whitespace.
  """
  without_tags = re.sub(r'<.*?>', '', email_content)
  allowed_chars_only = re.sub(r'[^a-zA-Z0-9\s.,!?;:]', '', without_tags)
  return re.sub(r'\s+', ' ', allowed_chars_only).strip()
def extract_data_for_excel(email_content):
  """
  Extracts a content summary and contact number from cleaned email text.

  (Simplified for this example: 'Sender' and 'Subject' default to 'N/A'
  and are filled in later by the caller from the email envelope.)

  Args:
    email_content: Cleaned plain-text email body.

  Returns:
    dict with keys 'Sender', 'Subject', 'Summary', 'Contact'.
  """
  # BUG FIX: the original dict literal was never closed (missing '}'),
  # and the Summary conditional expression was split mid-line — both
  # were syntax errors.
  extracted_data = {
      'Sender': 'N/A',
      'Subject': 'N/A',
      'Summary': 'N/A',
      'Contact': 'N/A',
  }
  # Keep only a 100-character preview of long bodies.
  extracted_data['Summary'] = (
      email_content[:100] + '...' if len(email_content) > 100 else email_content
  )
  # Heuristic: the first run of 7+ digits/spaces/dashes is treated as a
  # phone number (surrounding whitespace stripped).
  contact_match = re.search(r'[\d\s-]{7,}', email_content)
  if contact_match:
    extracted_data['Contact'] = contact_match.group(0).strip()
  return extracted_data
def append_to_excel(data_dict, file_path):
  """
  Appends one row of extracted email data to an Excel file.

  Creates the file (with a header row) on first use; on later calls the
  row is appended below the last populated row without repeating the
  header. Parent directories are created if the path has any.

  Args:
    data_dict: Mapping of column name -> value for a single row.
    file_path: Destination .xlsx path. May be a bare filename.
  """
  # BUG FIX: os.path.dirname() returns '' for a bare filename and
  # os.makedirs('') raises FileNotFoundError — only create directories
  # when the path actually contains one.
  parent_dir = os.path.dirname(file_path)
  if parent_dir:
    os.makedirs(parent_dir, exist_ok=True)
  df = pd.DataFrame([data_dict])
  if not os.path.exists(file_path):
    df.to_excel(file_path, index=False)
  else:
    # 'overlay' lets us write into the existing sheet; start below the
    # last used row so rows accumulate instead of overwriting.
    with pd.ExcelWriter(file_path, mode='a', engine='openpyxl', if_sheet_exists='overlay') as writer:
      df.to_excel(writer, index=False, header=False, startrow=writer.book.active.max_row)
def search_email_chunks(query_string, n_results=3):
  """
  Searches the ChromaDB collection for email chunks relevant to a query.

  Embeds the query with the module-level SentenceTransformer and runs a
  nearest-neighbour query against the "email_chunks" collection.

  Args:
    query_string: Free-text query.
    n_results: Maximum number of chunks to retrieve.

  Returns:
    The ChromaDB query result dict (documents, distances, metadatas).
  """
  query_vector = embedding_model.encode(query_string).tolist()
  return collection.query(
      query_embeddings=[query_vector],
      n_results=n_results,
      include=['documents', 'distances', 'metadatas'],
  )
def generate_reply(original_email_content, retrieved_chunks):
  """
  Generates a suitable email reply using the original email and retrieved context.

  Builds a prompt from the original email plus the retrieved context
  chunks and sends it to the Gemini model.

  Args:
    original_email_content: Cleaned text of the email being answered.
    retrieved_chunks: Iterable of context strings from the vector store.

  Returns:
    The generated reply text, or a fallback message if generation fails.
  """
  prompt = f"""Original Email:
{original_email_content}
Relevant Context from past emails:
"""
  for i, chunk in enumerate(retrieved_chunks):
      prompt += f"Chunk {i+1}: {chunk}\n"
  # BUG FIX: this string literal was split across two lines in the
  # original (a syntax error); rejoined here via implicit concatenation.
  prompt += ("\nBased on the original email and the relevant context, "
             "generate a concise and helpful reply.")
  try:
      response = model.generate_content(prompt)
      return response.text
  except Exception as e:
      # Best-effort: log the failure and keep the pipeline running.
      print(f"Error generating content: {e}")
      return "Could not generate a reply at this time."
# Integrated function to process a new email
def process_new_email(new_email_content_raw, sender, subject,
                      excel_file_path='extracted_email_data.xlsx'):
  """
  Simulates receiving and processing a new email through the RAG workflow.

  Pipeline: clean -> retrieve context -> generate reply -> extract fields
  and append them to an Excel log.

  Args:
    new_email_content_raw: Raw (possibly HTML) email body.
    sender: Sender name/address for the Excel log.
    subject: Email subject for the Excel log.
    excel_file_path: Destination .xlsx path for extracted data.

  Returns:
    The generated reply text, or an error message if any step fails.
  """
  # BUG FIX: the original body had lost its indentation (statements at
  # column 0 under the def) and the signature was split mid-token.
  print(f"Processing new email from {sender} with subject: {subject}")
  try:
    # 1. Clean the email content
    cleaned_content = clean_email_content(new_email_content_raw)
    print("Email content cleaned.")
    # 2. Chunking of the new email is skipped here — only retrieval is
    # needed for answering; chunk/add to the DB separately if desired.
    # 3. Retrieve relevant context from the vector database
    retrieval_results = search_email_chunks(cleaned_content)
    # Flatten the per-query list-of-lists that ChromaDB returns.
    retrieved_chunks = [doc for sublist in retrieval_results.get('documents', []) for doc in sublist]
    print(f"Retrieved {len(retrieved_chunks)} relevant chunks.")
    # 4. Generate a reply using the language model
    generated_reply = generate_reply(cleaned_content, retrieved_chunks)
    print("\nGenerated Reply:")
    print(generated_reply)
    # 5. Extract data for Excel and append
    extracted_data = extract_data_for_excel(cleaned_content)
    extracted_data['Sender'] = sender  # Add sender from input
    extracted_data['Subject'] = subject  # Add subject from input
    append_to_excel(extracted_data, excel_file_path)
    print(f"\nExtracted data appended to {excel_file_path}.")
    return generated_reply
  except Exception as e:
    # Boundary handler: report the failure and return a sentinel reply so
    # the calling script keeps running.
    print(f"\nAn error occurred during email processing: {e}")
    return "An error occurred during processing."
# 3. Create a sample "new" email string to exercise the pipeline end-to-end.
sample_new_email_content = """
Hello team,
Just following up on the project update meeting. Could someone confirm the deadline for phase 2
implementation?
Also, please let me know if you need anything from my side. You can reach me at 555-1234.
Thanks,
Alice
"""
sample_sender = "Alice"
sample_subject = "Follow up on Project Update"

# 4. Call the integrated function with the sample new email
print("--- Starting New Email Processing ---")
process_new_email(sample_new_email_content, sample_sender, sample_subject)
print("--- New Email Processing Finished ---")

# 5. Optionally, read and display the updated Excel file
try:
  updated_extracted_df = pd.read_excel('extracted_email_data.xlsx')
  print("\nUpdated Excel File Content:")
  try:
    # display() only exists inside notebooks (IPython builtin); fall back
    # to print() when this runs as a plain script.
    display(updated_extracted_df)
  except NameError:
    print(updated_extracted_df)
except FileNotFoundError:
  print("\nExcel file not found after processing.")