0% found this document useful (0 votes)
29 views6 pages

Document 11

The document outlines a Python script for data analysis using pandas and matplotlib, focusing on sales, inventory, and customer data. It includes steps for loading and exploring data, cleaning it by handling missing values and duplicates, and performing analyses to derive insights such as total revenue, popular products, and customer demographics. Additionally, it features visualizations for trends and distributions, and an optional section for advanced analysis including profit calculations and identifying high-spending customers.
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
29 views6 pages

Document 11

The document outlines a Python script for data analysis using pandas and matplotlib, focusing on sales, inventory, and customer data. It includes steps for loading and exploring data, cleaning it by handling missing values and duplicates, and performing analyses to derive insights such as total revenue, popular products, and customer demographics. Additionally, it features visualizations for trends and distributions, and an optional section for advanced analysis including profit calculations and identifying high-spending customers.
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 6

import pandas as pd

import matplotlib.pyplot as plt

# Part A: Load & Explore Data

# Load all CSV files

sales = pd.read_csv('sales.csv')

inventory = pd.read_csv('inventory.csv')

customers = pd.read_csv('customers.csv')

# Explore data

print("Sales Data:")

print(sales.head())

print(sales.info())

print(sales.describe())

print("\nInventory Data:")

print(inventory.head())

print(inventory.info())

print(inventory.describe())

print("\nCustomer Data:")

print(customers.head())

print(customers.info())

print(customers.describe())

# Rename confusing columns

sales = sales.rename(columns={'Unit_Price': 'Price_per_Unit'})


inventory = inventory.rename(columns={'Unit_Price': 'Price_per_Unit'})

# Convert Date column to datetime

sales['Date'] = pd.to_datetime(sales['Date'])

# Part B: Data Cleaning

# Handle missing values

sales = sales.dropna()

inventory = inventory.dropna()

customers = customers.dropna()

# Drop duplicates

sales = sales.drop_duplicates()

inventory = inventory.drop_duplicates()

customers = customers.drop_duplicates()

# Ensure proper data types

sales['Quantity'] = sales['Quantity'].astype(int)

customers['Age'] = customers['Age'].astype(int)

# Part C: Data Analysis

# 1. Sales Insights

# Total revenue generated

sales['Total_Sale'] = sales['Quantity'] * sales['Price_per_Unit']

total_revenue = sales['Total_Sale'].sum()

print(f"\nTotal Revenue: ₹{total_revenue:.2f}")


# Top 5 products by sales revenue

product_revenue = sales.groupby('Product_ID')
['Total_Sale'].sum().sort_values(ascending=False)

print("\nTop 5 Products by Revenue:")

print(product_revenue.head())

# Monthly revenue trend

sales['Month'] = sales['Date'].dt.month

monthly_revenue = sales.groupby('Month')['Total_Sale'].sum()

print("\nMonthly Revenue Trend:")

print(monthly_revenue)

# Most popular payment method

popular_payment = sales['Payment_Method'].value_counts().idxmax()

print(f"\nMost Popular Payment Method: {popular_payment}")

# 2. Inventory Management

# Products that need restocking (Stock < 10)

restock_products = inventory[inventory['Stock'] < 10]

print("\nProducts that need restocking:")

print(restock_products[['Product_ID', 'Product_Name', 'Stock']])

# Most stocked category

most_stocked = inventory.groupby('Category')['Stock'].sum().idxmax()

print(f"\nMost Stocked Category: {most_stocked}")

# Inventory value per category


inventory['Inventory_Value'] = inventory['Stock'] * inventory['Price_per_Unit']

category_value = inventory.groupby('Category')['Inventory_Value'].sum()

print("\nInventory Value per Category:")

print(category_value)

# 3. Customer Analysis

# Cities with most customers

city_counts = customers['City'].value_counts()

print("\nCities with Most Customers:")

print(city_counts.head())

# Gender distribution

gender_dist = customers['Gender'].value_counts(normalize=True) * 100

print("\nGender Distribution:")

print(gender_dist)

# Average age of customers

avg_age = customers['Age'].mean()

print(f"\nAverage Age of Customers: {avg_age:.1f} years")

# Part D: Visualizations

plt.figure(figsize=(15, 10))

# Line chart: Monthly revenue

plt.subplot(2, 2, 1)

monthly_revenue.plot(kind='line', marker='o')

plt.title('Monthly Revenue Trend')


plt.xlabel('Month')

plt.ylabel('Revenue (₹)')

plt.grid(True)

# Bar chart: Top 5 products by revenue

plt.subplot(2, 2, 2)

product_revenue.head().plot(kind='bar')

plt.title('Top 5 Products by Revenue')

plt.xlabel('Product ID')

plt.ylabel('Revenue (₹)')

# Pie chart: Payment methods distribution

plt.subplot(2, 2, 3)

sales['Payment_Method'].value_counts().plot(kind='pie', autopct='%1.1f%%')

plt.title('Payment Methods Distribution')

plt.ylabel('')

# Histogram: Age distribution of customers

plt.subplot(2, 2, 4)

customers['Age'].plot(kind='hist', bins=10)

plt.title('Age Distribution of Customers')

plt.xlabel('Age')

plt.ylabel('Count')

plt.tight_layout()

plt.show()
# Part E: Advanced (Optional)

# Merge sales and inventory to calculate profit (assuming cost price is 70%
of selling price)

merged_data = pd.merge(sales, inventory, on='Product_ID')

merged_data['Cost_Price'] = merged_data['Price_per_Unit_y'] * 0.7

merged_data['Profit'] = (merged_data['Price_per_Unit_x'] -
merged_data['Cost_Price']) * merged_data['Quantity']

# Flag customers who made purchases above ₹10,000

customer_spending = sales.groupby('Customer_ID')['Total_Sale'].sum()

big_spenders = customer_spending[customer_spending >


10000].index.tolist()

print("\nCustomers who spent more than ₹10,000:")

print(big_spenders)

You might also like