🔥🔥🔥 SQL SOLUTIONS 🔥🔥🔥
I am hiring a PySpark Data Engineer! Can you answer the following questions?
--====================
CREATE TABLE ##DATA(EmpID INT, FName VARCHAR(50), Department VARCHAR(50), Salary INT)
INSERT INTO ##DATA VALUES
(1, 'Amit', 'IT', 60000),
(2, 'Priya', 'HR', 55000),
(3, 'Rahul', 'Finance', 75000),
(4, 'Sneha', 'IT', 80000),
(5, 'Karan', 'HR', 65000)
--1. Task: Display the schema and first 3 rows.
SELECT TOP 3 * FROM ##DATA ORDER BY 1
--2. Filter Employees Earning More than 70,000
SELECT * FROM ##DATA WHERE Salary > 70000
--3. Calculate Average Salary per Department
SELECT Department, AVG(Salary) AS AvgSalary FROM ##DATA GROUP BY Department
--4. Find Employees whose Name Starts with 'A'
SELECT * FROM ##DATA WHERE FName LIKE 'A%'
--5. Count the Number of Employees per Department
SELECT Department, COUNT(*) AS Cnt FROM ##DATA GROUP BY Department
--6. Add a New Column for Tax Deduction (10% of Salary)
SELECT *, (Salary * 10) / 100 AS Tax FROM ##DATA
--7. Sort Employees by Salary in Descending Order
SELECT * FROM ##DATA ORDER BY Salary DESC, FName
--8. Get the Second Highest Salary
SELECT TOP 1 * FROM ##DATA
WHERE Salary < (SELECT MAX(Salary) FROM ##DATA)
ORDER BY Salary DESC
--9. Get Employees Who are in the HR or IT Department
SELECT * FROM ##DATA WHERE Department IN ('HR', 'IT')
--10. Find the Total Salary Paid by the Company
-- An empty OVER() spans all rows; ORDER BY inside the window would turn this into a running total
SELECT *, SUM(Salary) OVER() AS TotalSalary FROM ##DATA
--11. Read a CSV File of Cricket Players
CREATE TABLE ##players(Player VARCHAR(50), Country VARCHAR(50), Runs INT, Wickets INT)
INSERT INTO ##players VALUES
('Virat Kohli', 'India', 12000, 4),
('Rohit Sharma', 'India', 11000, 8),
('Jasprit Bumrah', 'India', 1200, 200),
('Steve Smith', 'Australia', 9500, 20)
--Task: Read this CSV file into a DataFrame and display its contents (simulated here with a table).
SELECT * FROM ##players
--12. Find the Player with Maximum Runs
SELECT TOP 1 * FROM ##players ORDER BY Runs DESC
-- Alternative: match against the maximum run count
SELECT Player, Runs FROM ##players WHERE Runs = (SELECT MAX(Runs) FROM ##players)
--13. Find the Average Runs Scored by Indian Players
SELECT AVG(Runs) AS AvgRuns FROM ##players WHERE Country = 'India'
-- Grouped variant
SELECT Country, AVG(Runs) AS AvgRuns FROM ##players WHERE Country = 'India' GROUP BY Country
--14. Get Players Who Have Taken More than 50 Wickets
SELECT * FROM ##players WHERE Wickets > 50
--15. Read a JSON File Containing Indian Cities Population
CREATE TABLE ##cities(City VARCHAR(50), State VARCHAR(50), Population INT)
INSERT INTO ##cities VALUES
('Mumbai', 'Maharashtra', 20000000),
('Delhi', 'Delhi', 18000000),
('Bangalore', 'Karnataka', 12000000),
('Hyderabad', 'Telangana', 10000000)
--Task: Read this JSON file into a DataFrame and display its contents (simulated here with a table).
SELECT * FROM ##cities
--16. Find Cities with a Population Greater than 15 Million
SELECT * FROM ##cities WHERE Population > 15000000
--17. Calculate Total Population per State
SELECT State, SUM(Population) AS TotalPopulation FROM ##cities GROUP BY State
--18. Find the State with the Highest Total Population
SELECT TOP 1 State, SUM(Population) AS TotalPopulation
FROM ##cities GROUP BY State
ORDER BY TotalPopulation DESC
--19. Convert a DataFrame to Pandas (PySpark-only; see the notebook section below)
Pyspark1 (Python)
from pyspark.sql.functions import col, avg, count, sum, max
# Sample Data
data = [
(1, "Amit", "IT", 60000),
(2, "Priya", "HR", 55000),
(3, "Rahul", "Finance", 75000),
(4, "Sneha", "IT", 80000),
(5, "Karan", "HR", 65000)
]
columns = ["EmpID", "Name", "Department", "Salary"]
df = spark.createDataFrame(data, columns)
df: pyspark.sql.dataframe.DataFrame = [EmpID: long, Name: string ... 2 more fields]
# 1. Display Schema and First 3 Rows
df.printSchema()
df.show(3)
root
|-- EmpID: long (nullable = true)
|-- Name: string (nullable = true)
|-- Department: string (nullable = true)
|-- Salary: long (nullable = true)
+-----+-----+----------+------+
|EmpID| Name|Department|Salary|
+-----+-----+----------+------+
| 1| Amit| IT| 60000|
| 2|Priya| HR| 55000|
| 3|Rahul| Finance| 75000|
+-----+-----+----------+------+
only showing top 3 rows
# 2. Filter Employees Earning More than 70,000
df.filter(col("Salary") > 70000).show()
+-----+-----+----------+------+
|EmpID| Name|Department|Salary|
+-----+-----+----------+------+
| 3|Rahul| Finance| 75000|
| 4|Sneha| IT| 80000|
+-----+-----+----------+------+
# 3. Calculate Average Salary per Department
df.groupBy("Department").agg(avg("Salary").alias("Avg_Salary")).show()
+----------+----------+
|Department|Avg_Salary|
+----------+----------+
| IT| 70000.0|
| HR| 60000.0|
| Finance| 75000.0|
+----------+----------+
# 4. Find Employees whose Name Starts with 'A'
df.filter(col("Name").startswith("A")).show()
+-----+----+----------+------+
|EmpID|Name|Department|Salary|
+-----+----+----------+------+
| 1|Amit| IT| 60000|
+-----+----+----------+------+
# 5. Count the Number of Employees per Department
df.groupBy("Department").agg(count("EmpID").alias("Num_Employees")).show()
+----------+-------------+
|Department|Num_Employees|
+----------+-------------+
| IT| 2|
| HR| 2|
| Finance| 1|
+----------+-------------+
# 6. Add a New Column for Tax Deduction (10% of Salary)
df = df.withColumn("Tax", col("Salary") * 0.1)
df.show()
df: pyspark.sql.dataframe.DataFrame = [EmpID: long, Name: string ... 3 more fields]
+-----+-----+----------+------+------+
|EmpID| Name|Department|Salary| Tax|
+-----+-----+----------+------+------+
| 1| Amit| IT| 60000|6000.0|
| 2|Priya| HR| 55000|5500.0|
| 3|Rahul| Finance| 75000|7500.0|
| 4|Sneha| IT| 80000|8000.0|
| 5|Karan| HR| 65000|6500.0|
+-----+-----+----------+------+------+
# 7. Sort Employees by Salary in Descending Order
df.orderBy(col("Salary").desc()).show()
+-----+-----+----------+------+------+
|EmpID| Name|Department|Salary| Tax|
+-----+-----+----------+------+------+
| 4|Sneha| IT| 80000|8000.0|
| 3|Rahul| Finance| 75000|7500.0|
| 5|Karan| HR| 65000|6500.0|
| 1| Amit| IT| 60000|6000.0|
| 2|Priya| HR| 55000|5500.0|
+-----+-----+----------+------+------+
# 8. Get the Second Highest Salary
second_highest_salary = df.select("Salary").distinct().orderBy(col("Salary").desc()).limit(2).collect()[-1][0]
df.filter(col("Salary") == second_highest_salary).show()
+-----+-----+----------+------+------+
|EmpID| Name|Department|Salary| Tax|
+-----+-----+----------+------+------+
| 3|Rahul| Finance| 75000|7500.0|
+-----+-----+----------+------+------+
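An alternative sketch that avoids collect(): rank salaries with a window function (dense_rank also handles salary ties; df and col come from the cells above):
from pyspark.sql.window import Window
from pyspark.sql.functions import dense_rank

# A window with no partitionBy pulls all rows into one partition,
# which is acceptable for a toy dataset like this one
w = Window.orderBy(col("Salary").desc())
df.withColumn("rnk", dense_rank().over(w)).filter(col("rnk") == 2).drop("rnk").show()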
# 9. Get Employees Who are in the HR or IT Department
df.filter(col("Department").isin(["HR", "IT"])).show()
+-----+-----+----------+------+------+
|EmpID| Name|Department|Salary| Tax|
+-----+-----+----------+------+------+
| 1| Amit| IT| 60000|6000.0|
| 2|Priya| HR| 55000|5500.0|
| 4|Sneha| IT| 80000|8000.0|
| 5|Karan| HR| 65000|6500.0|
+-----+-----+----------+------+------+
# 10. Find the Total Salary Paid by the Company
df.agg(sum("Salary").alias("Total_Salary")).show()
+------------+
|Total_Salary|
+------------+
| 335000|
+------------+
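A window variant sketch that keeps every employee row and appends the company-wide total next to it, mirroring the SUM(Salary) OVER() form in the SQL section:
from pyspark.sql.window import Window

# partitionBy() with no columns makes the window span the whole DataFrame
df.withColumn("Total_Salary", sum("Salary").over(Window.partitionBy())).show()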
from pyspark.sql import SparkSession
# Create Spark session
spark = SparkSession.builder.appName("CricketPlayers").getOrCreate()
# Define data
data = [
("Virat Kohli", "India", 12000, 4),
("Rohit Sharma", "India", 11000, 8),
("Jasprit Bumrah", "India", 1200, 200),
("Steve Smith", "Australia", 9500, 20),
("David Warner", "Australia", 10500, 10),
("Ben Stokes", "England", 5000, 150),
("Joe Root", "England", 10000, 30),
]
# Define schema
columns = ["Player", "Country", "Runs", "Wickets"]
# Create DataFrame
df = spark.createDataFrame(data, columns)
# Save DataFrame as CSV in Databricks
csv_path = "dbfs:/FileStore/tables/players.csv" # Databricks FileStore path
df.write.mode("overwrite").option("header", True).csv(csv_path)
# Display confirmation message
print(f"CSV file saved at: {csv_path}")
df: pyspark.sql.dataframe.DataFrame = [Player: string, Country: string ... 2 more fields]
CSV file saved at: dbfs:/FileStore/tables/players.csv
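Worth noting: df.write.csv() creates a directory of part files at csv_path, not a single players.csv. A sketch that forces one output file (only sensible for small data, since coalesce(1) routes every row through a single task):
df.coalesce(1).write.mode("overwrite").option("header", True).csv(csv_path)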
# 11. Read a CSV File of Cricket Players
players_df = spark.read.option("header", "true").csv("dbfs:/FileStore/tables/players.csv", inferSchema=True)
players_df.show()
players_df: pyspark.sql.dataframe.DataFrame = [Player: string, Country: string ... 2 more fields]
+--------------+---------+-----+-------+
| Player| Country| Runs|Wickets|
+--------------+---------+-----+-------+
| David Warner|Australia|10500| 10|
|Jasprit Bumrah| India| 1200| 200|
| Steve Smith|Australia| 9500| 20|
| Ben Stokes| England| 5000| 150|
| Rohit Sharma| India|11000| 8|
| Virat Kohli| India|12000| 4|
| Joe Root| England|10000| 30|
+--------------+---------+-----+-------+
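An alternative sketch that supplies an explicit schema instead of inferSchema, skipping the extra pass over the file that schema inference costs:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

players_schema = StructType([
    StructField("Player", StringType()),
    StructField("Country", StringType()),
    StructField("Runs", IntegerType()),
    StructField("Wickets", IntegerType()),
])
players_df = spark.read.option("header", "true").schema(players_schema).csv("dbfs:/FileStore/tables/players.csv")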
# 12. Find the Player with Maximum Runs
players_df.orderBy(col("Runs").desc()).limit(1).show()
+-----------+-------+-----+-------+
| Player|Country| Runs|Wickets|
+-----------+-------+-----+-------+
|Virat Kohli| India|12000| 4|
+-----------+-------+-----+-------+
# 13. Find the Average Runs Scored by Indian Players
players_df.filter(col("Country") == "India").agg(avg("Runs").alias("Avg_Runs")).show()
+-----------------+
| Avg_Runs|
+-----------------+
|8066.666666666667|
+-----------------+
# 14. Get Players Who Have Taken More than 50 Wickets
players_df.filter(col("Wickets") > 50).show()
+--------------+-------+----+-------+
| Player|Country|Runs|Wickets|
+--------------+-------+----+-------+
|Jasprit Bumrah| India|1200| 200|
| Ben Stokes|England|5000| 150|
+--------------+-------+----+-------+
from pyspark.sql import SparkSession
# Create Spark session
spark = SparkSession.builder.appName("IndianCities").getOrCreate()
# Define data
data = [
("Mumbai", "Maharashtra", 20000000),
("Delhi", "Delhi", 18000000),
("Bangalore", "Karnataka", 12000000),
("Hyderabad", "Telangana", 10000000),
("Chennai", "Tamil Nadu", 9000000),
("Kolkata", "West Bengal", 15000000),
("Pune", "Maharashtra", 7000000),
("Ahmedabad", "Gujarat", 8000000)
]
# Define schema
columns = ["City", "State", "Population"]
# Create DataFrame
df = spark.createDataFrame(data, columns)
# Save DataFrame as JSON in Databricks
json_path = "dbfs:/FileStore/tables/cities.json" # Databricks FileStore path
df.write.mode("overwrite").json(json_path)
# Display confirmation message
print(f"JSON file saved at: {json_path}")
df: pyspark.sql.dataframe.DataFrame = [City: string, State: string ... 1 more field]
JSON file saved at: dbfs:/FileStore/tables/cities.json
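Since df.write.json() emits newline-delimited JSON (one object per line), it reads back with a plain spark.read.json(). The multiline option only applies when a file holds a single pretty-printed JSON document, such as an array of objects. A commented sketch, assuming a hypothetical file cities_pretty.json in that format:
# cities_df = spark.read.option("multiline", "true").json("dbfs:/FileStore/tables/cities_pretty.json")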
# 15. Read a JSON File Containing Indian Cities Population
# Spark's own JSON output is newline-delimited, so no multiline option is needed here
cities_df = spark.read.json("dbfs:/FileStore/tables/cities.json")
cities_df.show()
cities_df: pyspark.sql.dataframe.DataFrame = [City: string, Population: long ... 1 more field]
+---------+----------+-----------+
| City|Population| State|
+---------+----------+-----------+
|Bangalore| 12000000| Karnataka|
|Hyderabad| 10000000| Telangana|
| Kolkata| 15000000|West Bengal|
| Mumbai| 20000000|Maharashtra|
| Chennai| 9000000| Tamil Nadu|
|Ahmedabad| 8000000| Gujarat|
| Pune| 7000000|Maharashtra|
| Delhi| 18000000| Delhi|
+---------+----------+-----------+
# 16. Find Cities with a Population Greater than 15 Million
cities_df.filter(col("Population") > 15000000).show()
+------+----------+-----------+
| City|Population| State|
+------+----------+-----------+
|Mumbai| 20000000|Maharashtra|
| Delhi| 18000000| Delhi|
+------+----------+-----------+
# 17. Calculate Total Population per State
cities_df.groupBy("State").agg(sum("Population").alias("Total_Population")).show()
+-----------+----------------+
| State|Total_Population|
+-----------+----------------+
| Karnataka| 12000000|
| Telangana| 10000000|
|West Bengal| 15000000|
|Maharashtra| 27000000|
| Tamil Nadu| 9000000|
| Gujarat| 8000000|
| Delhi| 18000000|
+-----------+----------------+
# 18. Find the State with the Highest Total Population
cities_df.groupBy("State").agg(sum("Population").alias("Total_Population")).orderBy(col("Total_Population").desc()).limit(1).show()
+-----------+----------------+
| State|Total_Population|
+-----------+----------------+
|Maharashtra| 27000000|
+-----------+----------------+
# 19. Convert a DataFrame to Pandas
pandas_df = cities_df.toPandas()
print(pandas_df)
City Population State
0 Bangalore 12000000 Karnataka
1 Hyderabad 10000000 Telangana
2 Kolkata 15000000 West Bengal
3 Mumbai 20000000 Maharashtra
4 Chennai 9000000 Tamil Nadu
5 Ahmedabad 8000000 Gujarat
6 Pune 7000000 Maharashtra
7 Delhi 18000000 Delhi
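A note on the conversion above: toPandas() pulls the entire DataFrame onto the driver, so it only suits small results. A sketch that enables Arrow to speed up the transfer (the config key below assumes Spark 3.x):
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")
pandas_df = cities_df.toPandas()  # same conversion, now Arrow-accelerated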