HPC MPI LAB 1: Vector Addition
Name: Mridul Harish
Roll No: CED18I034
Programming Environment: MPI
Problem: Vector Addition
Date: 21st October
Hardware Configuration:
CPU NAME : Intel Core i5-8250U @ 8x 3.4GHz
Number of Sockets : 1
Cores per Socket : 4
Threads per core : 2
L1 Cache size : 32KB
L2 Cache size : 256KB
L3 Cache size(Shared): 6MB
RAM : 8 GB
Collective Communication MPI Code
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
int main(int argc, char *argv[])
{
    int myid, np;
    double startwtime, endwtime, totalTime;
    int namelen;
    int n = 100000;
    /* pad the vectors so n can be split evenly among the processes;
       at most np - 1 extra slots are needed (128 covers every run below) */
    double a[n + 128], b[n + 128], c[n + 128];
    int i;
    int s, s0;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    MPI_Init(&argc, &argv);
    startwtime = MPI_Wtime();
    MPI_Comm_size(MPI_COMM_WORLD, &np);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Get_processor_name(processor_name, &namelen);
    //fprintf(stderr, "Process %d on %s\n", myid, processor_name);
    fflush(stderr);
    // Vector 1 initialization
    for (i = 0; i < n; i++)
        a[i] = i;
    // Vector 2 initialization
    for (i = 0; i < n; i++)
        b[i] = i;
    if (myid == 0)
    {
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        s = n / np;      // elements per process
        s0 = n % np;     // leftover elements
        if (s0 != 0)
        {
            s = s + 1;   // round the chunk size up and pad the inputs
            for (i = 0; i < ((s * np) - n); i++)
            {
                a[n + i] = 1;   // padding values beyond n are never used
                b[n + i] = 1;
            }
        }
        MPI_Bcast(&s, 1, MPI_INT, 0, MPI_COMM_WORLD);
        // receive buffers are allocated only after s has been finalized
        double a_recv[s], b_recv[s], c_recv[s];
        MPI_Scatter(a, s, MPI_DOUBLE, a_recv, s, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        MPI_Scatter(b, s, MPI_DOUBLE, b_recv, s, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        for (i = 0; i < s; i++)
        {
            c_recv[i] = a_recv[i] + b_recv[i];
        }
        MPI_Gather(c_recv, s, MPI_DOUBLE, c, s, MPI_DOUBLE, 0,
                   MPI_COMM_WORLD);
        //for (i = 0; i < n; i++)
        //    printf("%d %f %f %f\n", i, a[i], b[i], c[i]);
        endwtime = MPI_Wtime();
        totalTime = endwtime - startwtime;
        printf("%f\n", totalTime);
    }
    else
    {
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(&s, 1, MPI_INT, 0, MPI_COMM_WORLD);
        double a_recv[s], b_recv[s], c_recv[s];
        MPI_Scatter(a, s, MPI_DOUBLE, a_recv, s, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        MPI_Scatter(b, s, MPI_DOUBLE, b_recv, s, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        for (i = 0; i < s; i++)
        {
            c_recv[i] = a_recv[i] + b_recv[i];
        }
        MPI_Gather(c_recv, s, MPI_DOUBLE, c, s, MPI_DOUBLE, 0,
                   MPI_COMM_WORLD);
    }
    MPI_Finalize();
    return 0;
}
Point to Point communication MPI Code
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define n 100000
double a[n], b[n];
double c[n] = {0};
int main(int argc, char *argv[])
{
    int myid, np, elements_per_process, n_elements_received;
    MPI_Status status;
    double startwtime, endwtime, totalTime;
    MPI_Init(&argc, &argv);
    startwtime = MPI_Wtime();
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &np);
    if (myid == 0)
    {
        for (int i = 0; i < n; i += 1)
        {
            a[i] = (i + 1);
            b[i] = (i + 1);
        }
        int idx, i;
        if (np == 1)
        {
            // single process: there are no workers, so rank 0 computes the sum itself
            elements_per_process = n;
            for (i = 0; i < n; i++)
                c[i] = a[i] + b[i];
        }
        else
        {
            // distribute the vectors among the np - 1 worker processes
            elements_per_process = n / (np - 1);
            for (i = 1; i < np - 1; i++)
            {
                idx = (i - 1) * elements_per_process;
                MPI_Send(&elements_per_process, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
                MPI_Send(&a[idx], elements_per_process, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
                MPI_Send(&b[idx], elements_per_process, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
            }
            // the last worker also takes any leftover elements
            idx = (i - 1) * elements_per_process;
            int elements_left = n - idx;
            MPI_Send(&elements_left, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
            MPI_Send(&a[idx], elements_left, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
            MPI_Send(&b[idx], elements_left, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
        }
        // collect each worker's partial result back into c
        for (i = 1; i < np; i++)
        {
            int n_elements_received;
            idx = (i - 1) * elements_per_process;
            MPI_Recv(&n_elements_received, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &status);
            MPI_Recv(&c[idx], n_elements_received, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status);
        }
        endwtime = MPI_Wtime();
        totalTime = endwtime - startwtime;
        printf("%f\n", totalTime);
    }
    else
    {
        MPI_Recv(&n_elements_received, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        char processor_name[MPI_MAX_PROCESSOR_NAME];
        double a_recv[n + 1000], b_recv[n + 1000], c_recv[n + 1000];
        int name_len;
        MPI_Get_processor_name(processor_name, &name_len);
        MPI_Recv(a_recv, n_elements_received, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
        MPI_Recv(b_recv, n_elements_received, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
        // each worker adds its chunk and sends the partial sum back to rank 0
        for (int i = 0; i < n_elements_received; i++)
            c_recv[i] = a_recv[i] + b_recv[i];
        MPI_Send(&n_elements_received, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
        MPI_Send(c_recv, n_elements_received, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
    }
    MPI_Finalize();
    return 0;
}
Compilation and Execution
Compile with mpicc, which wraps the C compiler and links the MPI library:
mpicc vector_add.c -o vector_add
Execute with mpirun, giving the number of processes and the machinefile that lists the cluster hosts:
mpirun -n 25 -f machinefile ./vector_add
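For reference, the machinefile passed via -f simply lists the hosts on which processes are launched. The listing below is only an illustrative sketch, assuming MPICH's Hydra launcher (which accepts -f) and placeholder hostnames for the virtual machines; each line names a host, optionally followed by the number of processes to place on it:
vm1:8
vm2:8
vm3:8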
Observations
Speedup can be found using the following formula:
      S(n) = T(1)/T(n)    where, S(n) = Speedup for process count 'n'
                                 T(1) = Execution time with 1 process (serial run)
                                 T(n) = Execution time with 'n' processes (parallel run)
The parallelization fraction can be found from Amdahl's law:
      S(n) = 1/((1 - p) + p/n)    where, S(n) = Speedup for process count 'n'
                                         n = Number of processes
                                         p = Parallelization fraction
Rearranging for p gives p = (1 - 1/S(n)) / (1 - 1/n); this is the value reported in the
f(P) column of the tables below.
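As a quick check, the short C sketch below (illustrative only, not part of the lab code) recomputes the speedup and parallelization fraction for the 2-process row of the collective-communication table:
#include <stdio.h>
int main(void)
{
    /* timings copied from the collective-communication table */
    double t1 = 0.005760;   /* execution time with 1 process   */
    double tn = 0.002708;   /* execution time with n processes */
    int n = 2;              /* number of processes             */
    double s = t1 / tn;                            /* speedup S(n) = T(1)/T(n) */
    double p = (1.0 - 1.0 / s) / (1.0 - 1.0 / n);  /* parallelization fraction */
    printf("S(%d) = %f, p = %f\n", n, s, p);       /* S(2) = 2.127031, p = 1.059722 */
    return 0;
}
Values of p outside [0, 1], as in most rows below, indicate that the measured speedup is not explained by Amdahl's law alone; the overheads discussed in the Inference section dominate.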
Collective Communication:
No. of           Execution          Speedup             Parallel
Processes (P)    Time T(P) (s)      S(P)                Fraction f(P)
      1          0.00576
      2          0.002708           2.127031019         1.059722222
      4          0.002645           2.177693762         0.7210648148
      6          0.089785           0.064153255         -17.50520833
      8          0.13245            0.04348810872       -25.13690476
     12          0.338068           0.01703799236       -62.93712121
     16          0.428783           0.01343336839       -78.33759259
     20          1.112917           0.005175588117      -202.3313231
     24          1.935339           0.00297622277       -349.561413
     32          6.079448           0.0009474544399     -1088.474552
     48          6.102137           0.0009439316095     -1080.917908
     64          3.791373           0.001519238545      -667.6566138
    128          14.708826          0.0003916016139     -2572.714961
Point-to-Point Communication:
No. of           Execution          Speedup             Parallel
Processes (P)    Time T(P) (s)      S(P)                Fraction f(P)
      1          0.001028
      2          0.002171           0.4735145094        -2.223735409
      4          0.00299            0.343812709         -2.544747082
      6          0.032904           0.03124240214       -37.20933852
      8          0.170045           0.006045458555      -187.9010561
     12          0.434316           0.0023669402        -459.8033251
     16          0.061818           0.01662946067       -63.07652399
     20          0.354108           0.00290306912       -361.5400369
     24          0.111944           0.00918316301       -112.5860261
     32          0.333079           0.003086354889      -333.4263838
     48          2.674903           0.0003843130013     -2656.387118
     64          4.826712           0.000212981425      -4768.756964
    128          23.345798          0.0000440336201     -22887.73063
Inference
Performance worsens when the program is run on the cluster because the same physical
resources are shared among the different virtual machines.
At any point during the program's runtime there are 4 operating systems running (1
Windows 11 host and 3 Ubuntu 20.10 virtual machines), all drawing on the laptop's
common resources. This creates a performance bottleneck.
Alongside the operating systems, multiple background and foreground processes are also
competing for system resources.
In addition, the complexity of the algorithm is low, O(n), so there is little
computation to offset the cost of communication.
The communication overhead overtakes the computation cost at 128 processes, causing
the drastic reduction in the program's performance.
At 256 processes, my computer stopped responding because the overhead was too high.
Collective communication offers better performance than point-to-point
communication.