Assignment No. 1:
Design and implement Parallel Breadth-First Search and Depth-First Search based on
existing algorithms using OpenMP. Use a tree or an undirected graph for BFS and DFS.
Code:
#include <iostream>
#include <vector>
#include <queue>
#include <omp.h>
using namespace std;
// Graph class representing the adjacency list
class Graph {
int V; // Number of vertices
vector<vector<int>> adj; // Adjacency list
public:
Graph(int V) : V(V), adj(V) {}
// Add an edge to the graph
void addEdge(int v, int w) {
adj[v].push_back(w);
}
// Parallel Depth-First Search
void parallelDFS(int startVertex) {
vector<bool> visited(V, false);
parallelDFSUtil(startVertex, visited);
}
// Parallel DFS utility function
void parallelDFSUtil(int v, vector<bool>& visited) {
visited[v] = true;
cout << v << " ";
// Explore the neighbours of v in parallel; the shared visited array is
// guarded so that two threads cannot claim the same vertex
#pragma omp parallel for
for (int i = 0; i < (int)adj[v].size(); ++i) {
int n = adj[v][i];
bool explore = false;
#pragma omp critical
{
if (!visited[n]) { visited[n] = true; explore = true; }
}
if (explore) parallelDFSUtil(n, visited);
}
}
// Parallel Breadth-First Search
void parallelBFS(int startVertex) {
vector<bool> visited(V, false);
queue<int> q;
visited[startVertex] = true;
q.push(startVertex);
while (!q.empty()) {
int v = q.front();
q.pop();
cout << v << " ";
// Explore the neighbours of v in parallel; the shared visited
// array and queue are protected by a critical section
#pragma omp parallel for
for (int i = 0; i < (int)adj[v].size(); ++i) {
int n = adj[v][i];
#pragma omp critical
{
if (!visited[n]) {
visited[n] = true;
q.push(n);
}
}
}
}
}
};
int main() {
// Create a graph
Graph g(7);
g.addEdge(0, 1);
g.addEdge(0, 2);
g.addEdge(1, 3);
g.addEdge(1, 4);
g.addEdge(2, 5);
g.addEdge(2, 6);
/*
Example tree rooted at vertex 0 (edges are added from parent to child):

        0
      /   \
     1     2
    / \   / \
   3   4 5   6
*/
cout << "Depth-First Search (DFS): ";
g.parallelDFS(0);
cout << endl;
cout << "Breadth-First Search (BFS): ";
g.parallelBFS(0);
cout << endl;
return 0;
}
Output:
Depth-First Search (DFS): 0 1 3 4 2 5 6
Breadth-First Search (BFS): 0 1 2 3 4 5 6
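Note that because the neighbour loops run in parallel, the exact visit order can vary from run to run. The traversal can also be timed in the same way as the later assignments; the following is a minimal sketch (assuming the Graph object g constructed in main above and the standard OpenMP runtime calls omp_set_num_threads and omp_get_wtime):
omp_set_num_threads(4);            // request four OpenMP threads
double start = omp_get_wtime();    // wall-clock time before the traversal
g.parallelBFS(0);
double end = omp_get_wtime();      // wall-clock time after the traversal
cout << "\nParallel BFS took " << end - start << " seconds\n";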
Assignment No. 2:
Write a program to implement Parallel Bubble Sort and Parallel Merge Sort using OpenMP. Use
existing algorithms and measure the performance of the sequential and parallel algorithms.
Code - Parallel Bubble Sort:
#include<iostream>
#include<omp.h>
using namespace std;
void bubble(int array[], int n){
for (int i = 0; i < n - 1; i++){
for (int j = 0; j < n - i - 1; j++){
if (array[j] > array[j + 1]) swap(array[j], array[j + 1]);
}
}
}
// Parallel odd-even transposition sort
void pBubble(int array[], int n){
for(int i = 0; i < n; ++i){
// Compare-exchange the pairs starting at even indices, in parallel
#pragma omp parallel for
for (int j = 1; j < n; j += 2){
if (array[j] < array[j-1]){
swap(array[j], array[j - 1]);
}
}
// Compare-exchange the pairs starting at odd indices, in parallel
// (each parallel for ends with an implicit barrier, so the phases do not overlap)
#pragma omp parallel for
for (int j = 2; j < n; j += 2){
if (array[j] < array[j-1]){
swap(array[j], array[j - 1]);
}
}
}
}
void printArray(int arr[], int n){
for(int i = 0; i < n; i++) cout << arr[i] << " ";
cout << "\n";
}
int main(){
// Set up variables
int n = 10;
int arr[n];
double start_time, end_time;
// Create an array with numbers starting from n to 1
for(int i = 0, j = n; i < n; i++, j--) arr[i] = j;
// Sequential time
start_time = omp_get_wtime();
bubble(arr, n);
end_time = omp_get_wtime();
cout << "Sequential Bubble Sort took : " << end_time - start_time << " seconds.\n";
printArray(arr, n);
// Reset the array
for(int i = 0, j = n; i < n; i++, j--) arr[i] = j;
// Parallel time
start_time = omp_get_wtime();
pBubble(arr, n);
end_time = omp_get_wtime();
cout << "Parallel Bubble Sort took : " << end_time - start_time << " seconds.\n";
printArray(arr, n);
}
Output:
Sequential Bubble Sort took : 0.00957767 seconds.
1 2 3 4 5 6 7 8 9 10
Parallel Bubble Sort took : 0.00988083 seconds.
1 2 3 4 5 6 7 8 9 10
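With only ten elements, the cost of creating and synchronising threads outweighs the sorting work, which is why the parallel version is not faster here. A minimal sketch of repeating the measurement with a larger, heap-allocated input (the size 100000 is an arbitrary choice for illustration, reusing pBubble and omp_get_wtime from above):
int n = 100000;                     // larger input so thread overhead can be amortised
int* arr = new int[n];
for (int i = 0, j = n; i < n; i++, j--) arr[i] = j;   // worst case: reverse order
double start_time = omp_get_wtime();
pBubble(arr, n);
double end_time = omp_get_wtime();
cout << "Parallel Bubble Sort (n = " << n << ") took : " << end_time - start_time << " seconds.\n";
delete[] arr;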
Code - Parallel Merge Sort:
#include <iostream>
#include <omp.h>
using namespace std;
void merge(int arr[], int low, int mid, int high) {
// Create arrays for the left and right partitions
int n1 = mid - low + 1;
int n2 = high - mid;
int left[n1];
int right[n2];
// Copy all left elements
for (int i = 0; i < n1; i++) left[i] = arr[low + i];
// Copy all right elements
for (int j = 0; j < n2; j++) right[j] = arr[mid + 1 + j];
// Compare and place elements
int i = 0, j = 0, k = low;
while (i < n1 && j < n2) {
if (left[i] <= right[j]){
arr[k] = left[i];
i++;
}
else{
arr[k] = right[j];
j++;
}
k++;
}
// If any elements are left out
while (i < n1) {
arr[k] = left[i];
i++;
k++;
}
while (j < n2) {
arr[k] = right[j];
j++;
k++;
}
}
void parallelMergeSort(int arr[], int low, int high) {
if (low < high) {
int mid = (low + high) / 2;
#pragma omp parallel sections
{
#pragma omp section
{
parallelMergeSort(arr, low, mid);
}
#pragma omp section
{
parallelMergeSort(arr, mid + 1, high);
}
}
merge(arr, low, mid, high);
}
}
void mergeSort(int arr[], int low, int high) {
if (low < high) {
int mid = (low + high) / 2;
mergeSort(arr, low, mid);
mergeSort(arr, mid + 1, high);
merge(arr, low, mid, high);
}
}
int main() {
int n = 1000;
int arr[n];
double start_time, end_time;
// Create an array with numbers starting from n to 1.
for(int i = 0, j = n; i < n; i++, j--) arr[i] = j;
// Measure Sequential Time
start_time = omp_get_wtime();
mergeSort(arr, 0, n - 1);
end_time = omp_get_wtime();
cout << "Time taken by sequential algorithm: " << end_time - start_time << "
seconds\n";
// Reset the array
for(int i = 0, j = n; i < n; i++, j--) arr[i] = j;
//Measure Parallel time
start_time = omp_get_wtime();
parallelMergeSort(arr, 0, n - 1);
end_time = omp_get_wtime();
cout << "Time taken by parallel algorithm: " << end_time - start_time << " seconds";
return 0;
}
Output:
Time taken by sequential algorithm: 0.000135859 seconds
Time taken by parallel algorithm: 0.000123855 seconds
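The parallelMergeSort above opens a new parallel sections region at every level of recursion, even for very small subarrays, which limits the speed-up. A minimal sketch of a depth-limited variant (an illustrative assumption, not part of the assignment code) that falls back to the sequential mergeSort below a cut-off depth; it would be placed after the merge and mergeSort functions above:
// Stop spawning parallel sections below a cut-off depth; small subarrays
// are sorted by the sequential mergeSort defined above.
void parallelMergeSortDepth(int arr[], int low, int high, int depth) {
    if (low < high) {
        if (depth <= 0) {
            mergeSort(arr, low, high);   // too little work left for new threads
            return;
        }
        int mid = (low + high) / 2;
        #pragma omp parallel sections
        {
            #pragma omp section
            parallelMergeSortDepth(arr, low, mid, depth - 1);
            #pragma omp section
            parallelMergeSortDepth(arr, mid + 1, high, depth - 1);
        }
        merge(arr, low, mid, high);
    }
}
// Example call: parallelMergeSortDepth(arr, 0, n - 1, 3);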
Assignment No. 3:
Implement Min, Max, Sum and Average operations using Parallel Reduction.
Code:
/*
The min and max reduction clauses below require OpenMP 3.1 or later.
MSVC++ on Windows only implements OpenMP 2.0, so this program may not
compile there. Please use a Linux environment (or the Windows Subsystem
for Linux).
*/
#include<iostream>
#include<omp.h>
using namespace std;
int minval(int arr[], int n){
int minval = arr[0];
#pragma omp parallel for reduction(min : minval)
for(int i = 0; i < n; i++){
if(arr[i] < minval) minval = arr[i];
}
return minval;
}
int maxval(int arr[], int n){
int maxval = arr[0];
#pragma omp parallel for reduction(max : maxval)
for(int i = 0; i < n; i++){
if(arr[i] > maxval) maxval = arr[i];
}
return maxval;
}
int sum(int arr[], int n){
int sum = 0;
#pragma omp parallel for reduction(+ : sum)
for(int i = 0; i < n; i++){
sum += arr[i];
}
return sum;
}
double average(int arr[], int n){
return (double)sum(arr, n) / n;
}
int main(){
int n = 5;
int arr[] = {1,2,3,4,5};
cout << "The minimum value is: " << minval(arr, n) << '\n';
cout << "The maximum value is: " << maxval(arr, n) << '\n';
cout << "The summation is: " << sum(arr, n) << '\n';
cout << "The average is: " << average(arr, n) << '\n';
return 0;
}
Output:
The minimum value is: 1
The maximum value is: 5
The summation is: 15
The average is: 3
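Since OpenMP allows several reduction clauses on the same loop, the four quantities can also be computed in a single pass. A minimal sketch (assuming the same arr and n as in main above):
int mn = arr[0], mx = arr[0], total = 0;
#pragma omp parallel for reduction(min : mn) reduction(max : mx) reduction(+ : total)
for (int i = 0; i < n; i++) {
    if (arr[i] < mn) mn = arr[i];
    if (arr[i] > mx) mx = arr[i];
    total += arr[i];
}
double avg = (double)total / n;   // average derived from the summation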
Assignment No. 4:
Write a CUDA program for:
1. Addition of two large vectors
2. Matrix Multiplication using CUDA C
Code - Addition of Two Large Vectors:
%%cu
#include <iostream>
using namespace std;
__global__ void add(int* A, int* B, int* C, int size) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid < size) {
C[tid] = A[tid] + B[tid];
}
}
void initialize(int* vector, int size) {
for (int i = 0; i < size; i++) {
vector[i] = rand() % 10;
}
}
void print(int* vector, int size) {
for (int i = 0; i < size; i++) {
cout << vector[i] << " ";
}
cout << endl;
}
int main() {
int N = 4;
int* A, * B, * C;
int vectorSize = N;
size_t vectorBytes = vectorSize * sizeof(int);
A = new int[vectorSize];
B = new int[vectorSize];
C = new int[vectorSize];
initialize(A, vectorSize);
initialize(B, vectorSize);
cout << "Vector A: ";
print(A, N);
cout << "Vector B: ";
print(B, N);
int* X, * Y, * Z;
cudaMalloc(&X, vectorBytes);
cudaMalloc(&Y, vectorBytes);
cudaMalloc(&Z, vectorBytes);
cudaMemcpy(X, A, vectorBytes, cudaMemcpyHostToDevice);
cudaMemcpy(Y, B, vectorBytes, cudaMemcpyHostToDevice);
int threadsPerBlock = 256;
int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
add<<<blocksPerGrid, threadsPerBlock>>>(X, Y, Z, N);
cudaMemcpy(C, Z, vectorBytes, cudaMemcpyDeviceToHost);
cout << "Addition: ";
print(C, N);
delete[] A;
delete[] B;
delete[] C;
cudaFree(X);
cudaFree(Y);
cudaFree(Z);
return 0;
}
Output:
Vector A: 3 6 7 5
Vector B: 3 5 6 2
Addition: 6 11 13 7
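The program above does not check whether the kernel launch or the memory copies succeeded. A minimal sketch of adding error checks with the standard CUDA runtime calls cudaGetLastError and cudaGetErrorString (an illustrative addition, reusing X, Y, Z, C, N, vectorBytes, blocksPerGrid and threadsPerBlock from the program above):
add<<<blocksPerGrid, threadsPerBlock>>>(X, Y, Z, N);
cudaError_t err = cudaGetLastError();   // reports errors from the kernel launch
if (err != cudaSuccess) {
    cout << "Kernel launch failed: " << cudaGetErrorString(err) << endl;
}
err = cudaMemcpy(C, Z, vectorBytes, cudaMemcpyDeviceToHost);   // cudaMemcpy also synchronises
if (err != cudaSuccess) {
    cout << "Memcpy failed: " << cudaGetErrorString(err) << endl;
}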
Code - Matrix Multiplication using CUDA C:
%%cu
#include <iostream>
using namespace std;
// CUDA code to multiply matrices
__global__ void multiply(int* A, int* B, int* C, int size) {
// Uses thread indices and block indices to compute each element
int row = blockIdx.y * blockDim.y + threadIdx.y;
int col = blockIdx.x * blockDim.x + threadIdx.x;
if (row < size && col < size) {
int sum = 0;
for (int i = 0; i < size; i++) {
sum += A[row * size + i] * B[i * size + col];
}
C[row * size + col] = sum;
}
}
void initialize(int* matrix, int size) {
for (int i = 0; i < size * size; i++) {
matrix[i] = rand() % 10;
}
}
void print(int* matrix, int size) {
for (int row = 0; row < size; row++) {
for (int col = 0; col < size; col++) {
cout << matrix[row * size + col] << " ";
}
cout << '\n';
}
cout << '\n';
}
int main() {
int* A, * B, * C;
int N = 2;
int matrixSize = N * N;
size_t matrixBytes = matrixSize * sizeof(int);
A = new int[matrixSize];
B = new int[matrixSize];
C = new int[matrixSize];
initialize(A, N);
initialize(B, N);
cout << "Matrix A: \n";
print(A, N);
cout << "Matrix B: \n";
print(B, N);
int* X, * Y, * Z;
// Allocate space
cudaMalloc(&X, matrixBytes);
cudaMalloc(&Y, matrixBytes);
cudaMalloc(&Z, matrixBytes);
// Copy values from A to X and from B to Y
cudaMemcpy(X, A, matrixBytes, cudaMemcpyHostToDevice);
cudaMemcpy(Y, B, matrixBytes, cudaMemcpyHostToDevice);
// Threads per CTA dimension
int THREADS = 2;
// Blocks per grid dimension (assumes THREADS divides N evenly)
int BLOCKS = N / THREADS;
// Use dim3 structs for block and grid dimensions
dim3 threads(THREADS, THREADS);
dim3 blocks(BLOCKS, BLOCKS);
// Launch kernel
multiply<<<blocks, threads>>>(X, Y, Z, N);
cudaMemcpy(C, Z, matrixBytes, cudaMemcpyDeviceToHost);
cout << "Multiplication of matrix A and B: \n";
print(C, N);
delete[] A;
delete[] B;
delete[] C;
cudaFree(X);
cudaFree(Y);
cudaFree(Z);
return 0;
}
Output:
Matrix A:
3 6
7 5
Matrix B:
3 5
6 2
Multiplication of matrix A and B:
45 27
51 45
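The grid size above relies on THREADS dividing N evenly. Because the kernel already checks row < size && col < size, the grid can instead be sized with the same ceiling division used in the vector-addition example, so N need not be a multiple of THREADS. A minimal sketch (reusing N, X, Y, Z and the multiply kernel from above):
int THREADS = 16;                               // 16 x 16 threads per block
int BLOCKS = (N + THREADS - 1) / THREADS;       // ceiling division covers any N
dim3 threads(THREADS, THREADS);
dim3 blocks(BLOCKS, BLOCKS);
multiply<<<blocks, threads>>>(X, Y, Z, N);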