梯形积分法【OpenMP实现】多个版本

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/a19990412/article/details/85159411

简述

会用多个版本来写。操作是用命令行来操作。
除了第二版本是在内容上基于第一个版本的完善,其他都是降低算法的复杂程度的,可以放心阅读

版本1

  • 要求n被thread_count整除 不然可以会有计算错误。
#include <iostream>
#include <omp.h>

#define FUN(x) (x * x)

using namespace std;
#pragma warning(disable : 4996)
void Trap(double a, double b, int n, double *global_result_p);
int main(int argc, char **argv) {
	if (argc < 5) return 0;
	double global_result = 0.0;
	double a, b;
	int n;
	int thread_count = strtol(argv[1], NULL, 10);
	a = strtod(argv[2], NULL);
	b = strtod(argv[3], NULL);
	n = strtol(argv[4], NULL, 10);

	// n 必须被 thread_count 整除
#pragma omp parallel num_threads (thread_count)
	Trap(a, b, n, &global_result);

	cout << "With n = " << n << " trapzoids, our estimate\n";
	cout << "of the integral from " << a << " to " << b <<" = "<< global_result<< endl;
}

void Trap(double a, double b, int n, double *global_result_p) {
	double h, x, my_result;
	double local_a, local_b;
	int i, local_n;
	int my_rank = omp_get_thread_num();
	int thread_count = omp_get_num_threads();

	h = (b - a) / n;
	local_n = n / thread_count;  // same in all threads
	local_a = a + my_rank * local_n * h;
	local_b = local_a + local_n * h;
	x = local_a; // initial x
	my_result = (FUN(local_a) + FUN(local_b)) / 2;
	for (i = 1; i < local_n; ++i) {
		x += h;
		my_result += FUN(x);
	}
	my_result *= h;
# pragma omp critical 
	*global_result_p += my_result;
}

编译好源文件之后,就用下面的命令执行

  • 这里是用4个线程,来计算在0,1区间上的 x 2 x^2 的用n=64划分的梯形积分公式。结果接近1/3 可以通过提高n来提高精度。但这个版本只能计算n为thread_count的整数倍的时候
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 64
With n = 64 trapzoids, our estimate
of the integral from 0 to 1 = 0.333374

版本2

  • 可以使用不被thread_count整除的n
  • 改进代码: 添加了一个变量left表示剩余。剩余的让前left个线程每个再多算点。(一般来说线程数目都会比n要小很多。(不然在算法上就会不精确(n不够大的话)。))
left = n % thread_count;
if (my_rank < left) local_n += 1;
if (my_rank < left){ local_a = a + my_rank * local_n * h; }
else { local_a = a + left * (local_n + 1) * h + (my_rank - left) * local_n * h; }

实际的代码:

#include <iostream>
#include <omp.h>

#define FUN(x) (x * x)

using namespace std;
#pragma warning(disable : 4996)
void Trap(double a, double b, int n, double *global_result_p);
int main(int argc, char **argv) {
	if (argc < 5) return 0;
	double global_result = 0.0;
	double a, b;
	int n;
	int thread_count = strtol(argv[1], NULL, 10);
	a = strtod(argv[2], NULL);
	b = strtod(argv[3], NULL);
	n = strtol(argv[4], NULL, 10);

#pragma omp parallel num_threads (thread_count)
	Trap(a, b, n, &global_result);

	cout << "With n = " << n << " trapzoids, our estimate\n";
	cout << "of the integral from " << a << " to " << b <<" = "<< global_result<< endl;
}

void Trap(double a, double b, int n, double *global_result_p) {
	double h, x, my_result;
	double local_a, local_b;
	int i, local_n, left;
	int my_rank = omp_get_thread_num();
	int thread_count = omp_get_num_threads();

	h = (b - a) / n;

	local_n = n / thread_count;  // same in all threads
	left = n % thread_count;
	if (my_rank < left) local_n += 1;
	if (my_rank < left){ local_a = a + my_rank * local_n * h; }
	else { local_a = a + left * (local_n + 1) * h + (my_rank - left) * local_n * h; }

	local_b = local_a + local_n * h;
	x = local_a; // initial x
	my_result = (FUN(local_a) + FUN(local_b)) / 2;
	for (i = 1; i < local_n; ++i) {
		x += h;
		my_result += FUN(x);
	}
	my_result *= h;
# pragma omp critical 
	*global_result_p += my_result;
}

发现精度不断的提升,在只改变n的数目的情况下。

PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 64
With n = 64 trapzoids, our estimate
of the integral from 0 to 1 = 0.333374
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 65
With n = 65 trapzoids, our estimate
of the integral from 0 to 1 = 0.333373
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 66
With n = 66 trapzoids, our estimate
of the integral from 0 to 1 = 0.333372
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 67
With n = 67 trapzoids, our estimate
of the integral from 0 to 1 = 0.33337

版本3

  • 对于部分对指针不太熟的人来说,我们可以做出下面的改进
  • 修改部分:
#pragma omp parallel num_threads (thread_count)
	{
		double my_result = Trap(a, b, n);
#pragma omp critical 
		global_result += my_result;
	}
#include <iostream>
#include <omp.h>

#define FUN(x) (x * x)

using namespace std;
#pragma warning(disable : 4996)
double Trap(double a, double b, int n);
int main(int argc, char **argv) {
	if (argc < 5) return 0;
	double global_result = 0.0;
	double a, b;
	int n;
	int thread_count = strtol(argv[1], NULL, 10);
	a = strtod(argv[2], NULL);
	b = strtod(argv[3], NULL);
	n = strtol(argv[4], NULL, 10);


#pragma omp parallel num_threads (thread_count)
	{
		double my_result = Trap(a, b, n);
#pragma omp critical 
		global_result += my_result;
	}

	cout << "With n = " << n << " trapzoids, our estimate\n";
	cout << "of the integral from " << a << " to " << b <<" = "<< global_result<< endl;
}

double Trap(double a, double b, int n) {
	double h, x, my_result;
	double local_a, local_b;
	int i, local_n, left;
	int my_rank = omp_get_thread_num();
	int thread_count = omp_get_num_threads();

	h = (b - a) / n;

	local_n = n / thread_count;  // same in all threads
	left = n % thread_count;
	if (my_rank < left) local_n += 1;
	if (my_rank < left){ local_a = a + my_rank * local_n * h; }
	else { local_a = a + left * (local_n + 1) * h + (my_rank - left) * local_n * h; }

	local_b = local_a + local_n * h;
	x = local_a; // initial x
	my_result = (FUN(local_a) + FUN(local_b)) / 2;
	for (i = 1; i < local_n; ++i) {
		x += h;
		my_result += FUN(x);
	}
	my_result *= h;
	return my_result;
}

结果

PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 67
With n = 67 trapzoids, our estimate
of the integral from 0 to 1 = 0.33337
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 68
With n = 68 trapzoids, our estimate
of the integral from 0 to 1 = 0.333369
PS D:\C++\VS\repo\OpenMP-TEST\Debug>

版本4

  • 使用规约变量和规约操作
  • 会让代码更加简单
#pragma omp parallel num_threads (thread_count) reduction(+:global_result)
	{
		global_result += Trap(a, b, n);
	}
  • 跟版本3是一样的。就是让代码更加简单了而已,用了map-reduce的思路
  • reduction(<operator>: <variable list>) 前面的放操作符,后面的设置变量。
  • 但是有需要注意的,如果是减法就需要小心了。在每个线程上确实是使用了减法,但是全局上也是可能做了减法,(例如 -1 - (-5)= 4)这样的事情是有可能发生的。一般来说,满足交换律的都不用担心这点hh

猜你喜欢

转载自blog.csdn.net/a19990412/article/details/85159411