diff --git a/fftw3_2d_mpi_example/fftw3_2d_mpi_example.c b/fftw3_2d_mpi_example/fftw3_2d_mpi_example.c
index 4282c770694699c3e5279e5d3947abaaf363c916..c97a28b23a89b265a01e7b5905e998fba2da4c16 100644
--- a/fftw3_2d_mpi_example/fftw3_2d_mpi_example.c
+++ b/fftw3_2d_mpi_example/fftw3_2d_mpi_example.c
@@ -9,7 +9,9 @@ int main(int argc, char **argv)
     fftw_plan plan;
     fftw_complex *data;
     ptrdiff_t alloc_local, local_n0, local_0_start, i, j;
-
+    int rank, nranks;
+    double t0, t1;
+
-    if(argc < 10) {
+    if(argc < 2) {
         fprintf(stderr, "USAGE: %s <N>\n", argv[0]);
         exit(-1);
@@ -20,15 +22,26 @@ int main(int argc, char **argv)
     MPI_Init(&argc, &argv);
     fftw_mpi_init();
 
+    MPI_Comm comm = MPI_COMM_WORLD;
+    MPI_Comm_rank(comm, &rank);
+    MPI_Comm_size(comm, &nranks);
+
+    t0 = MPI_Wtime();
     /* get local data size and allocate */
-    alloc_local = fftw_mpi_local_size_2d(N0, N1, MPI_COMM_WORLD,
+    alloc_local = fftw_mpi_local_size_2d(N0, N1, comm,
                                          &local_n0, &local_0_start);
     data = fftw_alloc_complex(alloc_local);
+    t1 = MPI_Wtime();
+    if(rank==0)printf("t_%s = %f seconds\n","alloc",t1-t0);
 
+    t0 = MPI_Wtime();
     /* create plan for in-place forward DFT */
-    plan = fftw_mpi_plan_dft_2d(N0, N1, data, data, MPI_COMM_WORLD,
+    plan = fftw_mpi_plan_dft_2d(N0, N1, data, data, comm,
                                 FFTW_FORWARD, FFTW_ESTIMATE);
+    t1 = MPI_Wtime();
+    if(rank==0)printf("t_%s = %f seconds\n","plan",t1-t0);
 
+    t0 = MPI_Wtime();
     /* initialize data to some function my_function(x,y) */
     for (i = 0; i < local_n0; ++i)
         for (j = 0; j < N1; ++j) {
@@ -36,10 +49,15 @@ int main(int argc, char **argv)
             if( (local_0_start + i) == 0 && j == 0)
                 data[i*N1 + j][0] = 1.0;
         }
+    t1 = MPI_Wtime();
+    if(rank==0)printf("t_%s = %f seconds\n","init",t1-t0);
 
+    t0 = MPI_Wtime();
     /* compute transforms, in-place, as many times as desired */
     fftw_execute(plan);
-
+    t1 = MPI_Wtime();
+    if(rank==0)printf("t_%s = %f seconds\n","fftf",t1-t0);
+
     fftw_destroy_plan(plan);
 
     MPI_Finalize();
diff --git a/fftw3_2d_mpi_example/fftw3_2d_mpi_example_omp.c b/fftw3_2d_mpi_example/fftw3_2d_mpi_example_omp.c
new file mode 100644
index 0000000000000000000000000000000000000000..9d3d7bb266544c958e9320f6cb7a0494ed02ba91
--- /dev/null
+++ b/fftw3_2d_mpi_example/fftw3_2d_mpi_example_omp.c
@@ -0,0 +1,89 @@
+/*
+ * 2D in-place forward complex DFT of an N x N grid using FFTW's MPI
+ * interface together with FFTW's threaded transforms.  Rank 0 prints the
+ * wall-clock time of each phase (alloc, plan, init, fftf).
+ */
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <fftw3-mpi.h>
+#include <omp.h>
+
+/* Nonzero once MPI has granted at least MPI_THREAD_FUNNELED and
+ * fftw_init_threads() has succeeded. */
+int threads_ok;
+
+int main(int argc, char **argv)
+{
+    ptrdiff_t N0, N1;
+    fftw_plan plan;
+    fftw_complex *data;
+    ptrdiff_t alloc_local, local_n0, local_0_start, i, j;
+    int provided;
+    int rank, nranks;
+    double t0, t1;
+
+    /* only argv[1] (the grid size N) is read below */
+    if(argc < 2) {
+        fprintf(stderr, "USAGE: %s <N>\n", argv[0]);
+        exit(-1);
+    }
+    N0 = atoi(argv[1]);
+    N1 = N0;
+
+    /* per the FFTW manual, fftw_init_threads() must precede fftw_mpi_init() */
+    MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
+    threads_ok = provided >= MPI_THREAD_FUNNELED;
+    if (threads_ok) threads_ok = fftw_init_threads();
+    fftw_mpi_init();
+    int nthreads = omp_get_max_threads();
+    if (threads_ok) fftw_plan_with_nthreads(nthreads);
+
+    MPI_Comm comm = MPI_COMM_WORLD;
+    MPI_Comm_rank(comm, &rank);
+    MPI_Comm_size(comm, &nranks);
+
+    if(rank==0) {
+        printf("\nomp_max_threads = %d\n",nthreads);
+        printf("fftw_planner_nthreads = %d\n\n",fftw_planner_nthreads());
+    }
+
+    t0 = MPI_Wtime();
+    /* get local data size and allocate */
+    alloc_local = fftw_mpi_local_size_2d(N0, N1, comm,
+                                         &local_n0, &local_0_start);
+    data = fftw_alloc_complex(alloc_local);
+    t1 = MPI_Wtime();
+    if(rank==0)printf("t_%s = %f seconds\n","alloc",t1-t0);
+
+    t0 = MPI_Wtime();
+    /* create plan for in-place forward DFT */
+    plan = fftw_mpi_plan_dft_2d(N0, N1, data, data, comm,
+                                FFTW_FORWARD, FFTW_ESTIMATE);
+    t1 = MPI_Wtime();
+    if(rank==0)printf("t_%s = %f seconds\n","plan",t1-t0);
+
+    t0 = MPI_Wtime();
+    /* initialize data: zero everywhere except a unit impulse at global (0,0) */
+    /* j is declared at function scope and would be shared by default;
+     * private(j) gives every thread its own inner-loop counter */
+#pragma omp parallel for private(j)
+    for (i = 0; i < local_n0; ++i)
+        for (j = 0; j < N1; ++j) {
+            data[i*N1 + j][0] = data[i*N1 + j][1] = 0.0;
+            if( (local_0_start + i) == 0 && j == 0)
+                data[i*N1 + j][0] = 1.0;
+        }
+    t1 = MPI_Wtime();
+    if(rank==0)printf("t_%s = %f seconds\n","init",t1-t0);
+
+    t0 = MPI_Wtime();
+    /* compute transforms, in-place, as many times as desired */
+    fftw_execute(plan);
+    t1 = MPI_Wtime();
+    if(rank==0)printf("t_%s = %f seconds\n","fftf",t1-t0);
+
+    fftw_destroy_plan(plan);
+
+    MPI_Finalize();
+}
diff --git a/fftw3_2d_mpi_example/makefile_perlmutter_cpu b/fftw3_2d_mpi_example/makefile_perlmutter_cpu
index 215341e884c53a4bb1c462a4ee0e2acb5d5fc0ff..083e53d3eb5b5ba852133fa5f5f7e70677f8b0bf 100644
--- a/fftw3_2d_mpi_example/makefile_perlmutter_cpu
+++ b/fftw3_2d_mpi_example/makefile_perlmutter_cpu
@@ -1,11 +1,21 @@
 FFTW_HOME = ../fftw-gcc-cc/3.3.10
 
 MPICC = cc
+
 MPI_C_FLAGS = -g -O3 -I${FFTW_HOME}/include
 MPI_LD_FLAGS = -L${FFTW_HOME}/lib -lfftw3_mpi -lfftw3 -lm
 
+# OpenMP variant: compile with -fopenmp and additionally link -lfftw3_omp
+MPI_OMP_C_FLAGS = -g -O3 -fopenmp -I${FFTW_HOME}/include
+MPI_OMP_LD_FLAGS = -L${FFTW_HOME}/lib -lfftw3_mpi -lfftw3_omp -lfftw3 -lm
+
+all: fftw3_2d_mpi_example fftw3_2d_mpi_example_omp
+
 fftw3_2d_mpi_example: fftw3_2d_mpi_example.c
-	${MPICC} fftw3_2d_mpi_example.c ${MPI_C_FLAGS} ${MPI_LD_FLAGS} -o fftw3_2d_mpi_example
+	${MPICC} $^ ${MPI_C_FLAGS} ${MPI_LD_FLAGS} -o $@
+
+fftw3_2d_mpi_example_omp: fftw3_2d_mpi_example_omp.c
+	${MPICC} $^ ${MPI_OMP_C_FLAGS} ${MPI_OMP_LD_FLAGS} -o $@
 
 clean:
-	rm -f fftw3_2d_mpi_example
+	rm -f fftw3_2d_mpi_example fftw3_2d_mpi_example_omp