Highly Efficient FFT for Exascale: HeFFTe v2.4
Loading...
Searching...
No Matches
heffte_plan_logic.h
1/*
2 -- heFFTe --
3 Univ. of Tennessee, Knoxville
4 @date
5*/
6
7#ifndef HEFFTE_PLAN_LOGIC_H
8#define HEFFTE_PLAN_LOGIC_H
9
10#include "heffte_common.h"
11
20namespace heffte {
21
50 alltoallv = 0,
52 alltoall = 3,
54 p2p_plined = 1,
56 p2p = 2
57};
58
133 template<typename backend_tag> plan_options(backend_tag const)
138 num_sub(-1),
139 subcomm(MPI_COMM_NULL)
140 {}
168 num_sub = 1;
169 subcomm = comm;
170 }
172 int get_subranks() const{ return num_sub; }
173private:
174 int num_sub;
175 MPI_Comm subcomm;
176};
177
182inline std::ostream & operator << (std::ostream &os, plan_options const options){
183 std::string algorithm = "";
184 switch (options.algorithm){
185 case reshape_algorithm::alltoallv : algorithm = "mpi:alltoallv"; break;
186 case reshape_algorithm::alltoall : algorithm = "mpi:alltoall"; break;
187 case reshape_algorithm::p2p_plined : algorithm = "mpi:point-to-point-pipelined"; break;
188 case reshape_algorithm::p2p : algorithm = "mpi:point-to-point"; break;
189 };
190 os << "options = ("
191 << ((options.use_reorder) ? "fft1d:contiguous" : "fft1d:strided") << ", "
192 << algorithm << ", "
193 << ((options.use_pencils) ? "decomposition:pencil" : "decomposition:slab") << ", "
194 << ((options.use_gpu_aware) ? "mpi:from-gpu" : "mpi:from-cpu") << ")";
195 return os;
196}
197
206template<typename backend_tag, bool use_r2c = false>
208 if (std::is_same<backend_tag, backend::stock_cos>::value
209 or std::is_same<backend_tag, backend::mkl_cos>::value
210 or std::is_same<backend_tag, backend::cufft_cos>::value
211 or std::is_same<backend_tag, backend::rocfft_cos>::value
212 or std::is_same<backend_tag, backend::onemkl_cos>::value
213 or std::is_same<backend_tag, backend::stock_sin>::value
214 or std::is_same<backend_tag, backend::mkl_sin>::value
215 or std::is_same<backend_tag, backend::cufft_sin>::value
216 or std::is_same<backend_tag, backend::rocfft_sin>::value
217 or std::is_same<backend_tag, backend::onemkl_sin>::value
218 or std::is_same<backend_tag, backend::stock_cos1>::value
219 or std::is_same<backend_tag, backend::cufft_cos1>::value
220 or std::is_same<backend_tag, backend::rocfft_cos1>::value
221 ){
222 // currently the cosine options work only with reorder.
223 opts.use_reorder = true;
224 return opts;
225 }else if (use_r2c and std::is_same<backend_tag, backend::rocfft>::value){
226 // the rocfft backend with r2c requires the reorder (problem with the strides)
227 opts.use_reorder = true;
228 return opts;
229 }else{
230 return opts; // all options are supported for this backend
231 }
232}
233
239 opts.use_reorder = true;
240 return opts;
241}
242
247template<typename backend_tag>
251
275template<typename index>
278 std::vector<box3d<index>> in_shape[4];
280 std::vector<box3d<index>> out_shape[4];
282 std::array<index, 3> fft_sizes;
284 std::array<int, 3> fft_direction;
286 long long index_count;
290 int const mpi_rank;
291};
292
297template<typename index>
298inline std::array<bool, 3> pencil_directions(box3d<index> const world, std::vector<box3d<index>> const &boxes){
299 std::array<bool, 3> is_pencil = {true, true, true};
300 for(auto const &b : boxes){
301 for(int i=0; i<3; i++)
302 is_pencil[i] = is_pencil[i] and (world.size[i] == b.size[i]);
303 }
304 return is_pencil;
305}
306
/*!
 * \brief Creates the logic plan with the provided user input.
 *
 * Declaration only, the definition is listed in heffte_plan_logic.cpp.
 *
 * \tparam index is the indexing type used by the boxes and the resulting plan
 * \param boxes is the set of input and output boxes supplied by the user
 * \param r2c_direction is an integer direction for the r2c case
 *        (NOTE(review): the valid range and the value used for non-r2c transforms are not visible here, confirm against the definition)
 * \param options are the extra options used in the plan creation
 * \param mpi_rank is the MPI rank used in the plan creation
 *
 * \returns the logic plan, which incorporates the order and types of operations in a transform
 */
318template<typename index>
319logic_plan3d<index> plan_operations(ioboxes<index> const &boxes, int r2c_direction, plan_options const options, int const mpi_rank);
320
/*!
 * \brief Assuming the shapes in the plan form grids, reverse engineers the grid dimensions.
 *
 * Declaration only, the definition is listed in heffte_plan_logic.cpp.
 *
 * \tparam index is the indexing type used by the plan
 * \param plan is the logic plan whose shapes are inspected
 *
 * \returns a vector of three-integer arrays holding grid dimensions
 *          (NOTE(review): the number and order of the returned entries are not visible here, confirm against the definition)
 */
325template<typename index>
326std::vector<std::array<int, 3>> compute_grids(logic_plan3d<index> const &plan);
327
328}
329
330#endif
reshape_algorithm
Defines list of potential communication algorithms.
Definition heffte_plan_logic.h:48
plan_options set_options(plan_options opts)
Adjusts the user provided options to what can be handled by the backend.
Definition heffte_plan_logic.h:207
plan_options default_options()
Returns the default backend options associated with the given backend.
Definition heffte_plan_logic.h:248
@ alltoall
Using the MPI_Alltoall options, with padding on the data.
@ alltoallv
Using the MPI_Alltoallv options, no padding on the data (default option).
@ p2p_plined
Using MPI_Isend and MPI_Irecv, all sending, receiving, packing, and unpacking are pipelined.
@ p2p
Using MPI_Send and MPI_Irecv, receive is pipelined with packing and sending.
std::vector< box3d< index > > reorder(std::vector< box3d< index > > const &shape, std::array< int, 3 > order)
Returns the same shape, but sets a different order for each box.
Definition heffte_geometry.h:466
std::ostream & operator<<(std::ostream &os, box3d< index > const box)
Debugging info, writes out the box to a stream.
Definition heffte_geometry.h:146
std::vector< std::array< int, 3 > > compute_grids(logic_plan3d< index > const &plan)
Assuming the shapes in the plan form grids, reverse engineer the grid dimensions (used in the benchmark).
Definition heffte_plan_logic.cpp:461
std::array< bool, 3 > pencil_directions(box3d< index > const world, std::vector< box3d< index > > const &boxes)
Returns true for each direction where the boxes form pencils (i.e., where the size matches the world size).
Definition heffte_plan_logic.h:298
logic_plan3d< index > plan_operations(ioboxes< index > const &boxes, int r2c_direction, plan_options const options, int const mpi_rank)
Creates the logic plan with the provided user input.
Definition heffte_plan_logic.cpp:425
plan_options force_reorder(plan_options opts)
Forces the reorder logic for the ROCM r2c variant.
Definition heffte_plan_logic.h:238
Namespace containing all HeFFTe methods and classes.
Definition heffte_backend_cuda.h:38
Defines a set of default plan options for a given backend.
Definition heffte_common.h:761
The logic plan incorporates the order and types of operations in a transform.
Definition heffte_plan_logic.h:276
std::vector< box3d< index > > in_shape[4]
Holds the input shapes for the 4 forward reshapes (backwards reverses in and out).
Definition heffte_plan_logic.h:278
std::array< index, 3 > fft_sizes
Sizes for the 1-D transforms.
Definition heffte_plan_logic.h:282
std::array< int, 3 > fft_direction
Direction of the 1-D FFT transforms.
Definition heffte_plan_logic.h:284
int const mpi_rank
MPI rank used in the plan creation.
Definition heffte_plan_logic.h:290
long long index_count
The total number of indexes in all directions.
Definition heffte_plan_logic.h:286
plan_options const options
Extra options used in the plan creation.
Definition heffte_plan_logic.h:288
std::vector< box3d< index > > out_shape[4]
Holds the output shapes for the 4 forward reshapes (backwards reverses in and out).
Definition heffte_plan_logic.h:280
Wrapper around cufftHandle plans, set for float or double complex.
Definition heffte_backend_cuda.h:346
Defines a set of tweaks and options to use in the plan generation.
Definition heffte_plan_logic.h:131
plan_options(backend_tag const)
Constructor, initializes all options with the default values for the given backend tag.
Definition heffte_plan_logic.h:133
int get_subranks() const
Return the set number of sub-ranks.
Definition heffte_plan_logic.h:172
void use_num_subranks(int num_subranks)
Defines the number of ranks to use for the internal reshapes, set to -1 to use all ranks.
Definition heffte_plan_logic.h:154
reshape_algorithm algorithm
Defines the communication algorithm.
Definition heffte_plan_logic.h:148
bool use_reorder
Defines whether to transpose the data on reshape or to use strided 1-D ffts.
Definition heffte_plan_logic.h:146
bool use_gpu_aware
Defines whether to use MPI calls directly from the GPU or to move to the CPU first.
Definition heffte_plan_logic.h:152
void use_subcomm(MPI_Comm comm)
Set sub-communicator to use in the intermediate reshape operations.
Definition heffte_plan_logic.h:167
plan_options(bool reorder, reshape_algorithm alg, bool pencils)
Constructor, initializes each variable, primarily for internal use.
Definition heffte_plan_logic.h:142
bool use_pencils
Defines whether to use pencil or slab data distribution in the reshape steps.
Definition heffte_plan_logic.h:150