7#ifndef HEFFTE_FFT3D_R2C_H
8#define HEFFTE_FFT3D_R2C_H
10#include "heffte_fft3d.h"
45template<
typename backend_tag,
typename index =
int>
99 bool use_reorder,
int algorithm,
bool use_pencils)
101 box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
103 plan_options(use_reorder,
static_cast<reshape_algorithm>(algorithm), use_pencils))
110 box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
117 :
fft3d_r2c(
box3d<index>({
il0,
il1,
il2}, {
ih0,
ih1,
ih2}), box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}), r2c_direction, comm)
146 template<
typename input_type,
typename output_type>
150 "Using either an unknown complex type or an incompatible pair of types!");
157 template<
typename input_type,
typename output_type>
161 "Using either an unknown complex type or an incompatible pair of types!");
170 template<
typename input_type,
typename output_type>
175 "Using either an unknown complex type or an incompatible pair of types!");
184 template<
typename input_type,
typename output_type>
188 "Using either an unknown complex type or an incompatible pair of types!");
210 template<
typename input_type>
213 throw std::invalid_argument(
"The input vector is smaller than size_inbox(), i.e., not enough entries provided to fill the inbox.");
214 static_assert(std::is_same<input_type, float>::value
or std::is_same<input_type, double>::value,
215 "The input to forward() must be real, i.e., either float or double.");
234 template<
typename input_type,
typename output_type>
238 "Using either an unknown complex type or an incompatible pair of types!");
245 template<
typename input_type,
typename output_type>
249 "Using either an unknown complex type or an incompatible pair of types!");
258 template<
typename input_type,
typename output_type>
263 "Using either an unknown complex type or an incompatible pair of types!");
272 template<
typename input_type,
typename output_type>
276 "Using either an unknown complex type or an incompatible pair of types!");
285 template<
typename scalar_type>
288 "Either calling backward() with non-complex input or using an unknown complex type.");
302 pinbox(
new box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(
new box3d<index>(plan.out_shape[3][plan.mpi_rank])),
304 #
ifdef Heffte_ENABLE_MAGMA
313 logic_plan3d<index>
const &plan, MPI_Comm
const comm) :
315 pinbox(new box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new box3d<index>(plan.out_shape[3][plan.mpi_rank])),
316 scale_factor(1.0 / static_cast<double>(plan.index_count))
317 #ifdef Heffte_ENABLE_MAGMA
325 void setup(logic_plan3d<index>
const &plan, MPI_Comm
const comm){
326 for(
int i=0; i<4; i++){
327 forward_shaper[i] = make_reshape3d<backend_tag>(this->
stream(), plan.in_shape[i], plan.out_shape[i], comm, plan.options);
328 backward_shaper[3-i] = make_reshape3d<backend_tag>(this->
stream(), plan.out_shape[i], plan.in_shape[i], comm, plan.options);
331 executors[0] = make_executor_r2c<backend_tag>(this->
stream(), plan.out_shape[0][
mpi::comm_rank(comm)], plan.fft_direction[0]);
332 executors[1] = make_executor<backend_tag>(this->
stream(), plan.out_shape[1][
mpi::comm_rank(comm)], plan.fft_direction[1]);
333 executors[2] = make_executor<backend_tag>(this->
stream(), plan.out_shape[2][
mpi::comm_rank(comm)], plan.fft_direction[2]);
337 size_buffer_work = comm_buffer_offset
339 executor_buffer_offset = (executor_workspace_size == 0) ? 0 : size_buffer_work - executor_workspace_size;
342 std::array<executor_base*, 3> forward_executors()
const{
343 return std::array<executor_base*, 3>{executors[0].get(), executors[1].get(), executors[2].get()};
346 std::array<executor_base*, 3> backward_executors()
const{
347 return std::array<executor_base*, 3>{executors[2].get(), executors[1].get(), executors[0].get()};
351 template<
typename scalar_type>
352 void apply_scale(
int const batch_size,
direction dir,
scale scaling, scalar_type data[])
const{
354 add_trace
name(
"scale");
355 #ifdef Heffte_ENABLE_MAGMA
356 if (std::is_same<location_tag, tag::gpu>::value){
368 std::unique_ptr<box3d<index>> pinbox, poutbox;
370 std::array<std::unique_ptr<reshape3d_base<index>>, 4> forward_shaper;
371 std::array<std::unique_ptr<reshape3d_base<index>>, 4> backward_shaper;
373 std::array<std::unique_ptr<executor_base>, 3> executors;
374 #ifdef Heffte_ENABLE_MAGMA
375 gpu::magma_handle<location_tag> hmagma;
379 size_t size_buffer_work, comm_buffer_offset, executor_buffer_offset;
386template<
typename backend_tag,
typename index =
int>
393template<
typename backend_tag,
typename index>
397 static_assert(std::is_same<index, int>::value
or std::is_same<index, long long>::value,
398 "heFFTe works with 'int' and 'long long' indexing only");
400 "The backend_tag is not valid, perhaps it needs to be enabled in the build system");
Similar to heffte::fft3d, but computes fewer redundant coefficients when the input is real.
Definition heffte_fft3d_r2c.h:46
fft3d_r2c(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, int r2c_direction, MPI_Comm const comm, bool use_reorder, int algorithm, bool use_pencils)
Internal use only, used by the Fortran interface.
Definition heffte_fft3d_r2c.h:96
void forward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a forward Fourier transform using two arrays.
Definition heffte_fft3d_r2c.h:147
typename backend::buffer_traits< backend_tag >::template container< T > buffer_container
Alias to the container template associated with the backend (allows for RAII memory management).
Definition heffte_fft3d_r2c.h:60
size_t size_comm_buffers() const
Returns the size used by the communication workspace buffers (internal use).
Definition heffte_fft3d_r2c.h:131
void backward(int batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
Overload that performs a batch transform using internally allocated workspace.
Definition heffte_fft3d_r2c.h:273
typename one_dim_backend< backend_tag >::executor_r2c backend_executor_r2c
FFT executor for the real-to-complex dimension.
Definition heffte_fft3d_r2c.h:51
box3d< index > inbox() const
Returns the inbox.
Definition heffte_fft3d_r2c.h:125
fft3d_r2c(typename backend::device_instance< location_tag >::stream_type gpu_stream, box3d< index > const inbox, box3d< index > const outbox, int r2c_direction, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
See the documentation for fft3d::fft3d()
Definition heffte_fft3d_r2c.h:85
long long size_inbox() const
Returns the size of the inbox defined in the constructor.
Definition heffte_fft3d_r2c.h:121
fft3d_r2c(box3d< index > const inbox, box3d< index > const outbox, int r2c_direction, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Constructor creating a plan for FFT transform across the given communicator and using the box geometry.
Definition heffte_fft3d_r2c.h:76
typename backend::buffer_traits< backend_tag >::location location_tag
Type-tag that is either tag::cpu or tag::gpu to indicate the location of the data.
Definition heffte_fft3d_r2c.h:55
long long size_outbox() const
Returns the size of the outbox defined in the constructor.
Definition heffte_fft3d_r2c.h:123
box3d< index > outbox() const
Returns the outbox.
Definition heffte_fft3d_r2c.h:127
void forward(int batch_size, input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
Overload utilizing a batch transform.
Definition heffte_fft3d_r2c.h:171
output_buffer_container< input_type > forward(buffer_container< input_type > const &input, scale scaling=scale::none)
Vector variant of forward() using input and output buffer_container classes.
Definition heffte_fft3d_r2c.h:211
fft3d_r2c(int il0, int il1, int il2, int ih0, int ih1, int ih2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int r2c_direction, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition heffte_fft3d_r2c.h:114
void backward(int batch_size, input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload that performs a batch transform.
Definition heffte_fft3d_r2c.h:259
real_buffer_container< scalar_type > backward(buffer_container< scalar_type > const &input, scale scaling=scale::none)
Variant of backward() that uses buffer_container for RAII style of resource management.
Definition heffte_fft3d_r2c.h:286
typename one_dim_backend< backend_tag >::executor backend_executor_c2c
FFT executor for the complex-to-complex dimensions.
Definition heffte_fft3d_r2c.h:49
void forward(input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
Overload utilizing a user provided buffer.
Definition heffte_fft3d_r2c.h:158
size_t size_workspace() const
Returns the workspace size that will be used, size is measured in complex numbers.
Definition heffte_fft3d_r2c.h:129
fft3d_r2c(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, int r2c_direction, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition heffte_fft3d_r2c.h:106
void backward(input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload utilizing a user provided buffer.
Definition heffte_fft3d_r2c.h:246
void forward(int batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
Overload utilizing a batch transform using internally allocated workspace.
Definition heffte_fft3d_r2c.h:185
double get_scale_factor(scale scaling) const
Returns the scale factor for the given scaling.
Definition heffte_fft3d_r2c.h:297
void backward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a backward Fourier transform using two arrays.
Definition heffte_fft3d_r2c.h:235
reshape_algorithm
Defines list of potential communication algorithms.
Definition heffte_plan_logic.h:48
plan_options set_options(plan_options opts)
Adjusts the user provided options to what can be handled by the backend.
Definition heffte_plan_logic.h:207
scale
Indicates the scaling factor to apply on the result of an FFT operation.
Definition heffte_fft3d.h:140
fft3d_r2c< backend_tag, index > make_fft3d_r2c(box3d< index > const inbox, box3d< index > const outbox, int r2c_direction, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Factory method that auto-detects the index type based on the box.
Definition heffte_fft3d_r2c.h:394
@ none
No scale, leave the result unperturbed similar to the FFTW API.
@ symmetric
Symmetric scaling, apply the square-root of the full scaling.
std::string name()
Returns the human readable name of the backend.
Definition heffte_common.h:265
direction
Indicates the direction of the FFT (internal use only).
Definition heffte_common.h:652
size_t get_max_box_size_r2c(std::array< some_class, 3 > const &executors)
Returns the max of the box_size() for each of the executors.
Definition heffte_utils.h:414
size_t get_max_work_size(std::array< some_class, 3 > const &executors)
Returns the max of the workspace_size() for each of the executors.
Definition heffte_utils.h:424
@ backward
Inverse DFT transform.
@ forward
Forward DFT transform.
logic_plan3d< index > plan_operations(ioboxes< index > const &boxes, int r2c_direction, plan_options const options, int const mpi_rank)
Creates the logic plan with the provided user input.
Definition heffte_plan_logic.cpp:425
void apply(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
Simply multiply the num_entries in the data by the scale_factor.
Definition heffte_backend_cuda.h:837
int comm_rank(MPI_Comm const comm)
Returns the rank of this process within the specified comm.
Definition heffte_utils.h:79
size_t get_workspace_size(std::array< std::unique_ptr< reshape3d_base< index > >, 4 > const &shapers)
Returns the maximum workspace size used by the shapers.
Definition heffte_reshape3d.h:115
Namespace containing all HeFFTe methods and classes.
Definition heffte_backend_cuda.h:38
define_standard_type< scalar_type >::type * convert_to_standard(scalar_type input[])
Converts an array of some type to an array of the C++ equivalent type.
Definition heffte_utils.h:355
Defines the container for the temporary buffers.
Definition heffte_common.h:237
Holds the auxiliary variables needed by each backend.
Definition heffte_common.h:408
void * stream_type
The type for the internal stream, the cpu uses just a void pointer.
Definition heffte_common.h:420
device_instance(void *=nullptr)
Empty constructor.
Definition heffte_common.h:410
void * stream()
Returns the nullptr.
Definition heffte_common.h:414
Allows defining whether a specific backend interface has been enabled.
Definition heffte_common.h:226
A generic container that describes a 3d box of indexes.
Definition heffte_geometry.h:67
Struct to specialize to allow HeFFTe to recognize custom single precision complex types.
Definition heffte_utils.h:252
Struct to specialize to allow HeFFTe to recognize custom double precision complex types.
Definition heffte_utils.h:270
Indicates the structure that will be used by the fft backend.
Definition heffte_common.h:663
Wrapper around cufftHandle plans, set for float or double complex.
Definition heffte_backend_cuda.h:346
Defines a set of tweaks and options to use in the plan generation.
Definition heffte_plan_logic.h:131
Indicates the use of the cpu backend and that all input/output data and arrays will be bound to the cpu.
Definition heffte_common.h:38