10#include "heffte_common.h"
31template<
typename index>
44 std::array<int, 3>
map;
51template<
typename index>
53 os <<
"nfast = " << plan.size[0] <<
"\n";
54 os <<
"nmid = " << plan.size[1] <<
"\n";
55 os <<
"nslow = " << plan.size[2] <<
"\n";
56 os <<
"line_stride = " << plan.line_stride <<
"\n";
57 os <<
"plane_stride = " << plan.plane_stride <<
"\n";
58 if (plan.buff_line_stride > 0){
59 os <<
"buff_line_stride = " << plan.buff_line_stride <<
"\n";
60 os <<
"buff_plane_stride = " << plan.buff_plane_stride <<
"\n";
61 os <<
"map = (" << plan.map[0] <<
", " << plan.map[1] <<
", " << plan.map[2] <<
")\n";
74template<
typename backend>
91 template<
typename scalar_type,
typename index>
95 for(index
mid = 0;
mid < plan.size[1];
mid++){
101 template<
typename scalar_type,
typename index>
104 for(index
mid = 0;
mid < plan.size[1];
mid++){
105 std::copy_n(&
buffer[(
slow * plan.size[1] +
mid) * plan.size[0]],
106 plan.size[0], &
data[
slow * plan.plane_stride +
mid * plan.line_stride]);
124 template<
typename scalar_type,
typename index>
134 template<
typename scalar_type,
typename index>
136 constexpr index stride = 256 /
sizeof(
scalar_type);
137 if (plan.map[0] == 0
and plan.map[1] == 1){
138 for(index
i=0;
i<plan.size[2];
i++)
139 for(index
j=0;
j<plan.size[1];
j++)
140 for(index
k=0;
k<plan.size[0];
k++)
141 data[
i * plan.plane_stride +
j * plan.line_stride +
k]
142 =
buffer[
i * plan.buff_plane_stride +
j * plan.buff_line_stride +
k ];
144 }
else if (plan.map[0] == 0
and plan.map[1] == 2){
145 for(index
bi=0;
bi<plan.size[2];
bi+=stride)
146 for(index
bj=0;
bj<plan.size[1];
bj+=stride)
147 for(index
bk=0;
bk<plan.size[0];
bk+=stride)
148 for(index
i=
bi;
i<std::min(
bi + stride, plan.size[2]);
i++)
149 for(index
j=
bj;
j<std::min(
bj + stride, plan.size[1]);
j++)
150 for(index
k=
bk;
k<std::min(
bk + stride, plan.size[0]);
k++)
151 data[
i * plan.plane_stride +
j * plan.line_stride +
k]
152 =
buffer[
j * plan.buff_plane_stride +
i * plan.buff_line_stride +
k ];
154 }
else if (plan.map[0] == 1
and plan.map[1] == 0){
155 for(index
bi=0;
bi<plan.size[2];
bi+=stride)
156 for(index
bj=0;
bj<plan.size[1];
bj+=stride)
157 for(index
bk=0;
bk<plan.size[0];
bk+=stride)
158 for(index
i=
bi;
i<std::min(
bi + stride, plan.size[2]);
i++)
159 for(index
j=
bj;
j<std::min(
bj + stride, plan.size[1]);
j++)
160 for(index
k=
bk;
k<std::min(
bk + stride, plan.size[0]);
k++)
161 data[
i * plan.plane_stride +
j * plan.line_stride +
k]
162 =
buffer[
i * plan.buff_plane_stride +
k * plan.buff_line_stride +
j ];
164 }
else if (plan.map[0] == 1
and plan.map[1] == 2){
165 for(index
bi=0;
bi<plan.size[2];
bi+=stride)
166 for(index
bj=0;
bj<plan.size[1];
bj+=stride)
167 for(index
bk=0;
bk<plan.size[0];
bk+=stride)
168 for(index
i=
bi;
i<std::min(
bi + stride, plan.size[2]);
i++)
169 for(index
j=
bj;
j<std::min(
bj + stride, plan.size[1]);
j++)
170 for(index
k=
bk;
k<std::min(
bk + stride, plan.size[0]);
k++)
171 data[
i * plan.plane_stride +
j * plan.line_stride +
k]
172 =
buffer[
k * plan.buff_plane_stride +
i * plan.buff_line_stride +
j ];
174 }
else if (plan.map[0] == 2
and plan.map[1] == 0){
175 for(index
bi=0;
bi<plan.size[2];
bi+=stride)
176 for(index
bj=0;
bj<plan.size[1];
bj+=stride)
177 for(index
bk=0;
bk<plan.size[0];
bk+=stride)
178 for(index
i=
bi;
i<std::min(
bi + stride, plan.size[2]);
i++)
179 for(index
j=
bj;
j<std::min(
bj + stride, plan.size[1]);
j++)
180 for(index
k=
bk;
k<std::min(
bk + stride, plan.size[0]);
k++)
181 data[
i * plan.plane_stride +
j * plan.line_stride +
k]
182 =
buffer[
j * plan.buff_plane_stride +
k * plan.buff_line_stride +
i ];
185 for(index
bi=0;
bi<plan.size[2];
bi+=stride)
186 for(index
bj=0;
bj<plan.size[1];
bj+=stride)
187 for(index
bk=0;
bk<plan.size[0];
bk+=stride)
188 for(index
i=
bi;
i<std::min(
bi + stride, plan.size[2]);
i++)
189 for(index
j=
bj;
j<std::min(
bj + stride, plan.size[1]);
j++)
190 for(index
k=
bk;
k<std::min(
bk + stride, plan.size[0]);
k++)
191 data[
i * plan.plane_stride +
j * plan.line_stride +
k]
192 =
buffer[
k * plan.buff_plane_stride +
j * plan.buff_line_stride +
i ];
203namespace data_scaling {
208 template<
typename scalar_type,
typename index>
210 for(index
i=0;
i<num_entries;
i++)
data[
i] *= scale_factor;
221 template<
typename precision_type,
typename index>
222 void apply(
void *stream, index num_entries, std::complex<precision_type> *
data,
double scale_factor){
229 template<
typename scalar_type,
typename index>
231 apply(
nullptr, num_entries,
data, scale_factor);
std::ostream & operator<<(std::ostream &os, box3d< index > const box)
Debugging info, writes out the box to a stream.
Definition heffte_geometry.h:146
void apply(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
Simply multiply the num_entries in the data by the scale_factor.
Definition heffte_backend_cuda.h:837
Namespace containing all HeFFTe methods and classes.
Definition heffte_backend_cuda.h:38
void pack(void *, pack_plan_3d< index > const &plan, scalar_type const data[], scalar_type buffer[]) const
Execute the planned pack operation.
Definition heffte_pack3d.h:92
void unpack(void *, pack_plan_3d< index > const &plan, scalar_type const buffer[], scalar_type data[]) const
Execute the planned unpack operation.
Definition heffte_pack3d.h:102
Defines the direct packer without implementation, use the specializations to get the CPU or GPU implementation.
Definition heffte_pack3d.h:83
Holds the plan for a pack/unpack operation.
Definition heffte_pack3d.h:32
index buff_plane_stride
Stride of the planes in the received buffer (transpose packing only).
Definition heffte_pack3d.h:42
index line_stride
Stride of the lines.
Definition heffte_pack3d.h:36
index plane_stride
Stride of the planes.
Definition heffte_pack3d.h:38
std::array< index, 3 > size
Number of elements in the three directions.
Definition heffte_pack3d.h:34
std::array< int, 3 > map
Maps the i,j,k indexes from input to the output (transpose packing only).
Definition heffte_pack3d.h:44
index buff_line_stride
Stride of the lines in the received buffer (transpose packing only).
Definition heffte_pack3d.h:40
The packer needs to know whether the data will be on the CPU or GPU devices.
Definition heffte_pack3d.h:75
Wrapper around cufftHandle plans, set for float or double complex.
Definition heffte_backend_cuda.h:346
void pack(void *q, pack_plan_3d< index > const &plan, scalar_type const data[], scalar_type buffer[]) const
Execute the planned pack operation.
Definition heffte_pack3d.h:125
void unpack(void *, pack_plan_3d< index > const &plan, scalar_type const buffer[], scalar_type data[]) const
Execute the planned unpack operation.
Definition heffte_pack3d.h:135
Defines the transpose packer without implementation, use the specializations to get the CPU implementation.
Definition heffte_pack3d.h:116