Highly Efficient FFT for Exascale: HeFFTe v2.4
|
Specialization for the data operations in CUDA mode. More...
#include <heffte_backend_cuda.h>
Public Types | |
using | stream_type = cudaStream_t |
The stream type for the device. | |
using | backend_device = backend::device_instance<tag::gpu> |
Defines the backend_device. | |
using | stream_type = sycl::queue& |
The stream type for the device. | |
using | backend_device = backend::device_instance<tag::gpu> |
Defines the backend_device. | |
using | stream_type = hipStream_t |
The stream type for the device. | |
using | backend_device = backend::device_instance<tag::gpu> |
Defines the backend_device. | |
Static Public Member Functions | |
template<typename scalar_type > | |
static scalar_type * | allocate (cudaStream_t, size_t num_entries) |
Allocate memory. | |
template<typename scalar_type > | |
static void | free (cudaStream_t, scalar_type *pntr) |
Free memory. | |
template<typename scalar_type > | |
static void | copy_n (cudaStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
Equivalent to std::copy_n() but using CUDA arrays. | |
template<typename scalar_type > | |
static void | copy_n (cudaStream_t stream, std::complex< scalar_type > const source[], size_t num_entries, scalar_type destination[]) |
Copy-convert complex-to-real. | |
template<typename scalar_type > | |
static void | copy_n (cudaStream_t stream, scalar_type const source[], size_t num_entries, std::complex< scalar_type > destination[]) |
Copy-convert real-to-complex. | |
template<typename scalar_type > | |
static void | copy_device_to_host (cudaStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
Copy the data from the device to the host. | |
template<typename scalar_type > | |
static void | copy_device_to_device (cudaStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
Copy the data from the device to the device. | |
template<typename scalar_type > | |
static void | copy_host_to_device (cudaStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
Copy the data from the host to the device. | |
template<typename scalar_type > | |
static scalar_type * | allocate (sycl::queue &stream, size_t num_entries) |
Allocate memory. | |
template<typename scalar_type > | |
static void | free (sycl::queue &stream, scalar_type *pntr) |
Free memory. | |
template<typename scalar_type > | |
static void | copy_n (sycl::queue &stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
Equivalent to std::copy_n() but using SYCL arrays. | |
template<typename scalar_type > | |
static void | copy_n (sycl::queue &stream, std::complex< scalar_type > const source[], size_t num_entries, scalar_type destination[]) |
Copy-convert complex-to-real. | |
template<typename scalar_type > | |
static void | copy_n (sycl::queue &stream, scalar_type const source[], size_t num_entries, std::complex< scalar_type > destination[]) |
Copy-convert real-to-complex. | |
template<typename scalar_type > | |
static void | copy_device_to_host (sycl::queue &stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
Copy the data from the device to the host. | |
template<typename scalar_type > | |
static void | copy_device_to_device (sycl::queue &stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
Copy the data from the device to the device. | |
template<typename scalar_type > | |
static void | copy_host_to_device (sycl::queue &stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
Copy the data from the host to the device. | |
template<typename scalar_type > | |
static scalar_type * | allocate (hipStream_t, size_t num_entries) |
Allocate memory. | |
template<typename scalar_type > | |
static void | free (hipStream_t, scalar_type *pntr) |
Free memory. | |
template<typename scalar_type > | |
static void | copy_n (hipStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
Equivalent to std::copy_n() but using ROCm (HIP) arrays. | |
template<typename scalar_type > | |
static void | copy_n (hipStream_t stream, std::complex< scalar_type > const source[], size_t num_entries, scalar_type destination[]) |
Copy-convert complex-to-real. | |
template<typename scalar_type > | |
static void | copy_n (hipStream_t stream, scalar_type const source[], size_t num_entries, std::complex< scalar_type > destination[]) |
Copy-convert real-to-complex. | |
template<typename scalar_type > | |
static void | copy_device_to_host (hipStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
Copy the data from the device to the host. | |
template<typename scalar_type > | |
static void | copy_device_to_device (hipStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
Copy the data from the device to the device. | |
template<typename scalar_type > | |
static void | copy_host_to_device (hipStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
Copy the data from the host to the device. | |
Specialization for the data operations in CUDA mode.
Specialization for the data operations in ROCm mode.