多态函数包装器(Polymorphic Function Wrappers):
nvfunctional 头文件提供了一个多态函数包装器类模板 nvstd::function。该类模板的实例可用于存储、复制和调用任何可调用目标,例如 lambda 表达式。nvstd::function 既可以在主机代码中使用,也可以在设备代码中使用。
例子:
#include <nvfunctional>
// Callable from device code only.
__device__ int foo_d()
{
  return 1;
}

// Callable from both host and device code.
__host__ __device__ int foo_hd()
{
  return 2;
}

// Callable from host code only.
__host__ int foo_h()
{
  return 3;
}
// Device-side usage: wraps a __device__ function, a __host__ __device__
// function and a lambda in nvstd::function objects, then writes the sum of
// the three call results through `result`.
__global__ void kernel(int *result)
{
  nvstd::function<int()> device_target = foo_d;
  nvstd::function<int()> host_device_target = foo_hd;
  nvstd::function<int()> lambda_target = []() { return 10; };

  int total = device_target();
  total += host_device_target();
  total += lambda_target();
  *result = total;
}
// Usage from a __host__ __device__ function: only targets that are callable
// in both execution spaces are wrapped here (a __host__ __device__ function
// and a lambda). The sum of both call results is written through `result`.
__host__ __device__ void hostdevice_func(int *result)
{
  nvstd::function<int()> host_device_target = foo_hd;
  nvstd::function<int()> lambda_target = []() { return 10; };

  int total = host_device_target();
  total += lambda_target();
  *result = total;
}
// Host-side usage: wraps a __host__ function, a __host__ __device__ function
// and a lambda in nvstd::function objects, then writes the sum of the three
// call results through `result`.
__host__ void host_func(int *result)
{
  nvstd::function<int()> host_target = foo_h;
  nvstd::function<int()> host_device_target = foo_hd;
  nvstd::function<int()> lambda_target = []() { return 10; };

  int total = host_target();
  total += host_device_target();
  total += lambda_target();
  *result = total;
}
主机代码中的 nvstd::function 实例不能用 __device__ 函数的地址初始化,也不能用 operator() 为 __device__ 函数的函子(functor)初始化。设备代码中的 nvstd::function 实例不能用 __host__ 函数的地址初始化,也不能用 operator() 为 __host__ 函数的函子初始化。
nvstd::function 实例在运行时不能从主机代码传递到设备代码(反之亦然)。如果 __global__ 函数是从主机代码启动的,则 nvstd::function 不能用作该 __global__ 函数的参数类型。
例子:
#include <nvfunctional>
// Callable from device code only.
__device__ int foo_d()
{
  return 1;
}

// Callable from host code only.
__host__ int foo_h()
{
  return 3;
}

// Namespace-scope lambda without any execution space annotation; the example
// that follows treats its operator() as a __host__ function.
auto lam_h = []() { return 0; };
// Counter-example (intentionally ill-formed): inside device code an
// nvstd::function may not be initialized from a host-only callable, so both
// initializations below are diagnosed.
__global__ void k(void) {
// error: initialized with address of __host__ function
nvstd::function<int()> fn1 = foo_h;
// error: initialized with address of functor with
// __host__ operator() function
nvstd::function<int()> fn2 = lam_h;
}
// Kernel taking an nvstd::function by value; used by the host-side example
// below to show that launching it from host code with such an argument is an
// error.
__global__ void kern(nvstd::function<int()> f1) { }
// Counter-example (intentionally ill-formed): host code may not initialize an
// nvstd::function from a device-only callable, and may not pass an
// nvstd::function to a kernel it launches. Every statement flagged with
// "error" below is diagnosed.
void foo(void)
{
  // error: initialized with address of __device__ function
  nvstd::function<int()> fn1 = foo_d;

  // Lambda explicitly annotated __device__, so its operator() is a
  // __device__ function.
  auto lam_d = [=] __device__ { return 1; };

  // error: initialized with address of functor with
  // __device__ operator() function
  nvstd::function<int()> fn2 = lam_d;

  // error: passing nvstd::function from host to device
  // (the execution configuration must be spelled with unbroken <<< and >>>)
  kern<<<1, 1>>>(fn2);
}
nvstd::function 在 nvfunctional 头文件中的定义如下:
namespace nvstd {

// Primary template, declared only: this listing defines just the partial
// specialization for function types that follows.
template <class>
class function;

// Polymorphic wrapper for any target callable as _RetType(_ArgTypes...).
template <class _RetType, class... _ArgTypes>
class function<_RetType(_ArgTypes...)>
{
public:
  // constructors
  __device__ __host__ function() noexcept;
  __device__ __host__ function(nullptr_t) noexcept;
  __device__ __host__ function(const function&);
  __device__ __host__ function(function&&);

  // Construct from an arbitrary callable target.
  template <class _F>
  __device__ __host__ function(_F);

  // destructor
  __device__ __host__ ~function();

  // assignment operators
  __device__ __host__ function& operator=(const function&);
  __device__ __host__ function& operator=(function&&);
  __device__ __host__ function& operator=(nullptr_t);

  // Assign from an arbitrary callable target. _F is not a parameter of the
  // class template, so this operator needs its own template header.
  template <class _F>
  __device__ __host__ function& operator=(_F&&);

  // swap
  __device__ __host__ void swap(function&) noexcept;

  // function capacity
  __device__ __host__ explicit operator bool() const noexcept;

  // function invocation: declared for both execution spaces, since instances
  // are invoked from host code as well as device code.
  __device__ __host__ _RetType operator()(_ArgTypes...) const;
};

// null pointer comparisons
template <class _R, class... _ArgTypes>
__device__ __host__
bool operator==(const function<_R(_ArgTypes...)>&, nullptr_t) noexcept;

template <class _R, class... _ArgTypes>
__device__ __host__
bool operator==(nullptr_t, const function<_R(_ArgTypes...)>&) noexcept;

template <class _R, class... _ArgTypes>
__device__ __host__
bool operator!=(const function<_R(_ArgTypes...)>&, nullptr_t) noexcept;

template <class _R, class... _ArgTypes>
__device__ __host__
bool operator!=(nullptr_t, const function<_R(_ArgTypes...)>&) noexcept;

// specialized algorithms
template <class _R, class... _ArgTypes>
__device__ __host__
void swap(function<_R(_ArgTypes...)>&, function<_R(_ArgTypes...)>&);

}  // namespace nvstd
实验特性:扩展 Lambda(Extended Lambdas)
nvcc 标志 '--expt-extended-lambda' 允许在 lambda 表达式中显式指定执行空间注释。执行空间注释应出现在 'lambda-introducer' 之后、可选的 'lambda-declarator' 之前。指定 '--expt-extended-lambda' 标志时,nvcc 会定义宏 __CUDACC_EXTENDED_LAMBDA__。
“扩展 __device__ lambda”(extended __device__ lambda)是指用 __device__ 显式注释、并且定义在 __host__ 或 __host__ __device__ 函数的直接或嵌套块作用域内的 lambda 表达式。
“扩展 __host__ __device__ lambda”(extended __host__ __device__ lambda)是指同时用 __host__ 和 __device__ 显式注释、并且定义在 __host__ 或 __host__ __device__ 函数的直接或嵌套块作用域内的 lambda 表达式。
“扩展 lambda”指扩展 __device__ lambda 或扩展 __host__ __device__ lambda。扩展 lambda 可以用在 __global__ 函数模板实例化的类型实参中。
如果没有显式指定执行空间注释,则根据包围该 lambda 所关联闭包类的作用域来推导执行空间,具体规则见 C++11 支持一节。执行空间注释适用于该 lambda 所关联闭包类的所有方法。
例子:
// Lambdas defined inside a host function: only those carrying an explicit
// __device__ or __host__ __device__ annotation are extended lambdas.
void foo_host(void)
{
  // No explicit execution space annotation: not an extended lambda.
  auto unannotated = [] {};

  // Extended __device__ lambda.
  auto device_only = [] __device__ {};

  // Extended __host__ __device__ lambda.
  auto host_device = [] __host__ __device__ {};

  // Explicitly annotated with '__host__' alone: not an extended lambda.
  auto host_only = [] __host__ {};
}
// Lambdas defined inside a __host__ __device__ function: as in a host
// function, only those carrying an explicit __device__ or
// __host__ __device__ annotation are extended lambdas.
__host__ __device__ void foo_host_device(void)
{
  // No explicit execution space annotation: not an extended lambda.
  auto unannotated = [] {};

  // Extended __device__ lambda.
  auto device_only = [] __device__ {};

  // Extended __host__ __device__ lambda.
  auto host_device = [] __host__ __device__ {};

  // Explicitly annotated with '__host__' alone: not an extended lambda.
  auto host_only = [] __host__ {};
}
// Lambdas defined inside a __device__ function: the enclosing function is
// neither __host__ nor __host__ __device__, so none of the lambdas below is
// an extended lambda, whatever annotation it carries.
__device__ void foo_device(void)
{
  auto unannotated = [] {};
  auto device_only = [] __device__ {};
  auto host_device = [] __host__ __device__ {};
  auto host_only = [] __host__ {};
}
// Namespace-scope lambdas. Neither lam1 nor lam2 is an extended lambda,
// because neither is defined inside a __host__ or __host__ __device__
// function.
auto lam1 = []() {};
auto lam2 = [] __host__ __device__ () {};