CUDA版本的OpenCL在windows 7的下编程初步

简介:  参考文献: http://blog.csdn.net/neoxmu/article/details/8866928 我安装的是CUDA5.5,代码如下:   //#include "stdafx.

 参考文献:

http://blog.csdn.net/neoxmu/article/details/8866928

我安装的是CUDA5.5,代码如下:

 

//#include "stdafx.h"
#include "CL\cl.h"
#include <stdlib.h>
#include <stdio.h>

#pragma comment(lib,"OpenCL.lib")

#define CL_VERBOSE
void openclRetTackle(cl_int retValue, char* processInfo){
	if(retValue!=CL_SUCCESS){
#if (defined CL_DEBUG) || (defined CL_VERBOSE)
		printf("%s Error!\n",processInfo);
#endif
		exit(-1);
	}else{
#ifdef CL_VERBOSE
		printf("%s Success!\n",processInfo);
#endif
	}
}

cl_platform_id cpPlatform;
cl_device_id cdDevice;
cl_context cxGPUContext;
cl_command_queue cqCommandQueue;


int openclInit()
{
	cl_int ret;
	//得到平台ID
	openclRetTackle( clGetPlatformIDs(1, &cpPlatform, NULL), "clGetPlatFormIDs");
	//得到GPU设备ID
	openclRetTackle( clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU,1,&cdDevice,NULL), "clGetDeviceIDs");
	//获取GPU设备上下文
	cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ret);
	openclRetTackle( ret , "clCreateContext" );
	//开辟任务队列
	cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevice, 0, &ret);
	openclRetTackle( ret , "clCreateCommandQueue");
	return CL_SUCCESS;
}

int run()
{
	openclInit();	
	system("pause");
	return 0;
}


 

<span style="font-family:Microsoft YaHei;font-size:18px;">//#include "stdafx.h"
#include <stdio.h>
#include <vector>
#include <CL/cl.h>
#include <iostream>
#include <fstream>
#include <string>

#pragma comment(lib,"OpenCL.lib")

int print_device()
{
	cl_int err;
	cl_uint num;
	err = clGetPlatformIDs(0, 0, &num);
	if(err != CL_SUCCESS) 
	{
		std::cerr << "Unable to get platforms\n";
		return 0;
	}
	std::vector<cl_platform_id> platforms(num);
	err = clGetPlatformIDs(num, &platforms[0], &num);
	if(err != CL_SUCCESS)
	{
		std::cerr << "Unable to get platform ID\n";
		return 0;
	}

	cl_context_properties prop[] = { CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(platforms[0]), 0 };
	cl_context context = clCreateContextFromType(prop, CL_DEVICE_TYPE_DEFAULT, NULL, NULL, NULL);
	if(context == 0)
	{
		std::cerr << "Can't create OpenCL context\n";
		return 0;
	}

	size_t cb;
	clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
	std::vector<cl_device_id> devices(cb / sizeof(cl_device_id));
	clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, &devices[0], 0);

	clGetDeviceInfo(devices[0], CL_DEVICE_NAME, 0, NULL, &cb);
	std::string devname;
	devname.resize(cb);
	clGetDeviceInfo(devices[0], CL_DEVICE_NAME, cb, &devname[0], 0);
	std::cout << "Device: " << devname.c_str() << "\n";

	clReleaseContext(context);
	return 0;

}

cl_program load_program(cl_context context, const char* filename)
{
	std::ifstream in(filename, std::ios_base::binary);
	if(!in.good()) 
	{
		return 0;

	}// get file length
	in.seekg(0, std::ios_base::end);
	size_t length = in.tellg();
	in.seekg(0, std::ios_base::beg);

	// read program source
	std::vector<char> data(length + 1);
	in.read(&data[0], length);
	data[length] = 0;

	// create and build program 
	const char* source = &data[0];
	cl_program program = clCreateProgramWithSource(context, 1, &source, 0, 0);
	if(program == 0) 
	{
		return 0;
	}
	if(clBuildProgram(program, 0, 0, 0, 0, 0) != CL_SUCCESS) 
	{
		return 0;
	}
	return program;
}
int main()
{
	print_device();
	cl_int err;
	cl_uint num;
	err = clGetPlatformIDs(0, 0, &num);
	if(err != CL_SUCCESS) 
	{
		std::cerr << "Unable to get platforms\n";
		return 0;
	}

	std::vector<cl_platform_id> platforms(num);
	err = clGetPlatformIDs(num, &platforms[0], &num);
	if(err != CL_SUCCESS) 
	{
		std::cerr << "Unable to get platform ID\n";
		return 0;
	}
	cl_context_properties prop[] = { CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(platforms[0]), 0 };
	cl_context context = clCreateContextFromType(prop, CL_DEVICE_TYPE_DEFAULT, NULL, NULL, NULL);
	if(context == 0) 
	{
		std::cerr << "Can't create OpenCL context\n";
		return 0;
	}

	size_t cb;
	clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
	std::vector<cl_device_id> devices(cb / sizeof(cl_device_id));
	clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, &devices[0], 0);

	clGetDeviceInfo(devices[0], CL_DEVICE_NAME, 0, NULL, &cb);
	std::string devname;
	devname.resize(cb);
	clGetDeviceInfo(devices[0], CL_DEVICE_NAME, cb, &devname[0], 0);
	std::cout << "Device: " << devname.c_str() << "\n";

	cl_command_queue queue = clCreateCommandQueue(context, devices[0], 0, 0);
	if(queue == 0)
	{
		std::cerr << "Can't create command queue\n";
		clReleaseContext(context);
		return 0;
	}

	const int DATA_SIZE = 1048576;
	std::vector<float> a(DATA_SIZE), b(DATA_SIZE), res(DATA_SIZE);
	for(int i = 0; i < DATA_SIZE; i++) 
	{
		a[i] = std::rand();
		b[i] = std::rand();
	}

	cl_mem cl_a = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float) * DATA_SIZE, &a[0], NULL);
	cl_mem cl_b = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float) * DATA_SIZE, &b[0], NULL);
	cl_mem cl_res = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float) * DATA_SIZE, NULL, NULL);
	if(cl_a == 0 || cl_b == 0 || cl_res == 0)
	{
		std::cerr << "Can't create OpenCL buffer\n";
		clReleaseMemObject(cl_a);
		clReleaseMemObject(cl_b);
		clReleaseMemObject(cl_res);
		clReleaseCommandQueue(queue);
		clReleaseContext(context);
		return 0;
	}

	cl_program program = load_program(context, "..\\shader.txt");
	if(program == 0) 
	{
		std::cerr << "Can't load or build program\n";
		clReleaseMemObject(cl_a);
		clReleaseMemObject(cl_b);
		clReleaseMemObject(cl_res);
		clReleaseCommandQueue(queue);
		clReleaseContext(context);
		return 0;
	}
	cl_kernel adder = clCreateKernel(program, "adder", 0);
	if(adder == 0)
	{
		std::cerr << "Can't load kernel\n";
		clReleaseProgram(program);
		clReleaseMemObject(cl_a);
		clReleaseMemObject(cl_b);
		clReleaseMemObject(cl_res);
		clReleaseCommandQueue(queue);
		clReleaseContext(context);
		return 0;
	}

	clSetKernelArg(adder, 0, sizeof(cl_mem), &cl_a);

	clSetKernelArg(adder, 1, sizeof(cl_mem), &cl_b);

	clSetKernelArg(adder, 2, sizeof(cl_mem), &cl_res);

	size_t work_size = DATA_SIZE;

	err = clEnqueueNDRangeKernel(queue, adder, 1, 0, &work_size, 0, 0, 0, 0);
	if(err == CL_SUCCESS)
	{

		err = clEnqueueReadBuffer(queue, cl_res, CL_TRUE, 0, sizeof(float) * DATA_SIZE, &res[0], 0, 0, 0);
	}
	if(err == CL_SUCCESS)
	{
		bool correct = true;
		for(int i = 0; i < DATA_SIZE; i++) 
		{
			if(a[i] + b[i] != res[i])
			{
				correct = false;
				break;
			}
		}
		if(correct) 
		{

			std::cout << "Data is correct\n";
		}
		else 
		{

			std::cout << "Data is incorrect\n";

		}
	}

	else 
	{
		std::cerr << "Can't run kernel or read back data\n";
	}


	clReleaseKernel(adder);
	clReleaseProgram(program);
	clReleaseMemObject(cl_a);
	clReleaseMemObject(cl_b);
	clReleaseMemObject(cl_res);
	clReleaseCommandQueue(queue);
	clReleaseContext(context);	
	return 0;

}</span>

 

 

需要使用的数据:

 

shader.txt

<span style="font-family:Microsoft YaHei;font-size:18px;">__kernel void adder(__global const float* a, __global const float* b, __global float* result)
{
    int idx = get_global_id(0);
    result[idx] = a[idx] + b[idx];
}</span>



 

相关实践学习
在云上部署ChatGLM2-6B大模型(GPU版)
ChatGLM2-6B是由智谱AI及清华KEG实验室于2023年6月发布的中英双语对话开源大模型。通过本实验,可以学习如何配置AIGC开发环境,如何部署ChatGLM2-6B大模型。
相关文章
|
1月前
|
开发框架 安全 .NET
Microsoft .NET Framework 3.5、4.5.2、4.8.1,适用于 Windows 版本的 .NET,Microsoft C Runtime等下载
.NET Framework是Windows平台的开发框架,包含CLR和FCL,支持多种语言开发桌面、Web应用。常用版本有3.5、4.5.2、4.8.1,系统可同时安装多个版本,确保软件兼容运行。
528 0
Microsoft .NET Framework 3.5、4.5.2、4.8.1,适用于 Windows 版本的 .NET,Microsoft C Runtime等下载
|
4月前
|
Windows
Windows下版本控制器(SVN)-验证是否安装成功+配置版本库+启动服务器端程序
Windows下版本控制器(SVN)-验证是否安装成功+配置版本库+启动服务器端程序
137 2
|
4月前
|
运维 Linux 虚拟化
VMware虚拟机安装教程,Windows下安装VMware虚拟机,附VMware下载,Windows各版本系统镜像下载
虚拟机技术允许一台物理机运行多个操作系统,提升资源利用率,节省成本。通过快照、克隆等功能,实现系统快速恢复与复制,提高运维效率。本文详细介绍VMware虚拟机的安装步骤、Windows镜像下载及系统安装激活流程,适合初学者快速入门。
2220 0
|
4月前
|
Linux Docker Windows
windows docker安装报错适用于 Linux 的 Windows 子系统必须更新到最新版本才能继续。可通过运行 “wsl.exe --update” 进行更新。
适用于 Linux 的 Windows 子系统需更新至最新版本(如 wsl.2.4.11.0.x64.msi)以解决 2025 年 Windows 更新后可能出现的兼容性问题。用户可通过运行 “wsl.exe --update” 或访问提供的链接下载升级包进行更新。
1610 0
|
5月前
|
Windows
windows版本 软件管理
本内容讨论了将程序生成为exe文件并放置在D盘的过程,强调这一操作不会自动完成,需要用户进行手动设置或使用特定工具来实现。这对于编程初学者了解程序编译与部署的限制很有帮助,明确了自动化的边界条件。
170 1
|
6月前
|
消息中间件 NoSQL Linux
Redis的基本介绍和安装方式(包括Linux和Windows版本),以及常用命令的演示
Redis(Remote Dictionary Server)是一个高性能的开源键值存储数据库。它支持字符串、列表、散列、集合等多种数据类型,具有持久化、发布/订阅等高级功能。由于其出色的性能和广泛的使用场景,Redis在应用程序中常作为高速缓存、消息队列等用途。
913 16
|
Windows
Windows下版本控制器(SVN)- 配置版本库
Windows下版本控制器(SVN)- 配置版本库
59 0
|
8月前
|
机器学习/深度学习 并行计算 PyTorch
Windows下CUDA+pytorch安装
以下是关于在Windows下安装CUDA和PyTorch的简要介绍及参考链接:
454 0
Windows下CUDA+pytorch安装
|
网络协议 API Windows
MASM32编程调用 API函数RtlIpv6AddressToString,windows 10 容易,Windows 7 折腾
MASM32编程调用 API函数RtlIpv6AddressToString,windows 10 容易,Windows 7 折腾