// Gaussian blur
//
// Smooths an 8-bit single-channel image with a 5x5 integer-weighted kernel.
// One work-item produces one output pixel at (get_global_id(0), get_global_id(1)).
//
//   input   source pixels, row-major, `width` bytes per row
//   output  destination pixels, same layout as `input`
//   width   image width in pixels
//   height  image height in pixels
//
// Samples that fall outside the image are taken from the nearest edge pixel
// (coordinates are clamped to [0, width-1] x [0, height-1]).
__kernel void gaussian_blur(__global uchar* input, __global uchar* output, int width, int height) {
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // 5x5 weights stored row-major; tap i sits at offset (i % 5 - 2, i / 5 - 2).
    const float taps[25] = {
        2,  4,  5,  4, 2,
        4,  9, 12,  9, 4,
        5, 12, 15, 12, 5,
        4,  9, 12,  9, 4,
        2,  4,  5,  4, 2
    };

    float acc = 0.0f;   // weighted sum of the neighbourhood
    float norm = 0.0f;  // sum of the weights, used to normalise `acc`
    for (int i = 0; i < 25; i++) {
        const int sx = clamp(col + (i % 5) - 2, 0, width - 1);
        const int sy = clamp(row + (i / 5) - 2, 0, height - 1);
        const float tap = taps[i];
        acc += input[sy * width + sx] * tap;
        norm += tap;
    }

    // The float-to-uchar cast truncates the normalised value.
    output[row * width + col] = (uchar)(acc / norm);
}
// Sobel gradient
// Computes the 3x3 Sobel gradient of an 8-bit single-channel image.
// One work-item handles the pixel at (get_global_id(0), get_global_id(1)).
//
//   input   source pixels, row-major, `width` bytes per row
//   grad    receives the gradient magnitude hypot(gx, gy) per pixel
//   angle   receives the gradient direction atan2(gy, gx) in radians per pixel
//   width   image width in pixels
//   height  image height in pixels
//
// gx is (right column - left column) and gy is (bottom row - top row), so a
// positive gy means intensity increases with increasing y.
//
// Neighbour coordinates are clamped to the image, so border pixels reuse the
// nearest edge pixel instead of reading outside the buffer. Work-items outside
// the image (e.g. when the global size is rounded up) do nothing.
__kernel void sobel_gradient(__global uchar* input, __global float* grad, __global float* angle, int width, int height) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    if (x >= width || y >= height) {
        return;
    }

    // Clamped column indices and row offsets of the 3x3 neighbourhood.
    const int xl = max(x - 1, 0);
    const int xr = min(x + 1, width - 1);
    const int row_up = max(y - 1, 0) * width;
    const int row_mid = y * width;
    const int row_dn = min(y + 1, height - 1) * width;

    // uchar samples are widened to int so the signed sums below cannot wrap.
    const int tl = input[row_up + xl];
    const int tc = input[row_up + x];
    const int tr = input[row_up + xr];
    const int ml = input[row_mid + xl];
    const int mr = input[row_mid + xr];
    const int bl = input[row_dn + xl];
    const int bc = input[row_dn + x];
    const int br = input[row_dn + xr];

    const int gx = (tr + 2 * mr + br) - (tl + 2 * ml + bl);
    const int gy = (bl + 2 * bc + br) - (tl + 2 * tc + tr);

    grad[row_mid + x] = hypot((float)gx, (float)gy);
    angle[row_mid + x] = atan2((float)gy, (float)gx);
}
// Non-maximum suppression
// Thins edges: a pixel keeps its gradient magnitude only if it is at least as
// large as both neighbours along the gradient direction; otherwise it is 0.
// One work-item handles the pixel at (get_global_id(0), get_global_id(1)).
//
//   grad    gradient magnitude per pixel, row-major, `width` floats per row
//   angle   gradient direction per pixel in radians (atan2(gy, gx) convention)
//   output  receives the surviving magnitude, saturated to [0, 255]
//   width   image width in pixels
//   height  image height in pixels
//
// Notes on the direction buckets:
//  * The direction is folded into [0, 180) degrees and quantised to four axes.
//  * The diagonal neighbours assume image coordinates (y grows downwards) and a
//    gy that is positive when intensity grows with y: a 45 degree gradient
//    points towards (x+1, y+1), a 135 degree gradient towards (x-1, y+1).
//  * Anything that is not one of the three explicit buckets (including values
//    at or marginally above 180 caused by rounding) is treated as horizontal.
//
// Neighbour coordinates are clamped to the image, so border pixels are compared
// against themselves instead of reading outside the buffer. Work-items outside
// the image do nothing.
__kernel void non_maximum_suppression(__global float* grad, __global float* angle, __global uchar* output, int width, int height) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    if (x >= width || y >= height) {
        return;
    }

    const int idx = y * width + x;

    float dir = degrees(angle[idx]);
    if (dir < 0.0f) {
        dir += 180.0f;
    }

    // Step from the centre pixel to the neighbour "ahead" along the gradient;
    // the neighbour "behind" is the mirrored step.
    int step_x = 1;
    int step_y = 0;  // default: horizontal gradient
    if (dir >= 22.5f && dir < 67.5f) {
        step_x = 1;
        step_y = 1;
    } else if (dir >= 67.5f && dir < 112.5f) {
        step_x = 0;
        step_y = 1;
    } else if (dir >= 112.5f && dir < 157.5f) {
        step_x = -1;
        step_y = 1;
    }

    const int ahead_x = clamp(x + step_x, 0, width - 1);
    const int ahead_y = clamp(y + step_y, 0, height - 1);
    const int behind_x = clamp(x - step_x, 0, width - 1);
    const int behind_y = clamp(y - step_y, 0, height - 1);

    const float g = grad[idx];
    const float g1 = grad[behind_y * width + behind_x];
    const float g2 = grad[ahead_y * width + ahead_x];

    // Magnitudes can exceed 255, so saturate rather than cast: a plain
    // (uchar) cast of an out-of-range float does not have a defined result.
    output[idx] = (g >= g1 && g >= g2) ? convert_uchar_sat(g) : (uchar)0;
}
// Double-threshold edge linking
// Classifies each pixel of the thinned edge map with two thresholds.
// One work-item handles the pixel at (get_global_id(0), get_global_id(1)).
//
//   input   edge strength per pixel, row-major, `width` bytes per row
//   output  receives 255 for edge pixels and 0 otherwise
//   width   image width in pixels
//   height  image height in pixels
//   low     weak-edge threshold
//   high    strong-edge threshold
//
// A pixel >= high is an edge. A pixel in [low, high) is an edge only if some
// pixel in its 3x3 neighbourhood (coordinates clamped to the image) is >= high.
// Everything else is cleared. This is a single pass over direct neighbours.
__kernel void hysteresis(__global uchar* input, __global uchar* output, int width, int height, uchar low, uchar high) {
    const int col = get_global_id(0);
    const int row = get_global_id(1);
    const int idx = row * width + col;
    const uchar centre = input[idx];

    uchar result = 0;
    if (centre >= high) {
        result = 255;
    } else if (centre >= low) {
        // Search the 8-neighbourhood for a strong edge; stop at the first hit.
        for (int k = 0; k < 9 && result == 0; k++) {
            const int nx = clamp(col + (k % 3) - 1, 0, width - 1);
            const int ny = clamp(row + (k / 3) - 1, 0, height - 1);
            if (input[ny * width + nx] >= high) {
                result = 255;
            }
        }
    }

    output[idx] = result;
}
#ifndef COMPUTERVISION_CANNYMAIN_H
#define COMPUTERVISION_CANNYMAIN_H#include <opencv2/opencv.hpp>
#include "../helper/opencl_helper.h"int runCanny() {cv::Mat image = cv::imread("../src/opencl/sources/img.png", cv::IMREAD_GRAYSCALE);if (image.empty()) {printf("Image load failed!\n");return -1;}int width = image.cols;int height = image.rows;size_t imgSize = width * height;OpenCLObjects ocl = init_opencl("canny.cl", "gaussian_blur");cl_int err;cl_mem buf_input = clCreateBuffer(ocl.context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,imgSize, image.data, &err);cl_mem buf_blur = clCreateBuffer(ocl.context, CL_MEM_READ_WRITE, imgSize, NULL, &err);cl_mem buf_grad = clCreateBuffer(ocl.context, CL_MEM_READ_WRITE, sizeof(float) * imgSize, NULL, &err);cl_mem buf_angle = clCreateBuffer(ocl.context, CL_MEM_READ_WRITE, sizeof(float) * imgSize, NULL, &err);cl_mem buf_nms = clCreateBuffer(ocl.context, CL_MEM_READ_WRITE, imgSize, NULL, &err);cl_mem buf_output = clCreateBuffer(ocl.context, CL_MEM_WRITE_ONLY, imgSize, NULL, &err);ocl.kernel = clCreateKernel(ocl.program, "gaussian_blur", &err);clSetKernelArg(ocl.kernel, 0, sizeof(cl_mem), &buf_input);clSetKernelArg(ocl.kernel, 1, sizeof(cl_mem), &buf_blur);clSetKernelArg(ocl.kernel, 2, sizeof(int), &width);clSetKernelArg(ocl.kernel, 3, sizeof(int), &height);size_t gsize[] = { (size_t)width, (size_t)height };clEnqueueNDRangeKernel(ocl.queue, ocl.kernel, 2, NULL, gsize, NULL, 0, NULL, NULL);clFinish(ocl.queue);ocl.kernel = clCreateKernel(ocl.program, "sobel_gradient", &err);clSetKernelArg(ocl.kernel, 0, sizeof(cl_mem), &buf_blur);clSetKernelArg(ocl.kernel, 1, sizeof(cl_mem), &buf_grad);clSetKernelArg(ocl.kernel, 2, sizeof(cl_mem), &buf_angle);clSetKernelArg(ocl.kernel, 3, sizeof(int), &width);clSetKernelArg(ocl.kernel, 4, sizeof(int), &height);clEnqueueNDRangeKernel(ocl.queue, ocl.kernel, 2, NULL, gsize, NULL, 0, NULL, NULL);clFinish(ocl.queue);ocl.kernel = clCreateKernel(ocl.program, "non_maximum_suppression", &err);clSetKernelArg(ocl.kernel, 0, sizeof(cl_mem), &buf_grad);clSetKernelArg(ocl.kernel, 1, sizeof(cl_mem), 
&buf_angle);clSetKernelArg(ocl.kernel, 2, sizeof(cl_mem), &buf_nms);clSetKernelArg(ocl.kernel, 3, sizeof(int), &width);clSetKernelArg(ocl.kernel, 4, sizeof(int), &height);clEnqueueNDRangeKernel(ocl.queue, ocl.kernel, 2, NULL, gsize, NULL, 0, NULL, NULL);clFinish(ocl.queue);uchar low = 50, high = 100;ocl.kernel = clCreateKernel(ocl.program, "hysteresis", &err);clSetKernelArg(ocl.kernel, 0, sizeof(cl_mem), &buf_nms);clSetKernelArg(ocl.kernel, 1, sizeof(cl_mem), &buf_output);clSetKernelArg(ocl.kernel, 2, sizeof(int), &width);clSetKernelArg(ocl.kernel, 3, sizeof(int), &height);clSetKernelArg(ocl.kernel, 4, sizeof(uchar), &low);clSetKernelArg(ocl.kernel, 5, sizeof(uchar), &high);clEnqueueNDRangeKernel(ocl.queue, ocl.kernel, 2, NULL, gsize, NULL, 0, NULL, NULL);clFinish(ocl.queue);std::vector<uchar> result(imgSize);clEnqueueReadBuffer(ocl.queue, buf_output, CL_TRUE, 0, imgSize, result.data(), 0, NULL, NULL);cv::Mat outputImg(height, width, CV_8UC1, result.data());cv::imshow("Original", image);cv::imshow("Canny Edge", outputImg);cv::waitKey(0);release_opencl(&ocl);clReleaseMemObject(buf_input);clReleaseMemObject(buf_blur);clReleaseMemObject(buf_grad);clReleaseMemObject(buf_angle);clReleaseMemObject(buf_nms);clReleaseMemObject(buf_output);return 0;
}#endif
#ifndef COMPUTERVISION_OPENCL_HELPER_H
#define COMPUTERVISION_OPENCL_HELPER_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Apple ships the OpenCL headers under <OpenCL/...>; other platforms use <CL/...>. */
#if defined(__APPLE__) || defined(__MACOSX)
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

/*
 * Abort the process with a message on stderr when `err` is not CL_SUCCESS.
 *
 *   err  an OpenCL status code (evaluated exactly once)
 *   msg  name of the call that produced it, printed in the message
 *
 * Wrapped in do { } while (0) so that `CHECK_ERROR(e, "x");` behaves as a
 * single statement, including inside an unbraced if/else.
 */
#define CHECK_ERROR(err, msg) \
    do { \
        cl_int check_error_status_ = (err); \
        if (check_error_status_ != CL_SUCCESS) { \
            fprintf(stderr, "%s failed with error %d\n", (msg), (int)check_error_status_); \
            exit(1); \
        } \
    } while (0)

/* The OpenCL handles created by init_opencl() and released by release_opencl(). */
typedef struct {
    cl_platform_id platform;  /* first platform reported by clGetPlatformIDs */
    cl_device_id device;      /* default device of that platform */
    cl_context context;       /* context holding `device` */
    cl_command_queue queue;   /* command queue on `device` */
    cl_program program;       /* program built from the kernel source file */
    cl_kernel kernel;         /* kernel currently selected from `program` */
} OpenCLObjects;
/*
 * Read a whole file into a freshly allocated, NUL-terminated string.
 *
 *   filename  path of the file to read
 *
 * Returns a malloc()ed buffer that the caller must free(). The function never
 * returns NULL: if the file cannot be opened, measured, allocated for or read,
 * it prints a diagnostic with perror() and terminates the process with exit(1).
 */
char *read_source(const char *filename) {
    /* Binary mode: ftell() is only a reliable byte count on binary streams. */
    FILE *fp = fopen(filename, "rb");
    if (!fp) {
        perror("Failed to open kernel file");
        exit(1);
    }

    long length = -1;
    if (fseek(fp, 0, SEEK_END) == 0) {
        length = ftell(fp);
    }
    if (length < 0 || fseek(fp, 0, SEEK_SET) != 0) {
        perror("Failed to determine kernel file size");
        fclose(fp);
        exit(1);
    }

    /* The cast is kept because this header is also compiled as C++. */
    char *source = (char *)malloc((size_t)length + 1);
    if (!source) {
        perror("Failed to allocate kernel source buffer");
        fclose(fp);
        exit(1);
    }

    size_t got = fread(source, 1, (size_t)length, fp);
    if (got != (size_t)length && ferror(fp)) {
        perror("Failed to read kernel file");
        free(source);
        fclose(fp);
        exit(1);
    }
    /* Terminate after the bytes actually read, not after the expected size. */
    source[got] = '\0';

    fclose(fp);
    return source;
}
OpenCLObjects init_opencl(const char *source_file, const char *kernel_name) {OpenCLObjects ocl;cl_int err;err = clGetPlatformIDs(1, &ocl.platform, NULL);CHECK_ERROR(err, "clGetPlatformIDs");err = clGetDeviceIDs(ocl.platform, CL_DEVICE_TYPE_DEFAULT, 1, &ocl.device, NULL);CHECK_ERROR(err, "clGetDeviceIDs");ocl.context = clCreateContext(NULL, 1, &ocl.device, NULL, NULL, &err);CHECK_ERROR(err, "clCreateContext");ocl.queue = clCreateCommandQueue(ocl.context, ocl.device, 0, &err);CHECK_ERROR(err, "clCreateCommandQueue");char *source = read_source(source_file);ocl.program = clCreateProgramWithSource(ocl.context, 1, (const char **)&source, NULL, &err);CHECK_ERROR(err, "clCreateProgramWithSource");err = clBuildProgram(ocl.program, 1, &ocl.device, NULL, NULL, NULL);if (err != CL_SUCCESS) {char log[4096];clGetProgramBuildInfo(ocl.program, ocl.device, CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL);fprintf(stderr, "Build Error:\n%s\n", log);exit(1);}ocl.kernel = clCreateKernel(ocl.program, kernel_name, &err);CHECK_ERROR(err, "clCreateKernel");free(source);return ocl;
}void release_opencl(OpenCLObjects *ocl) {clReleaseKernel(ocl->kernel);clReleaseProgram(ocl->program);clReleaseCommandQueue(ocl->queue);clReleaseContext(ocl->context);
}#endif