cuda小白 原文链接 NPP
GPU架构近些年也有不少的变化，具体的可以参考别的博主的介绍，都比较详细。还有一些cuda中的专有名词的含义，可以参考《详解CUDA的Context、Stream、Warp、SM、SP、Kernel、Block、Grid》
常见的NppStatus，…cuda小白 原文链接 NPP
GPU架构近些年也有不少的变化，具体的可以参考别的博主的介绍，都比较详细。还有一些cuda中的专有名词的含义，可以参考《详解CUDA的Context、Stream、Warp、SM、SP、Kernel、Block、Grid》。
常见的NppStatus，可以看这里。
如有问题，请指出，谢谢！
Arithmetic Operations
当前模块主要包括加、减、乘、除、abs、平方、矩阵相乘、开根、ln、exp 等。功能相近的接口，以其中一个为例进行介绍。
AddC
针对图像中每一个像素添加一个常量值。与之类似的还有 MulC、SubC、DivC、AbsDiffC、MulScale。大概接口样式：
// 两个接口的区别在于：带 I 的接口可以直接在原始图像地址上进行操作（原地操作），无需进行拷贝
// Sfs 的含义：表示可以通过 nScaleFactor 对图像的数值范围进行约束（缩放后饱和）操作。
NppStatus nppiAddC_[数据类型]_C[通道数]RSfs_[是否使用流]
NppStatus nppiAddC_[数据类型]_C[通道数]IRSfs_[是否使用流]
以三通道的 uint8_t 图像数据为例：
NppStatus nppiAddC_8u_C3RSfs(const Npp8u *pSrc1, int nSrc1Step, const Npp8u aConstants[3], Npp8u *pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
NppStatus nppiAddC_8u_C3IRSfs(const Npp8u aConstants[3], Npp8u *pSrcDst, int nSrcDstStep, NppiSize oSizeROI, int nScaleFactor);
code
#include iostream
#include cuda_runtime.h
#include npp.h
#include opencv2/opencv.hpp#define CUDA_FREE(ptr) { if (ptr ! nullptr) { cudaFree(ptr); ptr nullptr; } }int main() {std::string directory ../;// load image cv::Mat image cv::imread(directory dog.png);if (image.empty()) {std::cout Load image error! std::endl;return -1;}int image_width image.cols;int image_height image.rows;int image_size image_width * image_height * 3 * sizeof(uint8_t);std::cout Image info : image_width image_width , image_height image_height std::endl;// malloc cpy uint8_t *in_ptr, *in_ptr2, *out_ptr, *roi_out_ptr;cudaMalloc((void**)in_ptr, image_size);cudaMalloc((void**)in_ptr2, image_size);cudaMalloc((void**)out_ptr, image_size);cudaMalloc((void**)roi_out_ptr, image_size);cudaMemcpy(in_ptr, image.data, image_size, cudaMemcpyHostToDevice);cudaMemcpy(in_ptr2, image.data, image_size, cudaMemcpyHostToDevice);uint8_t host_constant[3] { (uint8_t)0, (uint8_t)20, (uint8_t)0 };NppiSize roi1, roi2;roi1.width image_width;roi1.height image_height;roi2.width image_width / 2;roi2.height image_height / 2;// nppiAddC_8u_C3RSfscv::Mat out_image cv::Mat::zeros(image_height, image_width, CV_8UC3);NppStatus status;status nppiAddC_8u_C3RSfs(in_ptr, image_width * 3, host_constant, out_ptr, image_width * 3, roi1, 0);if (status ! NPP_SUCCESS) {std::cout [GPU] ERROR nppiAddC_8u_C3RSfs failed, status status std::endl;return false;}cudaMemcpy(out_image.data, out_ptr, image_size, cudaMemcpyDeviceToHost);cv::imwrite(directory add_constant.jpg, out_image);status nppiAddC_8u_C3RSfs(in_ptr, image_width * 3, host_constant, out_ptr, image_width * 3, roi1, 1);if (status ! NPP_SUCCESS) {std::cout [GPU] ERROR nppiAddC_8u_C3RSfs failed, status status std::endl;return false;}cudaMemcpy(out_image.data, out_ptr, image_size, cudaMemcpyDeviceToHost);cv::imwrite(directory add_constant_scale.jpg, out_image);status nppiAddC_8u_C3RSfs(in_ptr, image_width * 3, host_constant, out_ptr, image_width * 3, roi2, 0);if (status ! 
NPP_SUCCESS) {std::cout [GPU] ERROR nppiAddC_8u_C3RSfs failed, status status std::endl;return false;}cudaMemcpy(out_image.data, out_ptr, image_size, cudaMemcpyDeviceToHost);cv::imwrite(directory add_constant_roi.jpg, out_image);// freeCUDA_FREE(in_ptr)CUDA_FREE(in_ptr2)CUDA_FREE(out_ptr)CUDA_FREE(roi_out_ptr)
}make
cmake_minimum_required(VERSION 3.20)
project(test)

find_package(OpenCV REQUIRED)
# FindCUDA is deprecated; CUDAToolkit provides imported targets for the CUDA
# runtime and the individual NPP libraries, so no *.so globbing is needed.
find_package(CUDAToolkit REQUIRED)

add_executable(test test.cpp)
target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(test PRIVATE
    ${OpenCV_LIBS}
    CUDA::cudart
    CUDA::nppc    # NPP core
    CUDA::nppial  # NPP image arithmetic and logical operations
)
result
注意点
对图像的值的范围进行一定的约束：最初的图像RGB的值都是[0, 255]，如果对应的nScaleFactor设置为1，运算结果会先乘以2的-nScaleFactor次方（即除以2），再饱和到[0, 255]，结果的数值范围约为[0, 128]，因此整体的图像色调会变暗；同样的，如果设置为-1，则结果整体会变为2倍，因为存图的限制只能是[0, 255]，超过255的会变成255，所以整体看起来会变亮。如果指定的roi不是整张图，由于roi的类型是NppiSize（width, height），不包含起点坐标，因此输入的指针不是指向图像的起始位置，而是指向roi的起始位置。MulScale相较于MulC来说，默认nScaleFactor为0；AbsDiffC的主要功能是得到整张图像与host_constant的绝对差值。每个接口都有一个对应的包含cudastream的版本，按需使用。
Add
与AddC不同的是，Add输入的是两张图像。同样的还有 Mul、MulScale、Sub、Div、Div_round、Abs、AbsDiff、Sqr、Sqrt、Ln、Exp。由于 Abs、AbsDiff、Sqr、Sqrt、Ln、Exp 在图像处理方面使用的不是很多，就不细述。以uint8_t的三通道图像为例：
// 命名规则与nppiAddC*类似
NppStatus nppiAdd_8u_C3RSfs(const Npp8u * pSrc1,int nSrc1Step,const Npp8u *pSrc2,int nSrc2Step,Npp8u * pDst,int nDstStep,NppiSize oSizeROI,int nScaleFactor);
NppStatus nppiAdd_8u_C3IRSfs(const Npp8u *pSrc,int nSrcStep,Npp8u *pSrcDst,int nSrcDstStep,NppiSize oSizeROI,int nScaleFactor);code
#include iostream
#include cuda_runtime.h
#include npp.h
#include opencv2/opencv.hpp#define PRINT_VALUE(value) { \std::cout [GPU] #value value std::endl; }#define CUDA_FREE(ptr) { if (ptr ! nullptr) { cudaFree(ptr); ptr nullptr; } }int main() {std::string directory ../;// load image cv::Mat image cv::imread(directory dog.png);if (image.empty()) {std::cout Load image error! std::endl;return -1;}int image_width image.cols;int image_height image.rows;int image_size image_width * image_height * 3 * sizeof(uint8_t);std::cout Image info : image_width image_width , image_height image_height std::endl;// malloc cpy uint8_t *in_ptr, *in_ptr2, *out_ptr, *roi_out_ptr;cudaMalloc((void**)in_ptr, image_size);cudaMalloc((void**)in_ptr2, image_size);cudaMalloc((void**)out_ptr, image_size);cudaMalloc((void**)roi_out_ptr, image_size);cudaMemcpy(in_ptr, image.data, image_size, cudaMemcpyHostToDevice);cudaMemcpy(in_ptr2, image.data, image_size, cudaMemcpyHostToDevice);NppiSize roi1, roi2;roi1.width image_width;roi1.height image_height;roi2.width image_width / 2;roi2.height image_height / 2;// nppiAdd_8u_C3RSfscv::Mat out_image cv::Mat::zeros(image_height, image_width, CV_8UC3);NppStatus status;status nppiAdd_8u_C3RSfs(in_ptr, image_width * 3, in_ptr2, image_width * 3, out_ptr, image_width * 3, roi1, 0);if (status ! NPP_SUCCESS) {std::cout [GPU] ERROR nppiAdd_8u_C3RSfs failed, status status std::endl;return false;}cudaMemcpy(out_image.data, out_ptr, image_size, cudaMemcpyDeviceToHost);cv::imwrite(directory add.jpg, out_image);status nppiAdd_8u_C3RSfs(in_ptr, image_width * 3, in_ptr2, image_width * 3, out_ptr, image_width * 3, roi1, 1);if (status ! NPP_SUCCESS) {std::cout [GPU] ERROR nppiAdd_8u_C3RSfs failed, status status std::endl;return false;}cudaMemcpy(out_image.data, out_ptr, image_size, cudaMemcpyDeviceToHost);cv::imwrite(directory add_scale.jpg, out_image);status nppiAdd_8u_C3RSfs(in_ptr, image_width * 3, in_ptr2, image_width * 3, out_ptr, image_width * 3, roi2, 0);if (status ! 
NPP_SUCCESS) {std::cout [GPU] ERROR nppiAdd_8u_C3RSfs failed, status status std::endl;return false;}cudaMemcpy(out_image.data, out_ptr, image_size, cudaMemcpyDeviceToHost);cv::imwrite(directory add_roi.jpg, out_image);// freeCUDA_FREE(in_ptr)CUDA_FREE(in_ptr2)CUDA_FREE(out_ptr)CUDA_FREE(roi_out_ptr)
}make
cmake_minimum_required(VERSION 3.20)
project(test)

find_package(OpenCV REQUIRED)
# FindCUDA is deprecated; CUDAToolkit provides imported targets for the CUDA
# runtime and the individual NPP libraries, so no *.so globbing is needed.
find_package(CUDAToolkit REQUIRED)

add_executable(test test.cpp)
target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(test PRIVATE
    ${OpenCV_LIBS}
    CUDA::cudart
    CUDA::nppc    # NPP core
    CUDA::nppial  # NPP image arithmetic and logical operations
)
result
注意点
nScaleFactor与AddC中的功能一致，roi的操作也与AddC中的一致。由于使用的是两张相同的图片进行相加，因此在nScaleFactor为1的时候，所有的数值都变回原来的值，保存的图像与原图一致。同样有cudastream版本，按需使用。
AddWeighted
将源图像按权重nAlpha累加到浮点类型的目标图像上（原地操作，结果写回pSrcDst）；带M的版本只对mask指定的区域进行处理。
NppStatus nppiAddWeighted_8u32f_C1IR(const Npp8u *pSrc,int nSrcStep,Npp32f * pSrcDst,int nSrcDstStep,NppiSize oSizeROI,Npp32f nAlpha);
NppStatus nppiAddWeighted_8u32f_C1IMR(const Npp8u *pSrc,int nSrcStep,const Npp8u *pMask,int nMaskStep,Npp32f * pSrcDst,int nSrcDstStep,NppiSize oSizeROI,Npp32f nAlpha); code
#include iostream
#include cuda_runtime.h
#include npp.h
#include opencv2/opencv.hpp#define PRINT_VALUE(value) { \std::cout [GPU] #value value std::endl; }#define CUDA_FREE(ptr) { if (ptr ! nullptr) { cudaFree(ptr); ptr nullptr; } }int main() {std::string directory ../;// load image cv::Mat image cv::imread(directory dog.png);if (image.empty()) {std::cout Load image error! std::endl;return -1;}cv::Mat gray;cv::cvtColor(image, gray, CV_BGR2GRAY);cv::imwrite(directory gray.jpg, gray);int image_width gray.cols;int image_height gray.rows;int image_size image_width * image_height;std::cout Image info : image_width image_width , image_height image_height std::endl;cv::Mat mat_mask cv::Mat::ones(image_height, image_width, CV_8UC1);cv::Rect rc_center cv::Rect(image_width / 4, image_height / 4, image_width / 2, image_height / 2);mat_mask(rc_center) cv::Mat::ones(image_height / 2, image_width / 2, CV_8UC1) * 255;cv::imwrite(directory mask.jpg, mat_mask);// malloc cpy uint8_t *in_ptr, *mask;cudaMalloc((void**)in_ptr, image_size * sizeof(uint8_t));cudaMalloc((void**)mask, image_size * sizeof(uint8_t));cudaMemcpy(in_ptr, gray.data, image_size, cudaMemcpyHostToDevice);cudaMemcpy(mask, mat_mask.data, image_size, cudaMemcpyHostToDevice);float *out_ptr, *out_ptr1, *out_ptr2;cudaMalloc((void**)out_ptr, image_size * sizeof(float));cudaMalloc((void**)out_ptr1, image_size * sizeof(float));cudaMalloc((void**)out_ptr2, image_size * sizeof(float));NppiSize roi1, roi2;roi1.width image_width;roi1.height image_height;roi2.width image_width / 2;roi2.height image_height / 2;// nppiAdd_8u_C3RSfscv::Mat out_image cv::Mat::zeros(image_height, image_width, CV_32FC1);NppStatus status;status nppiAddWeighted_8u32f_C1IMR(in_ptr, image_width * sizeof(uint8_t), mask, image_width * sizeof(uint8_t), out_ptr, image_width * sizeof(float), roi1, 1.0);if (status ! 
NPP_SUCCESS) {std::cout [GPU] ERROR nppiAddWeighted_8u32f_C1IMR failed, status status std::endl;return false;}cudaMemcpy(out_image.data, out_ptr, image_size * sizeof(float), cudaMemcpyDeviceToHost);cv::imwrite(directory addweight.jpg, out_image);status nppiAddWeighted_8u32f_C1IMR(in_ptr, image_width * sizeof(uint8_t), mask, image_width * sizeof(uint8_t), out_ptr1, image_width * sizeof(float), roi1, 0.5);if (status ! NPP_SUCCESS) {std::cout [GPU] ERROR nppiAddWeighted_8u32f_C1IMR failed, status status std::endl;return false;}cudaMemcpy(out_image.data, out_ptr1, image_size * sizeof(float), cudaMemcpyDeviceToHost);cv::imwrite(directory addweight_scale.jpg, out_image);status nppiAddWeighted_8u32f_C1IMR(in_ptr, image_width * sizeof(uint8_t), mask, image_width * sizeof(uint8_t), out_ptr2, image_width * sizeof(float), roi2, 0.5);if (status ! NPP_SUCCESS) {std::cout [GPU] ERROR nppiAddWeighted_8u32f_C1IMR failed, status status std::endl;return false;}cudaMemcpy(out_image.data, out_ptr2, image_size * sizeof(float), cudaMemcpyDeviceToHost);cv::imwrite(directory addweight_roi_scale.jpg, out_image);// freeCUDA_FREE(in_ptr)CUDA_FREE(mask)CUDA_FREE(out_ptr)CUDA_FREE(out_ptr1)CUDA_FREE(out_ptr2)
}make
cmake_minimum_required(VERSION 3.20)
project(test)

find_package(OpenCV REQUIRED)
# FindCUDA is deprecated; CUDAToolkit provides imported targets for the CUDA
# runtime and the individual NPP libraries, so no *.so globbing is needed.
find_package(CUDAToolkit REQUIRED)

add_executable(test test.cpp)
target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(test PRIVATE
    ${OpenCV_LIBS}
    CUDA::cudart
    CUDA::nppc    # NPP core
    CUDA::nppial  # NPP image arithmetic and logical operations
)
result
注意点
1. nAlpha是针对原图中的每一个像素的值需要添加的权重，mask仅影响目标位置中哪些部分需要输出。
2. roi表示输入的区域约束。