42 #ifndef OPENCV_OPENCL_HPP
43 #define OPENCV_OPENCL_HPP
45 #include "opencv2/core.hpp"
49 namespace cv {
namespace ocl {
87 TYPE_DEFAULT = (1 << 0),
90 TYPE_ACCELERATOR = (1 << 3),
91 TYPE_DGPU = TYPE_GPU + (1 << 16),
92 TYPE_IGPU = TYPE_GPU + (1 << 17),
119 FP_ROUND_TO_NEAREST=(1 << 2),
120 FP_ROUND_TO_ZERO=(1 << 3),
121 FP_ROUND_TO_INF=(1 << 4),
123 FP_SOFT_FLOAT=(1 << 6),
124 FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
140 EXEC_KERNEL=(1 << 0),
141 EXEC_NATIVE_KERNEL=(1 << 1)
// Accessor for the implementation pointer: returns member `p` converted to
// Impl* with a C-style cast. The method is const yet hands back a non-const
// Impl*.
251 inline Impl*
getImpl()
const {
return (Impl*)p; }
// Emptiness check: returns true exactly when member `p` tests false (`!p`).
252 inline bool empty()
const {
return !p; }
272 bool create(
int dtype);
274 size_t ndevices() const;
314 template <
typename T>
316 setUserContext(
typeid(
T), userContext);
318 template <
typename T>
320 return std::dynamic_pointer_cast<T>(getUserContext(
typeid(
T)));
// Returns member `p` converted to Impl* via a C-style cast. Const method that
// yields a non-const pointer.
326 inline Impl*
getImpl()
const {
return (Impl*)p; }
// Returns true exactly when member `p` tests false (`!p`).
327 inline bool empty()
const {
return !p; }
// Returns member `p` converted to Impl* via a C-style cast. Const method that
// yields a non-const pointer.
350 inline Impl* getImpl()
const {
return (Impl*)p; }
// Returns true exactly when member `p` tests false (`!p`).
351 inline bool empty()
const {
return !p; }
412 const
Queue& getProfilingQueue() const;
414 struct Impl; friend struct Impl;
// Returns member `p` as-is. Unlike the sibling getImpl() accessors in this
// header, no cast is applied here.
415 inline Impl* getImpl()
const {
return p; }
// Returns true exactly when member `p` tests false (`!p`).
416 inline bool empty()
const {
return !p; }
// Flag constants passed as the `_flags` argument of the KernelArg constructor.
// Each base value is a distinct bit; READ_WRITE (6) equals READ_ONLY (2)
// combined with WRITE_ONLY (4). The factory helpers combine flags by addition,
// e.g. READ_WRITE+NO_SIZE.
425 enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 };
426 KernelArg(
int _flags,
UMat* _m,
int wscale=1,
int iwscale=1,
const void* _obj=0,
size_t _sz=0);
430 {
return KernelArg(LOCAL, 0, 1, 1, 0, localMemSize); }
438 {
return KernelArg(READ_WRITE, (
UMat*)&m, wscale, iwscale); }
440 {
return KernelArg(READ_WRITE+NO_SIZE, (
UMat*)&m, wscale, iwscale); }
444 {
return KernelArg(WRITE_ONLY, (
UMat*)&m, wscale, iwscale); }
446 {
return KernelArg(READ_ONLY+NO_SIZE, (
UMat*)&m, wscale, iwscale); }
448 {
return KernelArg(WRITE_ONLY+NO_SIZE, (
UMat*)&m, wscale, iwscale); }
475 bool create(const
char* kname, const
Program& prog);
479 int set(
int i, const
void*
value,
size_t sz);
483 template<typename _Tp>
int set(
int i, const _Tp&
value)
// Single-argument case of set_args_: forwards a0 to set(i, a0) and returns
// whatever that call returns.
488 template<
typename _Tp0>
inline
489 int set_args_(
int i,
const _Tp0& a0) {
return set(i, a0); }
// Variadic case of set_args_: passes a0 to set(i, a0), stores the result back
// into i, then recurses on the remaining arguments with the updated i. The
// value returned is that of the final recursive call.
// NOTE(review): this presumes set() returns the index to use for the next
// argument — confirm against the set() implementation.
490 template<
typename _Tp0,
typename... _Tps>
inline
491 int set_args_(
int i,
const _Tp0& a0,
const _Tps&... rest_args) { i = set(i, a0);
return set_args_(i, rest_args...); }
// Hands every value in kernel_args to set_args_, starting at index 0, and
// returns *this so further calls can be chained. The int result of set_args_
// is discarded.
// NOTE(review): both set_args_ overloads visible here require at least one
// argument after the index, so an empty pack would not match them — confirm
// whether a zero-argument overload exists elsewhere.
504 template<
typename... _Tps>
inline
505 Kernel&
args(
const _Tps&... kernel_args) { set_args_(0, kernel_args...);
return *
this; }
521 size_t localsize[],
bool sync,
const Queue& q=
Queue());
580 void getBinary(
std::vector<
char>& binary) const;
582 struct Impl; friend struct Impl;
// Returns member `p` converted to Impl* via a C-style cast. Const method that
// yields a non-const pointer.
583 inline Impl* getImpl()
const {
return (Impl*)p; }
// Returns true exactly when member `p` tests false (`!p`).
584 inline bool empty()
const {
return !p; }
588 #ifndef OPENCV_REMOVE_DEPRECATED_API
633 const
unsigned char* binary, const
size_t size,
658 const
unsigned char* binary, const
size_t size,
666 struct Impl; friend struct Impl;
// Returns member `p` converted to Impl* via a C-style cast. Const method that
// yields a non-const pointer.
667 inline Impl* getImpl()
const {
return (Impl*)p; }
// Returns true exactly when member `p` tests false (`!p`).
668 inline bool empty()
const {
return !p; }
693 int versionMajor() const;
694 int versionMinor() const;
696 int deviceNumber() const;
// Returns true exactly when member `p` tests false (`!p`).
700 bool empty()
const {
return !p; }
767 static
bool canCreateAlias(const
UMat &u);
771 static
bool isFormatSupported(
int depth,
int cn,
bool norm);
// Returns true exactly when member `p` tests false (`!p`).
// NOTE(review): `p` appears to be a std::shared_ptr<Impl> for this class rather
// than a raw pointer — confirm against the member declaration.
879 inline bool empty()
const {
return !p; }
905 #ifdef __OPENCV_BUILD
// Expands to an expression that is true when
// cv::ocl::internal::isOpenCLForced() returns true, and otherwise takes the
// value of (condition). Because of ||, condition is not evaluated when the
// first operand is already true.
909 #define OCL_FORCE_CHECK(condition) (cv::ocl::internal::isOpenCLForced() || (condition))
// Expands to an expression that is true when
// cv::ocl::internal::isPerformanceCheckBypassed() returns true, and otherwise
// takes the value of (condition). Because of ||, condition is not evaluated
// when the first operand is already true.
912 #define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
Custom array allocator.
Definition: mat.hpp:480
n-dimensional dense array class
Definition: mat.hpp:812
void setUseSVM(bool enabled)
std::shared_ptr< T > getUserContext()
Definition: ocl.hpp:319
Impl * getImpl() const
Definition: ocl.hpp:326
bool empty() const
Definition: ocl.hpp:327
std::shared_ptr< UserContext > getUserContext(std::type_index typeId)
static Context create(const std::string &configuration)
Impl * p
Definition: ocl.hpp:330
static Context fromHandle(void *context)
void setUserContext(const std::shared_ptr< T > &userContext)
Definition: ocl.hpp:315
static Context & getDefault(bool initialize=true)
void * getOpenCLContextProperty(int propertyId) const
Get OpenCL context property specified on context creation.
static Context fromDevice(const ocl::Device &device)
void setUserContext(std::type_index typeId, const std::shared_ptr< UserContext > &userContext)
CV_WRAP Device() CV_NOEXCEPT
CV_WRAP size_t image3DMaxWidth() const
CV_WRAP int nativeVectorWidthChar() const
bool empty() const
Definition: ocl.hpp:252
CV_WRAP size_t image3DMaxHeight() const
CV_WRAP int nativeVectorWidthLong() const
CV_WRAP int addressBits() const
CV_WRAP String OpenCLVersion() const
CV_WRAP size_t maxMemAllocSize() const
CV_WRAP bool errorCorrectionSupport() const
CV_WRAP size_t globalMemSize() const
CV_WRAP size_t image3DMaxDepth() const
CV_WRAP int preferredVectorWidthInt() const
CV_WRAP bool isExtensionSupported(const String &extensionName) const
CV_WRAP String driverVersion() const
CV_WRAP int maxClockFrequency() const
CV_WRAP int doubleFPConfig() const
CV_WRAP String extensions() const
CV_WRAP size_t imageMaxArraySize() const
CV_WRAP size_t printfBufferSize() const
CV_WRAP int maxReadImageArgs() const
CV_WRAP int nativeVectorWidthDouble() const
void maxWorkItemSizes(size_t *) const
CV_WRAP bool available() const
CV_WRAP int vendorID() const
CV_WRAP int executionCapabilities() const
Impl * p
Definition: ocl.hpp:254
CV_WRAP int singleFPConfig() const
CV_WRAP size_t maxWorkGroupSize() const
CV_WRAP int preferredVectorWidthDouble() const
CV_WRAP int nativeVectorWidthShort() const
CV_WRAP size_t image2DMaxHeight() const
CV_WRAP size_t image2DMaxWidth() const
CV_WRAP int nativeVectorWidthHalf() const
CV_WRAP int deviceVersionMinor() const
uint imagePitchAlignment() const
CV_WRAP bool compilerAvailable() const
CV_WRAP int maxConstantArgs() const
CV_WRAP int maxWorkItemDims() const
CV_WRAP int maxComputeUnits() const
CV_WRAP size_t maxConstantBufferSize() const
CV_WRAP int deviceVersionMajor() const
CV_WRAP String vendorName() const
CV_WRAP bool isNVidia() const
Definition: ocl.hpp:200
CV_WRAP bool endianLittle() const
CV_WRAP size_t imageMaxBufferSize() const
CV_WRAP size_t globalMemCacheSize() const
CV_WRAP size_t maxParameterSize() const
CV_WRAP String version() const
CV_WRAP int nativeVectorWidthFloat() const
CV_WRAP bool hasFP16() const
true if 'cl_khr_fp16' extension is available
CV_WRAP int maxWriteImageArgs() const
static CV_WRAP const Device & getDefault()
CV_WRAP int halfFPConfig() const
uint imageBaseAddressAlignment() const
CV_WRAP int preferredVectorWidthLong() const
CV_WRAP bool hostUnifiedMemory() const
CV_WRAP String OpenCL_C_Version() const
CV_WRAP int localMemType() const
CV_WRAP int preferredVectorWidthFloat() const
CV_WRAP bool isIntel() const
Definition: ocl.hpp:199
Impl * getImpl() const
Definition: ocl.hpp:251
CV_WRAP size_t profilingTimerResolution() const
CV_WRAP bool hasFP64() const
true if 'cl_khr_fp64' extension is available
CV_WRAP int preferredVectorWidthShort() const
CV_WRAP bool intelSubgroupsSupport() const
Deprecated; use the isExtensionSupported() method instead (probably with the "cl_khr_subgroups" value).
CV_WRAP size_t localMemSize() const
CV_WRAP int memBaseAddrAlign() const
static Device fromHandle(void *d)
CV_WRAP int preferredVectorWidthChar() const
CV_WRAP bool linkerAvailable() const
CV_WRAP int nativeVectorWidthInt() const
CV_WRAP bool isAMD() const
Definition: ocl.hpp:198
CV_WRAP int maxSamplers() const
CV_WRAP int globalMemCacheLineSize() const
CV_WRAP bool imageFromBufferSupport() const
CV_WRAP String name() const
CV_WRAP int globalMemCacheType() const
CV_WRAP int preferredVectorWidthHalf() const
CV_WRAP bool imageSupport() const
int iwscale
Definition: ocl.hpp:457
static KernelArg ReadWriteNoSize(const UMat &m, int wscale=1, int iwscale=1)
Definition: ocl.hpp:439
static KernelArg ReadOnlyNoSize(const UMat &m, int wscale=1, int iwscale=1)
Definition: ocl.hpp:445
KernelArg(int _flags, UMat *_m, int wscale=1, int iwscale=1, const void *_obj=0, size_t _sz=0)
static KernelArg Constant(const _Tp *arr, size_t n)
Definition: ocl.hpp:450
const void * obj
Definition: ocl.hpp:455
static KernelArg PtrWriteOnly(const UMat &m)
Definition: ocl.hpp:431
static KernelArg PtrReadWrite(const UMat &m)
Definition: ocl.hpp:435
static KernelArg ReadOnly(const UMat &m, int wscale=1, int iwscale=1)
Definition: ocl.hpp:441
int flags
Definition: ocl.hpp:453
size_t sz
Definition: ocl.hpp:456
static KernelArg PtrReadOnly(const UMat &m)
Definition: ocl.hpp:433
static KernelArg ReadWrite(const UMat &m, int wscale=1, int iwscale=1)
Definition: ocl.hpp:437
static KernelArg Constant(const Mat &m)
static KernelArg WriteOnlyNoSize(const UMat &m, int wscale=1, int iwscale=1)
Definition: ocl.hpp:447
UMat * m
Definition: ocl.hpp:454
static KernelArg WriteOnly(const UMat &m, int wscale=1, int iwscale=1)
Definition: ocl.hpp:443
Impl * p
Definition: ocl.hpp:548
bool run(int dims, size_t globalsize[], size_t localsize[], bool sync, const Queue &q=Queue())
Run the OpenCL kernel (globalsize value may be adjusted)
int set_args_(int i, const _Tp0 &a0)
Definition: ocl.hpp:489
Kernel & args(const _Tps &... kernel_args)
Set up the OpenCL kernel arguments. Avoid calling the set(i, ...) methods directly.
Definition: ocl.hpp:505
bool compileWorkGroupSize(size_t wsz[]) const
size_t preferedWorkGroupSizeMultiple() const
bool run_(int dims, size_t globalsize[], size_t localsize[], bool sync, const Queue &q=Queue())
Run the OpenCL kernel.
size_t localMemSize() const
int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue &q=Queue())
Similar to a synchronized run_() call, but returns the kernel execution time.
size_t workGroupSize() const
int set_args_(int i, const _Tp0 &a0, const _Tps &... rest_args)
Definition: ocl.hpp:491
bool runTask(bool sync, const Queue &q=Queue())
~OpenCLExecutionContextScope()
Definition: ocl.hpp:896
OpenCLExecutionContextScope(const OpenCLExecutionContext &ctx)
Definition: ocl.hpp:889
OpenCLExecutionContext cloneWithNewQueue() const
static OpenCLExecutionContext create(const std::string &platformName, void *platformID, void *context, void *deviceID)
Creates an OpenCL execution context. OpenCV will check whether the available OpenCL platform has the platformName name...
bool empty() const
Definition: ocl.hpp:879
static OpenCLExecutionContext & getCurrentRef()
static OpenCLExecutionContext create(const Context &context, const Device &device)
OpenCLExecutionContext & operator=(const OpenCLExecutionContext &)=default
static OpenCLExecutionContext create(const Context &context, const Device &device, const ocl::Queue &queue)
Creates OpenCL execution context.
Context & getContext() const
void setUseOpenCL(bool flag)
std::shared_ptr< Impl > p
Definition: ocl.hpp:882
OpenCLExecutionContext & operator=(OpenCLExecutionContext &&)=default
OpenCLExecutionContext cloneWithNewQueue(const ocl::Queue &q) const
static OpenCLExecutionContext & getCurrent()
OpenCLExecutionContext(const OpenCLExecutionContext &)=default
OpenCLExecutionContext()=default
Device & getDevice() const
~OpenCLExecutionContext()=default
OpenCLExecutionContext(OpenCLExecutionContext &&)=default
bool empty() const
Definition: ocl.hpp:668
Impl * p
Definition: ocl.hpp:670
uint64 hash_t
Definition: ocl.hpp:602
ProgramSource() CV_NOEXCEPT
CV_DEPRECATED bool read(const String &buf, const String &buildflags)
static CV_DEPRECATED String getPrefix(const String &buildflags)
CV_DEPRECATED String getPrefix() const
bool empty() const
Definition: ocl.hpp:584
Impl * p
Definition: ocl.hpp:586
CV_DEPRECATED const ProgramSource & source() const
CV_DEPRECATED bool write(String &buf) const
bool empty() const
Definition: ocl.hpp:416
Impl * p
Definition: ocl.hpp:418
uint64 durationNS() const
duration in nanoseconds
Impl *const p
Definition: ocl.hpp:790
InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray T
Definition: calib3d.hpp:1867
static double norm(const Matx< _Tp, m, n > &M)
CV_EXPORTS InputOutputArray noArray()
CV__DEBUG_NS_END typedef const _InputArray & InputArray
Definition: mat.hpp:442
int rows
Definition: core_c.h:257
int CvScalar value
Definition: core_c.h:720
const CvArr const CvArr const CvArr * src3
Definition: core_c.h:994
int cols
Definition: core_c.h:221
const int * idx
Definition: core_c.h:668
const CvArr const CvArr * src2
Definition: core_c.h:994
CvSize size
Definition: core_c.h:112
const CvArr * src1
Definition: core_c.h:993
int int type
Definition: core_c.h:221
void int step
Definition: core_c.h:905
CvArr * arr
Definition: core_c.h:1247
int depth
Definition: core_c.h:100
int dims
Definition: core_c.h:464
uint32_t uint
Definition: interface.h:42
int64_t int64
Definition: interface.h:61
uint64_t uint64
Definition: interface.h:62
CV_EXPORTS_W bool haveAmdBlas()
CV_EXPORTS_W bool haveOpenCL()
CV_EXPORTS CV_DEPRECATED const char * convertTypeStr(int sdepth, int ddepth, int cn, char *buf)
CV_EXPORTS_W bool haveAmdFft()
CV_EXPORTS const char * memopTypeToStr(int t)
class CV_EXPORTS Queue
Definition: ocl.hpp:68
CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2=noArray(), InputArray src3=noArray(), InputArray src4=noArray(), InputArray src5=noArray(), InputArray src6=noArray(), InputArray src7=noArray(), InputArray src8=noArray(), InputArray src9=noArray(), OclVectorStrategy strat=OCL_VECTOR_DEFAULT)
void initializeContextFromHandle(Context &ctx, void *platform, void *context, void *device)
CV_EXPORTS const char * vecopTypeToStr(int t)
CV_EXPORTS_W void finish()
CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths, InputArray src1, InputArray src2=noArray(), InputArray src3=noArray(), InputArray src4=noArray(), InputArray src5=noArray(), InputArray src6=noArray(), InputArray src7=noArray(), InputArray src8=noArray(), InputArray src9=noArray(), OclVectorStrategy strat=OCL_VECTOR_DEFAULT)
CV_EXPORTS void getPlatfomsInfo(std::vector< PlatformInfo > &platform_info)
CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth=-1, const char *name=NULL)
CV_EXPORTS const char * getOpenCLErrorString(int errorCode)
CV_EXPORTS const char * typeToStr(int t)
CV_EXPORTS_W void setUseOpenCL(bool flag)
CV_EXPORTS void convertFromImage(void *cl_mem_image, UMat &dst)
Convert OpenCL image2d_t to UMat.
CV_EXPORTS void attachContext(const String &platformName, void *platformID, void *context, void *deviceID)
Attaches OpenCL context to OpenCV.
OclVectorStrategy
Definition: ocl.hpp:716
CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2=noArray(), InputArray src3=noArray(), InputArray src4=noArray(), InputArray src5=noArray(), InputArray src6=noArray(), InputArray src7=noArray(), InputArray src8=noArray(), InputArray src9=noArray())
CV_EXPORTS void buildOptionsAddMatrixDescription(String &buildOptions, const String &name, InputArray _m)
CV_EXPORTS MatAllocator * getOpenCLAllocator()
CV_EXPORTS bool haveSVM()
CV_EXPORTS void convertFromBuffer(void *cl_mem_buffer, size_t step, int rows, int cols, int type, UMat &dst)
Convert OpenCL buffer to UMat.
CV_EXPORTS_W bool useOpenCL()
@ OCL_VECTOR_MAX
Definition: ocl.hpp:721
@ OCL_VECTOR_DEFAULT
Definition: ocl.hpp:724
@ OCL_VECTOR_OWN
Definition: ocl.hpp:718
#define CV_EXPORTS_W_SIMPLE
Definition: cvdef.h:473
#define CV_EXPORTS
Definition: cvdef.h:435
#define CV_DEPRECATED
Definition: cvdef.h:450
#define CV_EXPORTS_W
Definition: cvdef.h:472
#define CV_NOEXCEPT
Definition: cvdef.h:800
#define CV_WRAP
Definition: cvdef.h:481
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition: base.hpp:342
CV_EXPORTS_W int getDevice()
Returns the current device index set by cuda::setDevice or initialized by default.
CV_EXPORTS OutputArray int double double InputArray OutputArray int int bool double k
Definition: imgproc.hpp:2133
OutputArray dst
Definition: imgproc.hpp:3564
OutputArray OutputArray OutputArray int sdepth
Definition: imgproc.hpp:2884
"black box" representation of the file storage associated with a file on disk.
Definition: calib3d.hpp:441