Re: Broken output from my algorithm on nVidia OpenCL implementation
Posted by Michael Bien on Apr 21, 2011; 3:13pm
URL: https://forum.jogamp.org/Broken-output-from-my-algorithm-on-nVidia-OpenCL-implementation-tp2843828p2847366.html
By the way, here is my CLInfo output in case you need the device properties:
HOST_JRE: 1.6.0_24-b07
HOST_JVM: Java HotSpot(TM) 64-Bit Server VM
HOST_ARCH: amd64
HOST_NUM_CORES: 8
HOST_OS: Linux
HOST_LITTLE_ENDIAN: true
CL_BINDING_UNAVAILABLE_FUNCTIONS: [clCreateEventFromGLsyncKHR, clIcdGetPlatformIDsKHR]
CL_PLATFORM_NAME: ATI Stream
CL_PLATFORM_VERSION: OpenCL 1.1 ATI-Stream-v2.2 (302)
CL_PLATFORM_PROFILE: FULL_PROFILE
CL_PLATFORM_VENDOR: Advanced Micro Devices, Inc.
CL_PLATFORM_ICD_SUFFIX_KHR: AMD
CL_PLATFORM_EXTENSIONS: [cl_khr_icd, cl_amd_event_callback]
- CL_DEVICE_NAME: Intel(R) Core(TM) i7 CPU 940 @ 2.93GHz
- CL_DEVICE_TYPE: CPU
- CL_DEVICE_ENDIAN_LITTLE: true
- CL_DEVICE_VERSION: OpenCL 1.1 ATI-Stream-v2.2 (302)
- CL_DEVICE_PROFILE: FULL_PROFILE
- CL_DEVICE_VENDOR: GenuineIntel
- CL_DEVICE_EXTENSIONS: [cl_amd_device_attribute_query, cl_khr_byte_addressable_store, cl_khr_int64_extended_atomics, cl_khr_local_int32_extended_atomics, cl_amd_fp64, cl_amd_printf, cl_khr_local_int32_base_atomics, cl_khr_int64_base_atomics, cl_khr_global_int32_base_atomics, cl_khr_gl_sharing, cl_khr_global_int32_extended_atomics, cl_ext_device_fission]
- CL_DEVICE_MAX_COMPUTE_UNITS: 8
- CL_DEVICE_MAX_CLOCK_FREQUENCY: 2934
- CL_DEVICE_VENDOR_ID: 4098
- CL_DEVICE_OPENCL_C_VERSION: OpenCL C 1.1
- CL_DRIVER_VERSION: 2.0
- CL_DEVICE_ADDRESS_BITS: 64
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: 8
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: 16
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: 4
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: 2
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: 4
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: 0
- CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: 16
- CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: 8
- CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: 4
- CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: 2
- CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: 0
- CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: 4
- CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: 0
- CL_DEVICE_MAX_WORK_GROUP_SIZE: 1024
- CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: 3
- CL_DEVICE_MAX_WORK_ITEM_SIZES: [1024, 1024, 1024]
- CL_DEVICE_MAX_PARAMETER_SIZE: 4096
- CL_DEVICE_MAX_MEM_ALLOC_SIZE: 1073741824
- CL_DEVICE_GLOBAL_MEM_SIZE: 3221225472
- CL_DEVICE_LOCAL_MEM_SIZE: 32768
- CL_DEVICE_HOST_UNIFIED_MEMORY: true
- CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: 65536
- CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: 64
- CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: 32768
- CL_DEVICE_MAX_CONSTANT_ARGS: 8
- CL_DEVICE_IMAGE_SUPPORT: false
- CL_DEVICE_MAX_READ_IMAGE_ARGS: 0
- CL_DEVICE_MAX_WRITE_IMAGE_ARGS: 0
- CL_DEVICE_IMAGE2D_MAX_WIDTH: 0
- CL_DEVICE_IMAGE2D_MAX_HEIGHT: 0
- CL_DEVICE_IMAGE3D_MAX_WIDTH: 0
- CL_DEVICE_IMAGE3D_MAX_HEIGHT: 0
- CL_DEVICE_IMAGE3D_MAX_DEPTH: 0
- CL_DEVICE_MAX_SAMPLERS: 0
- CL_DEVICE_PROFILING_TIMER_RESOLUTION: 1
- CL_DEVICE_EXECUTION_CAPABILITIES: [EXEC_KERNEL, EXEC_NATIVE_KERNEL]
- CL_DEVICE_HALF_FP_CONFIG: []
- CL_DEVICE_SINGLE_FP_CONFIG: [DENORM, INF_NAN, ROUND_TO_NEAREST, ROUND_TO_INF, ROUND_TO_ZERO]
- CL_DEVICE_DOUBLE_FP_CONFIG: []
- CL_DEVICE_LOCAL_MEM_TYPE: GLOBAL
- CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: READ_WRITE
- CL_DEVICE_QUEUE_PROPERTIES: [PROFILING_MODE]
- CL_DEVICE_AVAILABLE: true
- CL_DEVICE_COMPILER_AVAILABLE: true
- CL_DEVICE_ERROR_CORRECTION_SUPPORT: false
- cl_khr_fp16: false
- cl_khr_fp64: false
- cl_khr_gl_sharing | cl_APPLE_gl_sharing: true
CL_PLATFORM_NAME: NVIDIA CUDA
CL_PLATFORM_VERSION: OpenCL 1.0 CUDA 4.0.1
CL_PLATFORM_PROFILE: FULL_PROFILE
CL_PLATFORM_VENDOR: NVIDIA Corporation
CL_PLATFORM_ICD_SUFFIX_KHR: NV
CL_PLATFORM_EXTENSIONS: [cl_khr_icd, cl_khr_byte_addressable_store, cl_nv_compiler_options, cl_nv_pragma_unroll, cl_nv_device_attribute_query, cl_khr_gl_sharing]
- CL_DEVICE_NAME: GeForce GTX 295
- CL_DEVICE_TYPE: GPU
- CL_DEVICE_ENDIAN_LITTLE: true
- CL_DEVICE_VERSION: OpenCL 1.0 CUDA
- CL_DEVICE_PROFILE: FULL_PROFILE
- CL_DEVICE_VENDOR: NVIDIA Corporation
- CL_DEVICE_EXTENSIONS: [cl_khr_icd, cl_khr_byte_addressable_store, cl_khr_fp64, cl_khr_local_int32_extended_atomics, cl_khr_local_int32_base_atomics, cl_nv_compiler_options, cl_nv_pragma_unroll, cl_nv_device_attribute_query, cl_khr_global_int32_base_atomics, cl_khr_gl_sharing, cl_khr_global_int32_extended_atomics]
- CL_DEVICE_MAX_COMPUTE_UNITS: 30
- CL_DEVICE_MAX_CLOCK_FREQUENCY: 1242
- CL_DEVICE_VENDOR_ID: 7088510129506619614
- CL_DEVICE_OPENCL_C_VERSION: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info string [error: CL_INVALID_VALUE]
- CL_DRIVER_VERSION: 270.41.06
- CL_DEVICE_ADDRESS_BITS: 32
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: 1
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: 1
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: 1
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: 1
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: 1
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: 1
- CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_MAX_WORK_GROUP_SIZE: 512
- CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: 3
- CL_DEVICE_MAX_WORK_ITEM_SIZES: [512, 512, 64]
- CL_DEVICE_MAX_PARAMETER_SIZE: 4352
- CL_DEVICE_MAX_MEM_ALLOC_SIZE: 234831872
- CL_DEVICE_GLOBAL_MEM_SIZE: 939327488
- CL_DEVICE_LOCAL_MEM_SIZE: 16384
- CL_DEVICE_HOST_UNIFIED_MEMORY: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: 65536
- CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: 0
- CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: 0
- CL_DEVICE_MAX_CONSTANT_ARGS: 9
- CL_DEVICE_IMAGE_SUPPORT: true
- CL_DEVICE_MAX_READ_IMAGE_ARGS: 128
- CL_DEVICE_MAX_WRITE_IMAGE_ARGS: 8
- CL_DEVICE_IMAGE2D_MAX_WIDTH: 4096
- CL_DEVICE_IMAGE2D_MAX_HEIGHT: 32768
- CL_DEVICE_IMAGE3D_MAX_WIDTH: 2048
- CL_DEVICE_IMAGE3D_MAX_HEIGHT: 2048
- CL_DEVICE_IMAGE3D_MAX_DEPTH: 2048
- CL_DEVICE_MAX_SAMPLERS: 16
- CL_DEVICE_PROFILING_TIMER_RESOLUTION: 1000
- CL_DEVICE_EXECUTION_CAPABILITIES: [EXEC_KERNEL]
- CL_DEVICE_HALF_FP_CONFIG: []
- CL_DEVICE_SINGLE_FP_CONFIG: [INF_NAN, ROUND_TO_NEAREST, ROUND_TO_INF, ROUND_TO_ZERO, FMA]
- CL_DEVICE_DOUBLE_FP_CONFIG: [DENORM, INF_NAN, ROUND_TO_NEAREST, ROUND_TO_INF, ROUND_TO_ZERO, FMA]
- CL_DEVICE_LOCAL_MEM_TYPE: LOCAL
- CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: NONE
- CL_DEVICE_QUEUE_PROPERTIES: [OUT_OF_ORDER_MODE, PROFILING_MODE]
- CL_DEVICE_AVAILABLE: true
- CL_DEVICE_COMPILER_AVAILABLE: true
- CL_DEVICE_ERROR_CORRECTION_SUPPORT: false
- cl_khr_fp16: false
- cl_khr_fp64: true
- cl_khr_gl_sharing | cl_APPLE_gl_sharing: true
- CL_DEVICE_NAME: GeForce GTX 295
- CL_DEVICE_TYPE: GPU
- CL_DEVICE_ENDIAN_LITTLE: true
- CL_DEVICE_VERSION: OpenCL 1.0 CUDA
- CL_DEVICE_PROFILE: FULL_PROFILE
- CL_DEVICE_VENDOR: NVIDIA Corporation
- CL_DEVICE_EXTENSIONS: [cl_khr_icd, cl_khr_byte_addressable_store, cl_khr_fp64, cl_khr_local_int32_extended_atomics, cl_khr_local_int32_base_atomics, cl_nv_compiler_options, cl_nv_pragma_unroll, cl_nv_device_attribute_query, cl_khr_global_int32_base_atomics, cl_khr_gl_sharing, cl_khr_global_int32_extended_atomics]
- CL_DEVICE_MAX_COMPUTE_UNITS: 30
- CL_DEVICE_MAX_CLOCK_FREQUENCY: 1242
- CL_DEVICE_VENDOR_ID: 7088510129506619614
- CL_DEVICE_OPENCL_C_VERSION: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info string [error: CL_INVALID_VALUE]
- CL_DRIVER_VERSION: 270.41.06
- CL_DEVICE_ADDRESS_BITS: 32
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: 1
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: 1
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: 1
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: 1
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: 1
- CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: 1
- CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_MAX_WORK_GROUP_SIZE: 512
- CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: 3
- CL_DEVICE_MAX_WORK_ITEM_SIZES: [512, 512, 64]
- CL_DEVICE_MAX_PARAMETER_SIZE: 4352
- CL_DEVICE_MAX_MEM_ALLOC_SIZE: 234700800
- CL_DEVICE_GLOBAL_MEM_SIZE: 938803200
- CL_DEVICE_LOCAL_MEM_SIZE: 16384
- CL_DEVICE_HOST_UNIFIED_MEMORY: com.jogamp.opencl.CLException$CLInvalidValueException: error while asking for info value [error: CL_INVALID_VALUE]
- CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: 65536
- CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: 0
- CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: 0
- CL_DEVICE_MAX_CONSTANT_ARGS: 9
- CL_DEVICE_IMAGE_SUPPORT: true
- CL_DEVICE_MAX_READ_IMAGE_ARGS: 128
- CL_DEVICE_MAX_WRITE_IMAGE_ARGS: 8
- CL_DEVICE_IMAGE2D_MAX_WIDTH: 4096
- CL_DEVICE_IMAGE2D_MAX_HEIGHT: 32768
- CL_DEVICE_IMAGE3D_MAX_WIDTH: 2048
- CL_DEVICE_IMAGE3D_MAX_HEIGHT: 2048
- CL_DEVICE_IMAGE3D_MAX_DEPTH: 2048
- CL_DEVICE_MAX_SAMPLERS: 16
- CL_DEVICE_PROFILING_TIMER_RESOLUTION: 1000
- CL_DEVICE_EXECUTION_CAPABILITIES: [EXEC_KERNEL]
- CL_DEVICE_HALF_FP_CONFIG: []
- CL_DEVICE_SINGLE_FP_CONFIG: [INF_NAN, ROUND_TO_NEAREST, ROUND_TO_INF, ROUND_TO_ZERO, FMA]
- CL_DEVICE_DOUBLE_FP_CONFIG: [DENORM, INF_NAN, ROUND_TO_NEAREST, ROUND_TO_INF, ROUND_TO_ZERO, FMA]
- CL_DEVICE_LOCAL_MEM_TYPE: LOCAL
- CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: NONE
- CL_DEVICE_QUEUE_PROPERTIES: [OUT_OF_ORDER_MODE, PROFILING_MODE]
- CL_DEVICE_AVAILABLE: true
- CL_DEVICE_COMPILER_AVAILABLE: true
- CL_DEVICE_ERROR_CORRECTION_SUPPORT: false
- cl_khr_fp16: false
- cl_khr_fp64: true
- cl_khr_gl_sharing | cl_APPLE_gl_sharing: true