problems with arg size

classic Classic list List threaded Threaded
4 messages Options
Reply | Threaded
Open this post in threaded view
|

problems with arg size

texone
I have written a small wrapper around the CLKernel class to simplify setting constant float4 and float2 kernel parameters which is working fine. Now I would also like to pass arrays as constant parameters but it seems the buffer with the arguments has the wrong size.

Here is the exception:

error setting arg 6 to value java.nio.DirectByteBuffer[pos=0 lim=128 cap=128] of size 12 of CLKernel [id: 4296729632 name: field] [error: CL_INVALID_ARG_SIZE]

Here is the java code:


public class CCCLKernel {

	private CLKernel _myKernel;
	private CCCLProgram _myProgram;
	
	private final ByteBuffer _myBuffer;
	
	CCCLKernel(CCCLProgram theProgram, CLKernel theKernel){
		_myProgram = theProgram;
		_myKernel = theKernel;
		
		_myBuffer = Buffers.newDirectByteBuffer((Platform.is32Bit()?4:8) * 16);
	}
	
	public CLKernel clKernel() {
		return _myKernel;
	}
	
	

    
    public void argument1f(int theArgumentIndex, float value) {
    	_myBuffer.putFloat(0, value);
        setArgument(theArgumentIndex, 4, _myBuffer);
    }

    public void argument2f(int theArgumentIndex, float value1, float value2) {
    	_myBuffer.putFloat(0, value1);
    	_myBuffer.putFloat(4, value2);
        setArgument(theArgumentIndex, 8, _myBuffer);
    }

    public void argument3f(int theArgumentIndex, float theValue1, float theValue2, float theValue3) {
    	argument4f(theArgumentIndex, theValue1, theValue2, theValue3,0);
    }

    public void argument4f(int theArgumentIndex, float theValue1, float theValue2, float theValue3, float theValue4) {
    	_myBuffer.putFloat(0, theValue1);
    	_myBuffer.putFloat(4, theValue2);
    	_myBuffer.putFloat(8, theValue3);
    	_myBuffer.putFloat(12, theValue4);
        setArgument(theArgumentIndex, 16, _myBuffer);
    }
    
    public void argumentNf(int theArgumentIndex, float...theValues) {
    	for(int i = 0; i < theValues.length;i++) {
        	_myBuffer.putFloat(i * 4, theValues[i]);
    	}
        setArgument(theArgumentIndex, theValues.length * 4, _myBuffer);
    }

    private void setArgument(int theArgumentIndex, int size, Buffer value) {
        if(theArgumentIndex >= _myKernel.numArgs || theArgumentIndex < 0) {
            throw new IndexOutOfBoundsException("kernel "+ this +" has "+_myKernel.numArgs+" arguments, can not set argument with index "+theArgumentIndex);
        }
        if(!_myProgram.isExecutable()) {
            throw new IllegalStateException("can not set program" + " arguments for a not executable program. " + _myProgram);
        }

        int ret = _myKernel.getContext().getCL().clSetKernelArg(_myKernel.ID, theArgumentIndex, size, value);
        if(ret != CL.CL_SUCCESS) {
            throw CLException.newException(ret, "error setting arg "+theArgumentIndex+" to value "+value+" of size "+size+" of "+this);
        }
    }
}


Here is the cl code:

// Noise value used for the x component: noise3 evaluated at p itself.
float noiseX(float4 p) {
	const float mySample = noise3(p);
	return mySample;
}

// Noise value used for the y component: noise3 evaluated at a position built
// from the rotated components (y, z, x) of p, each shifted by a fixed offset.
// The offsets carry an f suffix so the arithmetic stays in single precision
// and does not depend on double support of the device.
float noiseY(float4 p) {
	return noise3((float4)(p.y + 31.416f, p.z - 47.853f, p.x + 12.793f, 0.0f));
}

// Noise value used for the z component: noise3 evaluated at a position built
// from the rotated components (z, x, y) of p, each shifted by a fixed offset.
// The offsets carry an f suffix so the arithmetic stays in single precision
// and does not depend on double support of the device.
float noiseZ(float4 p) {
	return noise3((float4)(p.z - 233.145f, p.x - 113.408f, p.y - 185.31f, 0.0f));
}

// Adds a noise based acceleration to every particle.
//
// One work item handles the particle at get_global_id(0). The noise is sampled
// at the position the particle would reach after theDeltaTime, scaled by
// theScale and shifted by theOffset; the result is weighted by theStrength and
// added to the particle's acceleration.
//
// Host side: the three global arrays and the two constant arrays are pointer
// arguments and must be set with buffer objects. theScale, theOffset,
// theDeltaTime and theStrength are passed by value.
kernel void field(
	global float4 * theAccelerations,
	global float4 * theVelocities,
	global float4 * thePositions,
	
	// By-value kernel arguments live in the private address space, so they
	// must not carry an address space qualifier such as constant.
	const float4 theScale,
	const float4 theOffset,
	const float theDeltaTime,
	
	constant float * theNoiseLengthScales,
	constant float * theNoiseGains,
	
	const float theStrength
){
	unsigned int myIndex = get_global_id(0);
	
	float4 myFuturePosition = thePositions[myIndex] + theVelocities[myIndex] * theDeltaTime;
	float4 myNoisePosition = myFuturePosition * theScale + theOffset;
	
	float4 result = (float4)(0);
		
	//float d=distance_and_normal(x, y, z, normal);
	// add turbulence octaves that respect boundaries, increasing upwards
	// (the loop bound is 1, so only the first entry of each array is read)
	for (int i = 0; i < 1; i++) {
		result += (float4)(noiseX(myNoisePosition), noiseY(myNoisePosition), noiseZ(myNoisePosition), 0) * theNoiseGains[i];
		myNoisePosition *= theNoiseLengthScales[i];
	}
		
	theAccelerations[myIndex] += result * theStrength;
}


I guess this should somehow be possible; any hint would be really appreciated.
Reply | Threaded
Open this post in threaded view
|

Re: problems with arg size

Michael Bien
  sorry i somehow managed to overlook this message.

I like the idea for the utility vector methods. Let's see if I can
somehow add a few of them to CLKernel without making the method count
explode.

Regarding your question.
I haven't used externally set constant memory yet. That's a good
opportunity for me to take a look at the spec again :)

regards,
michael


On 06/23/2011 10:31 AM, texone [via jogamp] wrote:

>
> I have written a small wrapper around the CLKernel class to simplify setting
> constant float4 and float2 kernel parameters which is working fine. Now I
> would also like to pass arrays as constant parameters but it seems the
> buffer with the arguments has the wrong size.
>
> Here is the exception:
>
> error setting arg 6 to value java.nio.DirectByteBuffer[pos=0 lim=128
> cap=128] of size 12 of CLKernel [id: 4296729632 name: field] [error:
> CL_INVALID_ARG_SIZE]
>
> Here is the java code:
>
>
>
> public class CCCLKernel {
>
> private CLKernel _myKernel;
> private CCCLProgram _myProgram;
>
> private final ByteBuffer _myBuffer;
>
> CCCLKernel(CCCLProgram theProgram, CLKernel theKernel){
> _myProgram = theProgram;
> _myKernel = theKernel;
>
> _myBuffer = Buffers.newDirectByteBuffer((Platform.is32Bit()?4:8) * 16);
> }
>
> public CLKernel clKernel() {
> return _myKernel;
> }
>
>
>
>
>      public void argument1f(int theArgumentIndex, float value) {
>       _myBuffer.putFloat(0, value);
>          setArgument(theArgumentIndex, 4, _myBuffer);
>      }
>
>      public void argument2f(int theArgumentIndex, float value1, float value2)
> {
>       _myBuffer.putFloat(0, value1);
>       _myBuffer.putFloat(4, value2);
>          setArgument(theArgumentIndex, 8, _myBuffer);
>      }
>
>      public void argument3f(int theArgumentIndex, float theValue1, float
> theValue2, float theValue3) {
>       argument4f(theArgumentIndex, theValue1, theValue2, theValue3,0);
>      }
>
>      public void argument4f(int theArgumentIndex, float theValue1, float
> theValue2, float theValue3, float theValue4) {
>       _myBuffer.putFloat(0, theValue1);
>       _myBuffer.putFloat(4, theValue2);
>       _myBuffer.putFloat(8, theValue3);
>       _myBuffer.putFloat(12, theValue4);
>          setArgument(theArgumentIndex, 16, _myBuffer);
>      }
>
>      public void argumentNf(int theArgumentIndex, float...theValues) {
>       for(int i = 0; i<  theValues.length;i++) {
>           _myBuffer.putFloat(i * 4, theValues[i]);
>       }
>          setArgument(theArgumentIndex, theValues.length * 4, _myBuffer);
>      }
>
>      private void setArgument(int theArgumentIndex, int size, Buffer value) {
>          if(theArgumentIndex>= _myKernel.numArgs || theArgumentIndex<  0) {
>              throw new IndexOutOfBoundsException("kernel "+ this +" has
> "+_myKernel.numArgs+" arguments, can not set argument with index
> "+theArgumentIndex);
>          }
>          if(!_myProgram.isExecutable()) {
>              throw new IllegalStateException("can not set program" + "
> arguments for a not executable program. " + _myProgram);
>          }
>
>          int ret =
> _myKernel.getContext().getCL().clSetKernelArg(_myKernel.ID,
> theArgumentIndex, size, value);
>          if(ret != CL.CL_SUCCESS) {
>              throw CLException.newException(ret, "error setting arg
> "+theArgumentIndex+" to value "+value+" of size "+size+" of "+this);
>          }
>      }
> }
>
>
> Here is the cl code:
>
>
> float noiseX(float4 p) {
> return noise3(p);
> }
>
> float noiseY(float4 p) {
> return noise3((float4)(p.y + 31.416, p.z - 47.853, p.x + 12.793,0));
> }
>
> float noiseZ(float4 p) {
> return noise3((float4)(p.z - 233.145, p.x - 113.408, p.y - 185.31, 0));
> }
>
> kernel void field(
> global float4 * theAccelerations,
> global float4 * theVelocities,
> global float4 * thePositions,
>
> constant float4 theScale,
> constant float4 theOffset,
> constant float theDeltaTime,
>
> constant float * theNoiseLengthScales,
> constant float * theNoiseGains,
>
> constant float theStrength
> ){
> unsigned int myIndex = get_global_id(0);
>
> float4 myFuturePosition = thePositions[myIndex] + theVelocities[myIndex] *
> theDeltaTime;
> float4 myNoisePosition = myFuturePosition * theScale + theOffset;
>
> float4 result = (float4)(0);
>
> //float d=distance_and_normal(x, y, z, normal);
> // add turbulence octaves that respect boundaries, increasing upwards
> for (int i = 0; i<  1; i++) {
> result += (float4)(noiseX(myNoisePosition), noiseY(myNoisePosition),
> noiseZ(myNoisePosition), 0) * theNoiseGains[i];
> myNoisePosition *= theNoiseLengthScales[i];
> }
>
> theAccelerations[myIndex] += result * theStrength;
> }
>
>
> I guess this should some how be possible, any hint would be really
> appreciated.
>
Reply | Threaded
Open this post in threaded view
|

Re: problems with arg size

Michael Bien
In reply to this post by texone
It works. Constant memory is apparently just size-restricted, read-only
global memory; you use it by allocating plain old CLBuffers and
passing the pointer to the kernel. That's why you got the invalid arg size
error: the driver expected a pointer.

quick test:

     public static void main(String[] args) throws IOException {

         CLContext context =
CLContext.create(CLPlatform.getDefault(CLPlatformFilters.type(Type.CPU)));
         System.out.println(context);

         CLProgram program =
context.createProgram(KernelArrayTest.class.getResourceAsStream("test.cl")).build();

         CLKernel kernel = program.createCLKernel("foo");

         CLBuffer<IntBuffer> buffer = context.createBuffer(
                 Buffers.newDirectIntBuffer(new int[]{1,2,3,4}),
CLBuffer.Mem.COPY_BUFFER);

         kernel.setArg(0, buffer);

         CLCommandQueue queue =
context.getMaxFlopsDevice().createCommandQueue();
         queue.putTask(kernel).finish();

         context.release();
     }


kernel...

#pragma OPENCL EXTENSION cl_amd_printf : enable

// Prints the four entries of the constant array, one per line, in index order.
kernel void foo(constant int array[4]) {

     for (int i = 0; i < 4; ++i) {
          printf("%d\n", array[i]);
     }

}

output...
run-single:
CLContext [id: 140558620923808, platform: ATI Stream, profile:
FULL_PROFILE, devices: 1]
1
2
3
4
BUILD SUCCESSFUL (total time: 1 second)
Reply | Threaded
Open this post in threaded view
|

Re: problems with arg size

texone
Hi Michael

Thanks for the reply. I had sort of figured it out myself already, but I will check it out again. I started to dive into OpenCL lately and am porting my old GPGPU code, which used ping-pong framebuffers. Your JOCL, plus the help here in the forum, really is great.

Cheers Christian