Convolução Unidimensional 2

Sinal em memória global. Máscara em memória __constant__. Tratamento de bordas e alocação feitos com if.

Parte B: Terceiro Exercício.

 1 #include <string.h>
 2 #include <math.h>
 3 #include <cuda.h>
 4 #define TILE_DIM 256
 5 #include "simple_arrays.h"
 6 
 7 /*
 8  * !.! Convolução 1D.
 9  *
10  * # dimS # Dimension of the input/output signal
11  * # dimM # Dimension of the mask
12  * # S # Input Signal
13  * # M # Mask
14  * # R # Output Signal
15  *
16  * OBS.: Mask cannot be larger than 128.
17  */
18 
19 
/* Capacity, in floats, of the constant-memory mask buffer below. */
#define CDCONV_MAX_MASK_DIM 128

/*
 * Convolution mask, resident in constant memory.
 *
 * Declared before the kernel so the kernel can read it directly. The host
 * fills it with cudaMemcpyToSymbol(); it is statically allocated and must
 * never be passed to cudaMalloc()/cudaFree().
 */
__constant__ float dM[CDCONV_MAX_MASK_DIM];

/*
 * 1D convolution kernel: one thread per output sample.
 *
 * Launch layout: 1D grid of 1D blocks with gridDim.x * blockDim.x >= dimS.
 *
 * dimS : number of samples in S and R
 * S    : input signal (device global memory)
 * dimM : number of valid taps in the constant-memory mask dM
 * M    : unused; kept so the parameter list is unchanged. The mask is
 *        always read from the __constant__ array dM.
 * R    : output signal (device global memory)
 *
 * Samples closer than dimM/2 to either end of the signal are not written,
 * so the caller must pre-initialise R (the host wrapper zeroes it).
 */
__global__ void cdConv_cd(int dimS , float *S,
                          int dimM , float *M,
                          float *R){

    (void)M;  /* mask comes from constant memory, not from this pointer */

    const int Col  = blockIdx.x * blockDim.x + threadIdx.x;
    const int half = dimM / 2;

    /* Border handling: skip samples whose window would leave the signal.
       This also rejects the surplus threads of the last block (Col >= dimS). */
    if (Col < half || Col >= dimS - half) return;

    /* Accumulate in float: an integer accumulator would truncate the
       running sum on every iteration. */
    float acc = 0.0f;

    for (int c = 0; c < dimM; c++) {
        acc += S[Col + c - half] * dM[c];
    }

    R[Col] = acc;
}

/*
 * Host wrapper: runs the 1D convolution of S with M on the GPU.
 *
 * dimS : number of samples in S and R
 * S    : input signal (host memory, dimS floats)
 * dimM : number of mask taps, 1 <= dimM <= CDCONV_MAX_MASK_DIM
 * M    : mask (host memory, dimM floats)
 * R    : output signal (host memory, dimS floats, allocated by the caller)
 *
 * If dimS <= 0 nothing is done. If the mask size is out of range or any
 * CUDA call fails, R is filled with zeros so the caller never reads
 * uninitialised memory.
 */
void cdConv_cpp(int dimS , float *S,
                int dimM , float *M,
                           float *R){

    if (dimS <= 0) return;

    const size_t signalBytes = (size_t)dimS * sizeof(float);
    float *dS = NULL;
    float *dR = NULL;

    /* Reject masks that would overflow the constant-memory buffer. */
    cudaError_t status = (dimM > 0 && dimM <= CDCONV_MAX_MASK_DIM)
                             ? cudaSuccess
                             : cudaErrorInvalidValue;

    if (status == cudaSuccess) status = cudaMalloc((void **)&dS, signalBytes);
    if (status == cudaSuccess) status = cudaMalloc((void **)&dR, signalBytes);

    if (status == cudaSuccess)
        status = cudaMemcpy(dS, S, signalBytes, cudaMemcpyHostToDevice);

    /* A __constant__ symbol must be written with cudaMemcpyToSymbol();
       a plain cudaMemcpy() on the symbol name does not target the device copy. */
    if (status == cudaSuccess)
        status = cudaMemcpyToSymbol(dM, M, (size_t)dimM * sizeof(float));

    /* Border samples are never written by the kernel, so zero them here. */
    if (status == cudaSuccess) status = cudaMemset(dR, 0, signalBytes);

    if (status == cudaSuccess) {
        /* Ceiling division without risking overflow of dimS + TILE_DIM. */
        const int numBlocks = dimS / TILE_DIM + ((dimS % TILE_DIM != 0) ? 1 : 0);
        dim3 grid(numBlocks);
        dim3 block(TILE_DIM);

        cdConv_cd<<<grid, block>>>(dimS, dS, dimM, NULL, dR);

        /* Launch-configuration errors are only visible through this call. */
        status = cudaGetLastError();
    }

    /* Blocking copy: also surfaces any fault raised while the kernel ran. */
    if (status == cudaSuccess)
        status = cudaMemcpy(R, dR, signalBytes, cudaMemcpyDeviceToHost);

    if (status != cudaSuccess) memset(R, 0, signalBytes);

    /* cudaFree(NULL) is a no-op, so this is safe on every path. */
    cudaFree(dR);
    cudaFree(dS);
}
71 
72 
/*
 * Python-facing entry point for the 1D convolution.
 *
 * dimS, S : length and data of the input signal
 * dimM, M : length and data of the mask
 * R       : receives a newly allocated (new[]) array of dimS floats
 *           holding the output signal
 * dimR    : receives the length of *R (always dimS)
 */
void cdConv( int dimS , float *S,
               int dimM , float *M,
               float **R, int *dimR){

    /* The output has the same length as the input signal. */
    float *result = new float[dimS];

    *R    = result;
    *dimR = dimS;

    cdConv_cpp(dimS, S, dimM, M, result);
}

Header file.

 1 /*
 2  * !.! Convolução 1D : Python Interface
 3  *
 4  * R = cdConv( S , M )
 5  *
 6  * # S # Input Signal : Array( 1D )
 7  * # M # Mask : Array ( 1D )
 8  * # R # Output Signal : Array( 1D )
 9  *
10  */
11 
12 
13 void cdConv( int DIM1 , float *IN_ARRAY1,          /* S: input signal length and data */
14                int DIM1 , float *IN_ARRAY1,          /* M: mask length and data */
15                float **ARGOUT_ARRAY1,int *DIM1);     /* R: output signal and its length */
ERROR ping: sandbox "xsb_pycuda" is busy. Try again.
 1 from numpy import *
 2 from numpy.random import *
 3 import courseIA366F2S2010.lc100373_3_2 as clib  # module providing cdConv, called below
 4 from ia636 import *
 5 from courseIA366F2S2010.funcTimer import funcTimer
 6 
 7 sinal = array( random_integers(0,10,(200000)) ,dtype=float32)  # input signal: 200000 random integers as float32
 8 
 9 masc = array( [-1,-2,-4,14,-4,-2,-1],dtype=float32 )  # 7-tap mask
10 
11 tGPU,rGPU = funcTimer( lambda: clib.cdConv( sinal , masc ) )  # presumably (elapsed time, result) of the GPU run -- confirm funcTimer's contract
12 tCPU,rCPU = funcTimer( lambda: iaconv( sinal , masc ) )  # same measurement for iaconv, used here as the CPU baseline
13 
14 ret = rGPU  # only the GPU result is kept; rCPU is never compared against it
15 
16 f1 = [arange( 0,sinal.size,1 ), sinal,'', 'impulses']  # f1..f3 are only referenced by the commented-out mmplot calls
17 f2 = [arange( 0,masc.size,1 ), masc,'', 'impulses']
18 f3 = [arange( 0,ret.size,1 ), ret,'', 'impulses']
19 
20 print "Sinal: %i,  e   Mascara: %i" %(sinal.size,masc.size)
21 print "Tempo na GPU: " , tGPU
22 print "Tempo na CPU: " , tCPU
23 print "Ratio : " , tGPU/tCPU  # GPU time divided by CPU time
24 
25 
26 #mmplot([f1],['set yrange ['+str(min(sinal.min(),0))+':'+str(sinal.max())+']','set xrange [-1:'+str(sinal.size)+']'], ptitle='Entrada')
27 #mmplot([f2],['set yrange ['+str(min(masc.min(),0))+':'+str(masc.max())+']','set xrange [-1:'+str(masc.size)+']'], ptitle='Mascara')
28 #mmplot([f3],['set yrange ['+str(min(ret.min(),0))+':'+str(ret.max())+']','set xrange [-1:'+str(ret.size)+']'], ptitle='Saida')
ERROR ping: sandbox "xsb_pycuda" is busy. Try again.