/*
 * kdgemm -- tiled double-precision matrix multiply (listing excerpt).
 *
 * NOTE(review): this text is an excerpt of a numbered source listing. The
 * leading integers are the original file's line numbers, and many original
 * lines (braces, else keywords, local declarations, some call arguments) are
 * absent. The comments below describe only what is visible; anything else is
 * marked as an assumption to confirm against the full file.
 *
 * The parameter list visible here omits M, N, K, B and LDB, which do appear
 * in the one-line kdgemm signature at the end of this excerpt.
 *
 * Visible flow:
 *   1. tests and assignments for the tile counts mt, kt, nt (from M, K, N
 *      and TILE_SIZE);
 *   2. allocation of the tile buffers TILE_A, TILE_B, TILE_C;
 *   3. a loop nest over mi and ni that spawns `#pragma oss task` (OmpSs-2)
 *      tasks, each calling cblas_dgemm on individual tiles.
 */
1 #include "../include/lass.h" 131 enum LASS_RETURN
kdgemm(
enum DDSS_TRANS TRANS_A,
enum DDSS_TRANS TRANS_B,
133 const double ALPHA,
double *A,
int LDA,
135 const double BETA,
double *C,
int LDC )
/*
 * Tile counts along M, K and N. For each dimension only the
 * `% TILE_SIZE == 0` test and the `( x / TILE_SIZE ) + 1` assignment are
 * visible; presumably the exact-multiple branch assigns x / TILE_SIZE
 * without the +1 -- TODO confirm.
 */
150 if ( M % TILE_SIZE == 0 )
156 mt = ( M / TILE_SIZE ) + 1;
159 if ( K % TILE_SIZE == 0 )
165 kt = ( K / TILE_SIZE ) + 1;
168 if ( N % TILE_SIZE == 0 )
174 nt = ( N / TILE_SIZE ) + 1;
/*
 * Am/An and Bm/Bn are used as tile-grid dimensions in the allocations below.
 * Presumably they are assigned in the bodies of these two tests depending on
 * the transpose flags; the assignments are not in this excerpt.
 */
181 if ( TRANS_A == NoTrans )
192 if ( TRANS_B == NoTrans )
/*
 * Tile storage. Each buffer is a single malloc'ed region accessed through a
 * pointer to a row of tiles, every tile holding TILE_SIZE * TILE_SIZE
 * doubles: TILE_A is sized Am x An tiles, TILE_B is Bm x Bn, TILE_C is
 * mt x nt.
 *
 * NOTE(review): the size expression multiplies the tile counts and TILE_SIZE
 * left to right before sizeof(double) is applied. If those operands are int
 * the partial product is computed in int and could overflow for large
 * matrices -- confirm the operand types.
 */
207 double (*TILE_A)[An][TILE_SIZE * TILE_SIZE] = malloc ( Am * An *
208 TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
/*
 * Diagnostic written to stderr for the TILE_A allocation. The guarding
 * condition and whatever return/cleanup follows are not visible here.
 */
212 fprintf( stderr,
"Failure in kdgemm for matrix TILE_A\n" );
216 double (*TILE_B)[Bn][TILE_SIZE * TILE_SIZE] = malloc ( Bm * Bn *
217 TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
/* Same diagnostic pattern for TILE_B (guard not visible). */
221 fprintf( stderr,
"Failure in kdgemm for matrix TILE_B\n" );
225 double (*TILE_C)[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
226 TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
/* Same diagnostic pattern for TILE_C (guard not visible). */
230 fprintf( stderr,
"Failure in kdgemm for matrix TILE_C\n" );
/*
 * Second pair of transpose tests; their bodies are not in the excerpt.
 * Presumably this is where A and B are copied into the tile buffers
 * -- TODO confirm.
 */
239 if ( TRANS_A == NoTrans )
249 if ( TRANS_B == NoTrans )
/* Visit every output tile TILE_C[mi][ni]. */
265 for ( mi = 0; mi < mt; mi++ )
268 for ( ni = 0; ni < nt; ni++ )
271 if ( TRANS_A == NoTrans )
/*
 * Case ALPHA == 0.0 or k_check == 0: a single task with an inout dependence
 * on TILE_C[mi][ni] calls cblas_dgemm with BETA, passing TILE_A[0][0] with
 * leading dimension 1 as the A operand. The dimension arguments and the B
 * operand of this call are not in the excerpt.
 *
 * NOTE(review): the pragma below ends with a line-continuation backslash
 * after firstprivate( mi, ni ) and listing line 285 is absent. Confirm in
 * the full file that the continuation does not splice the cblas_dgemm call
 * into the pragma.
 */
280 if ( ( ALPHA == 0.0 ) || ( k_check == 0 ) )
282 #pragma oss task inout( TILE_C[mi][ni] ) \ 283 shared( TILE_A, TILE_B, TILE_C ) \ 284 firstprivate( mi, ni ) \ 286 cblas_dgemm( CblasRowMajor,
287 ( CBLAS_TRANSPOSE ) TRANS_A,
288 ( CBLAS_TRANSPOSE ) TRANS_B,
292 ALPHA, TILE_A[0][0], 1,
294 BETA, TILE_C[mi][ni], tile_size_n );
/*
 * TRANS_A == NoTrans: the two loops below index A tiles as TILE_A[mi][ki]
 * with leading dimension tile_size_k.
 */
296 else if ( TRANS_A == NoTrans )
/*
 * TRANS_B == NoTrans: B tiles are indexed TILE_B[ki][ni] with leading
 * dimension tile_size_n. One task is created per ki; the in() clauses on the
 * A and B tiles and the inout() clause on the C tile declare its data
 * dependences. betat is passed as the beta scalar; how it is set per ki is
 * on lines not shown here.
 *
 * NOTE(review): as above, this pragma ends with a continuation backslash and
 * listing line 318 is absent -- confirm in the full file.
 */
299 if ( TRANS_B == NoTrans )
301 for ( ki = 0; ki < kt; ki++ )
313 #pragma oss task in( TILE_A[mi][ki] ) \ 314 in( TILE_B[ki][ni] ) \ 315 inout( TILE_C[mi][ni] ) \ 316 shared( TILE_A, TILE_B, TILE_C ) \ 317 firstprivate( mi, ni, ki, betat ) \ 319 cblas_dgemm( CblasRowMajor,
320 ( CBLAS_TRANSPOSE ) TRANS_A,
321 ( CBLAS_TRANSPOSE ) TRANS_B,
325 ALPHA, TILE_A[mi][ki], tile_size_k,
326 TILE_B[ki][ni], tile_size_n,
327 betat, TILE_C[mi][ni], tile_size_n );
/*
 * Presumably the else branch of the TRANS_B test (the else line is not
 * shown): B tiles are indexed TILE_B[ni][ki] with leading dimension
 * tile_size_k.
 */
333 for ( ki = 0; ki < kt; ki++ )
345 #pragma oss task in( TILE_A[mi][ki] ) \ 346 in( TILE_B[ni][ki] ) \ 347 inout( TILE_C[mi][ni] ) \ 348 shared( TILE_A, TILE_B, TILE_C ) \ 349 firstprivate( mi, ni, ki, betat ) 350 cblas_dgemm( CblasRowMajor,
351 ( CBLAS_TRANSPOSE ) TRANS_A,
352 ( CBLAS_TRANSPOSE ) TRANS_B,
356 ALPHA, TILE_A[mi][ki], tile_size_k,
357 TILE_B[ni][ki], tile_size_k,
358 betat, TILE_C[mi][ni], tile_size_n );
/*
 * Presumably the remaining TRANS_A case (the else line is not shown): the
 * two loops below index A tiles as TILE_A[ki][mi] with leading dimension
 * tile_size_m. With TRANS_B == NoTrans, B tiles are TILE_B[ki][ni] with
 * leading dimension tile_size_n.
 */
365 if ( TRANS_B == NoTrans )
367 for ( ki = 0; ki < kt; ki++ )
379 #pragma oss task in( TILE_A[ki][mi] ) \ 380 in( TILE_B[ki][ni] ) \ 381 inout( TILE_C[mi][ni] ) \ 382 shared( TILE_A, TILE_B, TILE_C ) \ 383 firstprivate( mi, ni, ki, betat ) 384 cblas_dgemm( CblasRowMajor,
385 ( CBLAS_TRANSPOSE ) TRANS_A,
386 ( CBLAS_TRANSPOSE ) TRANS_B,
390 ALPHA, TILE_A[ki][mi], tile_size_m,
391 TILE_B[ki][ni], tile_size_n,
392 betat, TILE_C[mi][ni], tile_size_n );
/*
 * Last visible variant: A tiles TILE_A[ki][mi] (leading dimension
 * tile_size_m) with B tiles TILE_B[ni][ki] (leading dimension tile_size_k).
 */
398 for ( ki = 0; ki < kt; ki++ )
410 #pragma oss task in( TILE_A[ki][mi] ) \ 411 in( TILE_B[ni][ki] ) \ 412 inout( TILE_C[mi][ni] ) \ 413 shared( TILE_A, TILE_B, TILE_C ) \ 414 firstprivate( mi, ni, ki, betat ) 415 cblas_dgemm( CblasRowMajor,
416 ( CBLAS_TRANSPOSE ) TRANS_A,
417 ( CBLAS_TRANSPOSE ) TRANS_B,
421 ALPHA, TILE_A[ki][mi], tile_size_m,
422 TILE_B[ni][ki], tile_size_k,
423 betat, TILE_C[mi][ni], tile_size_n );
/*
 * NOTE(review): the four lines below are bare signatures with no body and no
 * terminating ';'. They look like a cross-reference list attached to the
 * listing rather than definitions. Only what the signatures themselves show
 * is described; behavior is not visible from here.
 */
/*
 * ddss_dflat2tiled: takes an M x N extent, a double pointer A with LDA, tile
 * counts MT and NT, and a tile array whose rows hold NT tiles of
 * TILE_SIZE * TILE_SIZE doubles. Presumably converts the flat matrix into
 * the tiled layout -- TODO confirm against the definition.
 */
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
/*
 * kdgemm: full parameter list. It includes M, N, K, B and LDB in addition to
 * the transpose flags, ALPHA/BETA scalars and the A/C pointers with their
 * LDA/LDC values; returns enum LASS_RETURN.
 */
enum LASS_RETURN kdgemm(enum DDSS_TRANS TRANS_A, enum DDSS_TRANS TRANS_B, int M, int N, int K, const double ALPHA, double *A, int LDA, double *B, int LDB, const double BETA, double *C, int LDC)
/*
 * ddss_dtiled2flat: same parameter list as ddss_dflat2tiled. Presumably the
 * inverse conversion (tiled layout back to flat) -- TODO confirm.
 */
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
/*
 * ddss_tile_size: takes two ints (M, MT) and returns an int; the computation
 * is not visible in this excerpt.
 */
int ddss_tile_size(int M, int MT)