1 #include "../include/lass.h" 125 enum LASS_RETURN
kdsymm(
enum DDSS_SIDE SIDE,
enum DDSS_UPLO UPLO,
127 const double ALPHA,
double *A,
int LDA,
129 const double BETA,
double *C,
int LDC )
144 if ( M % TILE_SIZE == 0 )
150 mt = ( M / TILE_SIZE ) + 1;
153 if ( N % TILE_SIZE == 0 )
159 nt = ( N / TILE_SIZE ) + 1;
164 if ( M % TILE_SIZE == 0 )
170 kt = ( M / TILE_SIZE ) + 1;
175 if ( N % TILE_SIZE == 0 )
181 kt = ( N / TILE_SIZE ) + 1;
208 double (*TILE_A)[Ant][TILE_SIZE * TILE_SIZE] = malloc ( Amt * Ant *
209 TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
213 fprintf( stderr,
"Failure in kdsymm for matrix TILE_A\n" );
217 double (*TILE_B)[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
218 TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
222 fprintf( stderr,
"Failure in kdsymm for matrix TILE_B\n" );
226 double (*TILE_C)[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
227 TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
231 fprintf( stderr,
"Failure in kdsymm for matrix TILE_C\n" );
252 for ( mi = 0; mi < mt; mi++ )
255 for ( ni = 0; ni < nt; ni++ )
263 for ( ki = 0; ki < kt; ki++ )
277 #pragma oss task in( TILE_A[mi][ki] ) \ 278 in( TILE_B[ki][ni] ) \ 279 inout( TILE_C[mi][ni] ) \ 280 shared( TILE_A, TILE_B, TILE_C ) \ 281 firstprivate( mi, ni, ki, betat ) 282 cblas_dgemm( CblasRowMajor,
283 ( CBLAS_TRANSPOSE ) NoTrans,
284 ( CBLAS_TRANSPOSE ) NoTrans,
288 ALPHA, TILE_A[mi][ki], tile_size_k,
289 TILE_B[ki][ni], tile_size_n,
290 betat, TILE_C[mi][ni], tile_size_n );
296 #pragma oss task in( TILE_A[ki][ki] ) \ 297 in( TILE_B[ki][ni] ) \ 298 inout( TILE_C[mi][ni] ) \ 299 shared( TILE_A, TILE_B, TILE_C ) \ 300 firstprivate( mi, ni, ki, betat ) 301 cblas_dsymm( CblasRowMajor,
304 tile_size_m, tile_size_n,
305 ALPHA, TILE_A[ki][ki], tile_size_k,
306 TILE_B[ki][ni], tile_size_n,
307 betat, TILE_C[mi][ni],
312 #pragma oss task in( TILE_A[ki][mi] ) \ 313 in( TILE_B[ki][ni] ) \ 314 inout( TILE_C[mi][ni] ) \ 315 shared( TILE_A, TILE_B, TILE_C ) \ 316 firstprivate( mi, ni, ki, betat ) 317 cblas_dgemm( CblasRowMajor,
318 ( CBLAS_TRANSPOSE ) Trans,
319 ( CBLAS_TRANSPOSE ) NoTrans,
323 ALPHA, TILE_A[ki][mi], tile_size_m,
324 TILE_B[ki][ni], tile_size_n,
325 betat, TILE_C[mi][ni],
334 for ( ki = 0; ki < kt; ki++ )
348 #pragma oss task in( TILE_A[ki][mi] ) \ 349 in( TILE_B[ki][ni] ) \ 350 inout( TILE_C[mi][ni] ) \ 351 shared( TILE_A, TILE_B, TILE_C ) \ 352 firstprivate( mi, ni, ki, betat ) 353 cblas_dgemm( CblasRowMajor,
354 ( CBLAS_TRANSPOSE ) Trans,
355 ( CBLAS_TRANSPOSE ) NoTrans,
359 ALPHA, TILE_A[ki][mi], tile_size_m,
360 TILE_B[ki][ni], tile_size_n,
361 betat, TILE_C[mi][ni], tile_size_n );
367 #pragma oss task in( TILE_A[ki][ki] ) \ 368 in( TILE_B[ki][ni] ) \ 369 inout( TILE_C[mi][ni] ) \ 370 shared( TILE_A, TILE_B, TILE_C ) \ 371 firstprivate( mi, ni, ki, betat ) 372 cblas_dsymm( CblasRowMajor,
375 tile_size_m, tile_size_n,
376 ALPHA, TILE_A[ki][ki], tile_size_k,
377 TILE_B[ki][ni], tile_size_n,
378 betat, TILE_C[mi][ni],
383 #pragma oss task in( TILE_A[mi][ki] ) \ 384 in( TILE_B[ki][ni] ) \ 385 inout( TILE_C[mi][ni] ) \ 386 shared( TILE_A, TILE_B, TILE_C ) \ 387 firstprivate( mi, ni, ki, betat ) 388 cblas_dgemm( CblasRowMajor,
389 ( CBLAS_TRANSPOSE ) NoTrans,
390 ( CBLAS_TRANSPOSE ) NoTrans,
394 ALPHA, TILE_A[mi][ki], tile_size_k,
395 TILE_B[ki][ni], tile_size_n,
396 betat, TILE_C[mi][ni],
408 for ( ki = 0; ki < kt; ki++ )
422 #pragma oss task in( TILE_B[mi][ki] ) \ 423 in( TILE_A[ni][ki] ) \ 424 inout( TILE_C[mi][ni] ) \ 425 shared( TILE_A, TILE_B, TILE_C ) \ 426 firstprivate( mi, ni, ki, betat ) 427 cblas_dgemm( CblasRowMajor,
428 ( CBLAS_TRANSPOSE ) NoTrans,
429 ( CBLAS_TRANSPOSE ) Trans,
433 ALPHA, TILE_B[mi][ki], tile_size_k,
434 TILE_A[ni][ki], tile_size_k,
435 betat, TILE_C[mi][ni], tile_size_n );
441 #pragma oss task in( TILE_A[ki][ki] ) \ 442 in( TILE_B[mi][ki] ) \ 443 inout( TILE_C[mi][ni] ) \ 444 shared( TILE_A, TILE_B, TILE_C ) \ 445 firstprivate( mi, ni, ki, betat ) 446 cblas_dsymm( CblasRowMajor,
449 tile_size_m, tile_size_n,
450 ALPHA, TILE_A[ki][ki], tile_size_k,
451 TILE_B[mi][ki], tile_size_k,
452 betat, TILE_C[mi][ni],
457 #pragma oss task in( TILE_B[mi][ki] ) \ 458 in( TILE_A[ki][ni] ) \ 459 inout( TILE_C[mi][ni] ) \ 460 shared( TILE_A, TILE_B, TILE_C ) \ 461 firstprivate( mi, ni, ki, betat ) 462 cblas_dgemm( CblasRowMajor,
463 ( CBLAS_TRANSPOSE ) NoTrans,
464 ( CBLAS_TRANSPOSE ) NoTrans,
468 ALPHA, TILE_B[mi][ki], tile_size_k,
469 TILE_A[ki][ni], tile_size_n,
470 betat, TILE_C[mi][ni],
479 for ( ki = 0; ki < kt; ki++ )
493 #pragma oss task in( TILE_B[mi][ki] ) \ 494 in( TILE_A[ki][ni] ) \ 495 inout( TILE_C[mi][ni] ) \ 496 shared( TILE_A, TILE_B, TILE_C ) \ 497 firstprivate( mi, ni, ki, betat ) 498 cblas_dgemm( CblasRowMajor,
499 ( CBLAS_TRANSPOSE ) NoTrans,
500 ( CBLAS_TRANSPOSE ) NoTrans,
504 ALPHA, TILE_B[mi][ki], tile_size_k,
505 TILE_A[ki][ni], tile_size_n,
506 betat, TILE_C[mi][ni], tile_size_n );
512 #pragma oss task in( TILE_A[ki][ki] ) \ 513 in( TILE_B[ki][ni] ) \ 514 inout( TILE_C[mi][ni] ) \ 515 shared( TILE_A, TILE_B, TILE_C ) \ 516 firstprivate( mi, ni, ki, betat ) 517 cblas_dsymm( CblasRowMajor,
520 tile_size_m, tile_size_n,
521 ALPHA, TILE_A[ki][ki], tile_size_k,
522 TILE_B[mi][ki], tile_size_k,
523 betat, TILE_C[mi][ni],
528 #pragma oss task in( TILE_B[mi][ki] ) \ 529 in( TILE_A[ni][ki] ) \ 530 inout( TILE_C[mi][ni] ) \ 531 shared( TILE_A, TILE_B, TILE_C ) \ 532 firstprivate( mi, ni, ki, betat ) 533 cblas_dgemm( CblasRowMajor,
534 ( CBLAS_TRANSPOSE ) NoTrans,
535 ( CBLAS_TRANSPOSE ) Trans,
539 ALPHA, TILE_B[mi][ki], tile_size_k,
540 TILE_A[ni][ki], tile_size_k,
541 betat, TILE_C[mi][ni],
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
enum LASS_RETURN kdsymm(enum DDSS_SIDE SIDE, enum DDSS_UPLO UPLO, int M, int N, const double ALPHA, double *A, int LDA, double *B, int LDB, const double BETA, double *C, int LDC)
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)