ALPHA is a scalar, B is an M by N matrix, and A is a unit, or non-unit, upper or lower triangular matrix.
133 int ki, ni, mi, nni, mmi;
141 if ( M % TILE_SIZE == 0 )
147 mt = ( M / TILE_SIZE ) + 1;
150 if ( N % TILE_SIZE == 0 )
156 nt = ( N / TILE_SIZE ) + 1;
181 double ( *TILE_A )[An][TILE_SIZE * TILE_SIZE] = malloc(
182 Am * An * TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
184 if ( TILE_A == NULL )
186 fprintf( stderr,
"Failure in ddss_dtile_alloc for matrix TILE_A\n" );
190 double ( *TILE_B )[Bn][TILE_SIZE * TILE_SIZE] = malloc(
191 Bm * Bn * TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
193 if ( TILE_B == NULL )
195 fprintf( stderr,
"Failure in ddss_dtile_alloc for matrix TILE_B\n" );
218 if ( TRANS_A == NoTrans )
220 for ( ki = 0; ki < mt; ki++ )
224 for ( ni = 0; ni < nt; ni++ )
228 #pragma oss task in( TILE_A[ki][ki] ) \ 229 inout( TILE_B[ki][ni] ) \ 230 shared( TILE_A, TILE_B ) \ 231 firstprivate( ki, ni ) \ 233 cblas_dtrmm( CblasRowMajor,
234 ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
235 ( CBLAS_TRANSPOSE ) TRANS_A,
239 ALPHA, TILE_A[ki][ki], tile_size_k,
240 TILE_B[ki][ni], tile_size_n );
242 for ( mi = ki + 1; mi < mt; mi++ )
246 #pragma oss task in( TILE_A[ki][mi] ) \ 247 in( TILE_B[mi][ni] ) \ 248 inout( TILE_B[ki][ni] ) \ 249 shared( TILE_A, TILE_B ) \ 250 firstprivate( ki, mi, ni ) \ 252 cblas_dgemm( CblasRowMajor,
253 CblasNoTrans, CblasNoTrans,
257 ALPHA, TILE_A[ki][mi], tile_size_m,
258 TILE_B[mi][ni], tile_size_n,
259 1.0, TILE_B[ki][ni], tile_size_n );
267 for ( ki = mt-1; ki > -1; ki-- )
271 for ( ni = 0; ni < nt; ni++ )
275 #pragma oss task in( TILE_A[ki][ki] ) \ 276 inout( TILE_B[ki][ni] ) \ 277 shared( TILE_A, TILE_B ) \ 278 firstprivate( ki, ni ) \ 280 cblas_dtrmm( CblasRowMajor,
281 ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
282 ( CBLAS_TRANSPOSE ) TRANS_A,
286 ALPHA, TILE_A[ki][ki], tile_size_k,
287 TILE_B[ki][ni], tile_size_n );
289 for ( mi = 0; mi < ki; mi++ )
293 #pragma oss task in( TILE_A[mi][ki] ) \ 294 in( TILE_B[mi][ni] ) \ 295 inout( TILE_B[ki][ni] ) \ 296 shared( TILE_A, TILE_B ) \ 297 firstprivate( ki, mi, ni ) \ 299 cblas_dgemm( CblasRowMajor,
300 CblasTrans, CblasNoTrans,
304 ALPHA, TILE_A[mi][ki], tile_size_k,
305 TILE_B[mi][ni], tile_size_n,
306 1.0, TILE_B[ki][ni], tile_size_n );
316 if ( TRANS_A == NoTrans )
318 for ( ki = mt-1; ki > -1; ki-- )
322 for ( ni = 0; ni < nt; ni++ )
326 #pragma oss task in( TILE_A[ki][ki] ) \ 327 inout( TILE_B[ki][ni] ) \ 328 shared( TILE_A, TILE_B ) \ 329 firstprivate( ki, ni ) \ 331 cblas_dtrmm( CblasRowMajor,
332 ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
333 ( CBLAS_TRANSPOSE ) TRANS_A,
337 ALPHA, TILE_A[ki][ki], tile_size_k,
338 TILE_B[ki][ni], tile_size_n );
340 for ( mi = 0; mi < ki; mi++ )
344 #pragma oss task in( TILE_A[ki][mi] ) \ 345 in( TILE_B[mi][ni] ) \ 346 inout( TILE_B[ki][ni] ) \ 347 shared( TILE_A, TILE_B ) \ 348 firstprivate( ki, mi, ni ) \ 350 cblas_dgemm( CblasRowMajor,
351 CblasNoTrans, CblasNoTrans,
355 ALPHA, TILE_A[ki][mi], tile_size_m,
356 TILE_B[mi][ni], tile_size_n,
357 1.0, TILE_B[ki][ni], tile_size_n );
366 for ( ki = 0; ki < mt; ki++ )
370 for ( ni = 0; ni < nt; ni++ )
374 #pragma oss task in( TILE_A[ki][ki] ) \ 375 inout( TILE_B[ki][ni] ) \ 376 shared( TILE_A, TILE_B ) \ 377 firstprivate( ki, ni ) \ 379 cblas_dtrmm( CblasRowMajor,
380 ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
381 ( CBLAS_TRANSPOSE ) TRANS_A,
385 ALPHA, TILE_A[ki][ki], tile_size_k,
386 TILE_B[ki][ni], tile_size_n );
388 for ( mi = ki + 1; mi < mt; mi++ )
392 #pragma oss task in( TILE_A[mi][ki] ) \ 393 in( TILE_B[mi][ni] ) \ 394 inout( TILE_B[ki][ni] ) \ 395 shared( TILE_A, TILE_B ) \ 396 firstprivate( ki, mi, ni ) \ 398 cblas_dgemm( CblasRowMajor,
399 CblasTrans, CblasNoTrans,
403 ALPHA, TILE_A[mi][ki], tile_size_k,
404 TILE_B[mi][ni], tile_size_n,
405 1.0, TILE_B[ki][ni], tile_size_n );
417 if ( TRANS_A == NoTrans )
419 for ( ki = nt-1; ki > -1; ki-- )
423 for ( ni = 0; ni < mt; ni++ )
427 #pragma oss task in( TILE_A[ki][ki] ) \ 428 inout( TILE_B[ni][ki] ) \ 429 shared( TILE_A, TILE_B ) \ 430 firstprivate( ki, ni ) \ 432 cblas_dtrmm( CblasRowMajor,
433 ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
434 ( CBLAS_TRANSPOSE ) TRANS_A,
438 ALPHA, TILE_A[ki][ki], tile_size_k,
439 TILE_B[ni][ki], tile_size_k );
441 for ( mi = 0; mi < ki; mi++ )
445 #pragma oss task in( TILE_B[ni][mi] ) \ 446 in( TILE_A[mi][ki] ) \ 447 inout( TILE_B[ni][ki] ) \ 448 shared( TILE_A, TILE_B ) \ 449 firstprivate( ki, mi, ni ) \ 451 cblas_dgemm( CblasRowMajor,
452 CblasNoTrans, CblasNoTrans,
456 ALPHA, TILE_B[ni][mi], tile_size_m,
457 TILE_A[mi][ki], tile_size_k,
458 1.0, TILE_B[ni][ki], tile_size_k );
466 for ( ki = 0; ki < nt; ki++ )
470 for ( ni = 0; ni < mt; ni++ )
474 #pragma oss task in( TILE_A[ki][ki] ) \ 475 inout( TILE_B[ni][ki] ) \ 476 shared( TILE_A, TILE_B ) \ 477 firstprivate( ki, ni ) \ 479 cblas_dtrmm( CblasRowMajor,
480 ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
481 ( CBLAS_TRANSPOSE ) TRANS_A,
485 ALPHA, TILE_A[ki][ki], tile_size_k,
486 TILE_B[ni][ki], tile_size_k );
488 for ( mi = ki+1; mi < nt; mi++ )
492 #pragma oss task in( TILE_B[ni][mi] ) \ 493 in( TILE_A[ki][mi] ) \ 494 inout( TILE_B[ni][ki] ) \ 495 shared( TILE_A, TILE_B ) \ 496 firstprivate( ki, mi, ni ) \ 498 cblas_dgemm( CblasRowMajor,
499 CblasNoTrans, CblasTrans,
503 ALPHA, TILE_B[ni][mi], tile_size_m,
504 TILE_A[ki][mi], tile_size_m,
505 1.0, TILE_B[ni][ki], tile_size_k );
514 if ( TRANS_A == NoTrans )
516 for ( ki = 0; ki < nt; ki++ )
520 for ( ni = 0; ni < mt; ni++ )
524 #pragma oss task in( TILE_A[ki][ki] ) \ 525 inout( TILE_B[ni][ki] ) \ 526 shared( TILE_A, TILE_B ) \ 527 firstprivate( ki, ni ) \ 529 cblas_dtrmm( CblasRowMajor,
530 ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
531 ( CBLAS_TRANSPOSE ) TRANS_A,
535 ALPHA, TILE_A[ki][ki], tile_size_k,
536 TILE_B[ni][ki], tile_size_k );
538 for ( mi = ki+1; mi < nt; mi++ )
542 #pragma oss task in( TILE_B[ni][mi] ) \ 543 in( TILE_A[mi][ki] ) \ 544 inout( TILE_B[ni][ki] ) \ 545 shared( TILE_A, TILE_B ) \ 546 firstprivate( ki, mi, ni ) \ 548 cblas_dgemm( CblasRowMajor,
549 CblasNoTrans, CblasNoTrans,
553 ALPHA, TILE_B[ni][mi], tile_size_m,
554 TILE_A[mi][ki], tile_size_k,
555 1.0, TILE_B[ni][ki], tile_size_k );
563 for ( ki = nt-1; ki > -1; ki-- )
567 for ( ni = 0; ni < mt; ni++ )
571 #pragma oss task in( TILE_A[ki][ki] ) \ 572 inout( TILE_B[ni][ki] ) \ 573 shared( TILE_A, TILE_B ) \ 574 firstprivate( ki, ni ) \ 576 cblas_dtrmm( CblasRowMajor,
577 ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
578 ( CBLAS_TRANSPOSE ) TRANS_A,
582 ALPHA, TILE_A[ki][ki], tile_size_k,
583 TILE_B[ni][ki], tile_size_k );
585 for ( mi = 0; mi < ki; mi++ )
589 #pragma oss task in( TILE_B[ni][mi] ) \ 590 in( TILE_A[ki][mi] ) \ 591 inout( TILE_B[ni][ki] ) \ 592 shared( TILE_A, TILE_B ) \ 593 firstprivate( ki, mi, ni ) \ 595 cblas_dgemm( CblasRowMajor,
596 CblasNoTrans, CblasTrans,
600 ALPHA, TILE_B[ni][mi], tile_size_m,
601 TILE_A[ki][mi], tile_size_m,
602 1.0, TILE_B[ni][ki], tile_size_k );
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)