1 #include "../include/lass.h" 95 enum LASS_RETURN
kdposv(
enum DDSS_UPLO UPLO,
103 int mi, mmi, ki, ni, nni;
113 if ( N % TILE_SIZE == 0 )
119 nt = ( N / TILE_SIZE ) + 1;
121 if ( NRHS % TILE_SIZE == 0 )
123 nrhst = NRHS / TILE_SIZE;
127 nrhst = ( NRHS / TILE_SIZE ) + 1;
145 double (*TILE_A)[An][TILE_SIZE * TILE_SIZE] = malloc ( Am * An *
146 TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
150 fprintf( stderr,
"Failure in kdposv for matrix TILE_A\n" );
154 double ( *TILE_B )[Bn][TILE_SIZE * TILE_SIZE] = malloc(
155 Bm * Bn * TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
157 if ( TILE_B == NULL )
159 fprintf( stderr,
"Failure in kdposv for matrix TILE_B\n" );
181 for ( ki = 0; ki < nt; ki++ )
185 #pragma oss task inout( TILE_A[ki][ki] ) \ 188 LAPACKE_dpotrf_work( LAPACK_ROW_MAJOR,
191 TILE_A[ki][ki], tile_size_k );
193 for ( mi = ki + 1; mi < nt; mi++ )
197 #pragma oss task in( TILE_A[ki][ki] ) \ 198 inout(TILE_A[mi][ki]) \ 199 firstprivate( TILE_A ) \ 200 firstprivate( mi, ki ) \ 201 label( dpotrf_dtrsm ) 202 cblas_dtrsm( CblasRowMajor,
203 ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Lower,
204 ( CBLAS_TRANSPOSE ) Trans, ( CBLAS_DIAG ) NonUnit,
205 tile_size_m, TILE_SIZE,
206 1.0, TILE_A[ki][ki], TILE_SIZE,
207 TILE_A[mi][ki], TILE_SIZE );
209 for ( mmi = ki + 1; mmi < nt; mmi++ )
213 #pragma oss task in( TILE_A[mmi][ki] ) \ 214 inout( TILE_A[mmi][mmi] ) \ 215 firstprivate( mmi, ki ) \ 216 label( dpotrf_dsyrk ) 217 cblas_dsyrk( CblasRowMajor,
218 ( CBLAS_UPLO ) Lower, ( CBLAS_TRANSPOSE ) NoTrans,
219 tile_size_mm, TILE_SIZE,
220 -1.0, TILE_A[mmi][ki], TILE_SIZE,
221 1.0, TILE_A[mmi][mmi], tile_size_mm );
223 for ( ni = ki + 1; ni < mmi; ni++ )
226 #pragma oss task in( TILE_A[mmi][ki] ) \ 227 in( TILE_A[ni][ki] ) \ 228 inout( TILE_A[mmi][ni] ) \ 229 firstprivate( ni, mmi, ki ) \ 230 label( dpotrf_dgemm ) 231 cblas_dgemm( CblasRowMajor,
232 ( CBLAS_TRANSPOSE ) NoTrans,
233 ( CBLAS_TRANSPOSE ) Trans,
237 -1.0, TILE_A[mmi][ki], TILE_SIZE,
238 TILE_A[ni][ki], TILE_SIZE,
239 1.0, TILE_A[mmi][ni], TILE_SIZE );
251 for ( ki = 0; ki < nt; ki++ )
255 for ( ni = 0; ni < nrhst; ni++ )
259 #pragma oss task in( TILE_A[ki][ki] ) \ 260 inout( TILE_B[ki][ni] ) \ 261 shared( TILE_A, TILE_B ) \ 262 firstprivate( ki, ni ) \ 263 label( dtrsm_dtrsm1 ) 264 cblas_dtrsm( CblasRowMajor,
265 ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
266 ( CBLAS_TRANSPOSE ) NoTrans,
267 ( CBLAS_DIAG ) NonUnit,
270 1.0, TILE_A[ki][ki], tile_size_k,
271 TILE_B[ki][ni], tile_size_n );
273 for ( nni = 0; nni < nrhst; nni++ )
277 for ( mi = ki + 1; mi < nt; mi++ )
280 #pragma oss task in( TILE_A[mi][ki] ) \ 281 in( TILE_B[ki][nni] ) \ 282 inout( TILE_B[mi][nni] ) \ 283 shared( TILE_A, TILE_B ) \ 284 firstprivate( ki, mi, nni ) \ 285 label( dtrsm_dgemm1 ) 286 cblas_dgemm( CblasRowMajor,
287 CblasNoTrans, CblasNoTrans,
291 -1.0, TILE_A[mi][ki], tile_size_k,
292 TILE_B[ki][nni], tile_size_nn,
293 1.0, TILE_B[mi][nni], tile_size_nn );
300 for ( ki = 0; ki < nt; ki++ )
304 for ( ni = 0; ni < nrhst; ni++ )
308 #pragma oss task in( TILE_A[ki][ki] ) \ 309 inout( TILE_B[ki][ni] ) \ 310 shared( TILE_A, TILE_B ) \ 311 firstprivate( ki, ni ) \ 312 label( dtrsm_dtrsm2 ) 313 cblas_dtrsm( CblasRowMajor,
314 ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
315 ( CBLAS_TRANSPOSE ) Trans,
316 ( CBLAS_DIAG ) NonUnit,
319 1.0, TILE_A[ki][ki], tile_size_k,
320 TILE_B[ki][ni], tile_size_n );
323 for ( nni = 0; nni < nrhst; nni++ )
327 for ( mi = ki - 1; mi >= 0; mi-- )
331 #pragma oss task in( TILE_A[ki][mi] ) \ 332 in( TILE_B[ki][nni] ) \ 333 inout( TILE_B[mi][nni] ) \ 334 shared( TILE_A, TILE_B ) \ 335 firstprivate( ki, mi, nni ) \ 336 label( dtrsm_dgemm2 ) 337 cblas_dgemm( CblasRowMajor,
338 CblasTrans, CblasNoTrans,
342 -1.0, TILE_A[ki][mi], tile_size_m,
343 TILE_B[ki][nni], tile_size_nn,
344 1.0, TILE_B[mi][nni], tile_size_nn );
353 for ( ki = 0; ki < nt; ki++ )
357 #pragma oss task inout( TILE_A[ki][ki] ) \ 361 LAPACKE_dpotrf_work( LAPACK_ROW_MAJOR,
363 tile_size_k, TILE_A[ki][ki],
366 for ( mi = ki + 1; mi < nt; mi++ )
370 #pragma oss task in( TILE_A[ki][ki] ) \ 371 inout(TILE_A[ki][mi]) \ 373 firstprivate( mi, ki ) \ 374 label( dpotrf_dtrsm ) 375 cblas_dtrsm( CblasRowMajor,
376 ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Upper,
377 ( CBLAS_TRANSPOSE ) Trans, ( CBLAS_DIAG ) NonUnit,
378 TILE_SIZE, tile_size_m,
379 1.0, TILE_A[ki][ki], TILE_SIZE,
380 TILE_A[ki][mi], tile_size_m );
382 for ( mmi = ki + 1; mmi < nt; mmi++ )
386 #pragma oss task in( TILE_A[ki][mmi] ) \ 387 inout( TILE_A[mmi][mmi] ) \ 389 firstprivate( mmi, ki ) \ 390 label( dpotrf_dsyrk ) 391 cblas_dsyrk( CblasRowMajor,
392 ( CBLAS_UPLO ) Upper, ( CBLAS_TRANSPOSE ) Trans,
393 tile_size_mm, TILE_SIZE,
394 -1.0, TILE_A[ki][mmi], tile_size_mm,
395 1.0, TILE_A[mmi][mmi], tile_size_mm );
397 for ( ni = ki + 1; ni < mmi; ni++ )
399 #pragma oss task in( TILE_A[ki][ni] ) \ 400 in( TILE_A[ki][mmi] ) \ 401 inout( TILE_A[ni][mmi] ) \ 403 firstprivate( ni, mmi, ki ) \ 404 label( dpotrf_dgemm ) 405 cblas_dgemm( CblasRowMajor,
406 ( CBLAS_TRANSPOSE ) Trans,
407 ( CBLAS_TRANSPOSE ) NoTrans,
411 -1.0, TILE_A[ki][ni], TILE_SIZE,
412 TILE_A[ki][mmi], TILE_SIZE,
413 1.0, TILE_A[ni][mmi], TILE_SIZE );
423 for ( ki = 0; ki < nt; ki++ )
427 for ( ni = 0; ni < nrhst; ni++ )
431 #pragma oss task in( TILE_A[ki][ki] ) \ 432 inout( TILE_B[ki][ni] ) \ 433 shared( TILE_A, TILE_B ) \ 434 firstprivate( ki, ni ) \ 435 label( dtrsm_dtrsm1 ) 436 cblas_dtrsm( CblasRowMajor,
437 ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
438 ( CBLAS_TRANSPOSE ) Trans,
439 ( CBLAS_DIAG ) NonUnit,
442 1.0, TILE_A[ki][ki], tile_size_k,
443 TILE_B[ki][ni], tile_size_n );
446 for ( mi = ki + 1; mi < nt; mi++ )
450 for ( nni = 0; nni < nrhst; nni++ )
454 #pragma oss task in( TILE_A[ki][mi]) \ 455 in( TILE_B[ki][nni] ) \ 456 inout( TILE_B[mi][nni] ) \ 457 shared( TILE_A, TILE_B ) \ 458 firstprivate( ki, mi, nni ) \ 459 label( dtrsm_dgemm1 ) 460 cblas_dgemm( CblasRowMajor,
461 CblasTrans, CblasNoTrans,
465 -1.0, TILE_A[ki][mi], tile_size_m,
466 TILE_B[ki][nni], tile_size_nn,
467 1.0, TILE_B[mi][nni], tile_size_nn );
473 for ( ki = 0; ki < nt; ki++ )
477 for ( ni = 0; ni < nrhst; ni++ )
481 #pragma oss task in( TILE_A[ki][ki] ) \ 482 inout( TILE_B[ki][ni] ) \ 483 shared( TILE_A, TILE_B ) \ 484 firstprivate( ki, ni ) \ 485 label( dtrsm_dtrsm2 ) 486 cblas_dtrsm( CblasRowMajor,
487 ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
488 ( CBLAS_TRANSPOSE ) NoTrans,
489 ( CBLAS_DIAG ) NonUnit,
492 1.0, TILE_A[ki][ki], tile_size_k,
493 TILE_B[ki][ni], tile_size_n );
496 for ( nni = 0; nni < nrhst; nni++ )
500 for ( mi = ki - 1; mi >= 0; mi-- )
504 #pragma oss task in( TILE_A[mi][ki] ) \ 505 in( TILE_B[ki][nni] ) \ 506 inout( TILE_B[mi][nni] ) \ 507 shared( TILE_A, TILE_B ) \ 508 firstprivate( ki, mi, nni ) \ 509 label( dtrsm_dgemm2 ) 510 cblas_dgemm( CblasRowMajor,
511 CblasNoTrans, CblasNoTrans,
515 -1.0, TILE_A[mi][ki], tile_size_k,
516 TILE_B[ki][nni], tile_size_nn,
517 1.0, TILE_B[mi][nni], tile_size_nn );
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
enum LASS_RETURN kdposv(enum DDSS_UPLO UPLO, int N, int NRHS, double *A, int LDA, double *B, int LDB)
void ddss_dsymtiled2flat_nb(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)