/*
 * kdsyr2k -- tiled symmetric rank-2k update driver (fragmentary excerpt).
 *
 * NOTE(review): this text appears to be a scrape of a rendered source
 * listing: the bare integers at the start of many lines look like the
 * listing's own line numbers, and the jumps between them indicate that
 * braces, declarations and whole statements are missing from this view.
 * The parameter list below also omits N, K, B and LDB, which the body uses.
 * Comments here describe only what the visible tokens show.
 */
1 #include "../include/lass.h" 124 enum LASS_RETURN
kdsyr2k(
enum DDSS_UPLO UPLO,
enum DDSS_TRANS TRANS,
126 const double ALPHA,
double *A,
int LDA,
128 const double BETA,
double *C,
int LDC )
/*
 * Tile counts along N (nt) and K (kt).  Only the "+ 1" assignments are
 * visible; presumably they are the branch taken when the dimension is NOT
 * a multiple of TILE_SIZE (one extra, partial tile) -- TODO confirm against
 * the full source, the intervening lines are not shown here.
 */
145 if ( N % TILE_SIZE == 0 )
151 nt = ( N / TILE_SIZE ) + 1;
154 if ( K % TILE_SIZE == 0 )
160 kt = ( K / TILE_SIZE ) + 1;
/* Am/An/Bm/Bn/Cm/Cn are presumably chosen inside this TRANS test (not visible). */
167 if ( TRANS == NoTrans )
/*
 * Tile storage: each matrix is a heap block addressed as
 * TILE_X[row][col][TILE_SIZE * TILE_SIZE], i.e. one contiguous
 * TILE_SIZE*TILE_SIZE array of doubles per tile.  A NULL result from malloc
 * is reported on stderr; what happens after the message (return code,
 * cleanup of earlier allocations) is not visible in this excerpt.
 */
186 double ( *TILE_A )[An][TILE_SIZE * TILE_SIZE] = malloc(
187 Am * An * TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
189 if ( TILE_A == NULL )
191 fprintf( stderr,
"Failure in ddss_dtile_alloc for matrix TILE_A\n" );
195 double ( *TILE_B )[Bn][TILE_SIZE * TILE_SIZE] = malloc(
196 Bm * Bn * TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
198 if ( TILE_B == NULL )
200 fprintf( stderr,
"Failure in ddss_dtile_alloc for matrix TILE_B\n" );
204 double ( *TILE_C )[Cn][TILE_SIZE * TILE_SIZE] = malloc(
205 Cm * Cn * TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
207 if ( TILE_C == NULL )
209 fprintf( stderr,
"Failure in ddss_dtile_alloc for matrix TILE_C\n" );
/*
 * TRANS == NoTrans: A and B tiles are indexed [mi][ni] with mi over nt and
 * ni over kt.  Two loop nests follow; presumably they are selected by UPLO
 * (the test itself is not visible here).
 */
231 if ( TRANS == NoTrans )
/*
 * NoTrans, first nest: updates TILE_C[mi][mi] and TILE_C[mi][ki] with
 * ki > mi (second index larger than the first).
 */
235 for ( mi = 0; mi < nt; mi++ )
238 for ( ni = 0; ni < kt; ni++ )
/*
 * Diagonal tile: one task per (mi, ni) running cblas_dsyr2k on
 * TILE_A[mi][ni] / TILE_B[mi][ni] into TILE_C[mi][mi].  The task clause
 * lists the A/B tiles as "in" and the C tile as "inout".
 * "beta" (lowercase) is a local whose assignment is not visible in this
 * excerpt; it is distinct from the BETA parameter.
 */
251 #pragma oss task in( TILE_A[mi][ni] ) \ 252 in( TILE_B[mi][ni] ) \ 253 inout( TILE_C[mi][mi] ) \ 254 shared( TILE_A, TILE_B, TILE_C ) \ 255 firstprivate( mi,ni ) \ 257 cblas_dsyr2k( CblasRowMajor,
258 ( CBLAS_UPLO ) UPLO, ( CBLAS_TRANSPOSE ) TRANS,
261 ALPHA, TILE_A[mi][ni], tile_size_n,
262 TILE_B[mi][ni], tile_size_n,
263 beta, TILE_C[mi][mi], tile_size_m );
/*
 * Off-diagonal tiles: the rank-2k update is expressed as two GEMMs on the
 * same C tile.  First call: ALPHA * A[mi][ni] * B[ki][ni]^T scaled against
 * "beta" times the existing C tile.
 */
265 for ( ki = mi + 1; ki < nt; ki++ )
269 #pragma oss task in( TILE_A[mi][ni] ) \ 270 in( TILE_A[ki][ni] ) \ 271 in( TILE_B[mi][ni] ) \ 272 in( TILE_B[ki][ni] ) \ 273 inout( TILE_C[mi][ki] ) \ 274 shared( TILE_A, TILE_B, TILE_C ) \ 275 firstprivate( mi,ni,ki ) \ 278 cblas_dgemm( CblasRowMajor,
279 CblasNoTrans, CblasTrans,
283 ALPHA, TILE_A[mi][ni], tile_size_n,
284 TILE_B[ki][ni], tile_size_n,
285 beta, TILE_C[mi][ki], tile_size_k );
/*
 * Second call adds ALPHA * B[mi][ni] * A[ki][ni]^T; the C scale factor is
 * 1.0 so the result of the first GEMM is accumulated, not rescaled.
 */
287 cblas_dgemm( CblasRowMajor,
288 CblasNoTrans, CblasTrans,
292 ALPHA, TILE_B[mi][ni], tile_size_n,
293 TILE_A[ki][ni], tile_size_n,
294 1.0, TILE_C[mi][ki], tile_size_k );
/*
 * NoTrans, second nest: same diagonal update, but the off-diagonal target
 * is TILE_C[ki][mi] with ki > mi (first index larger than the second) and
 * the A/B operands swap their mi/ki roles accordingly.
 */
304 for ( mi = 0; mi < nt; mi++ )
307 for ( ni = 0; ni < kt; ni++ )
320 #pragma oss task in( TILE_A[mi][ni] ) \ 321 in( TILE_B[mi][ni] ) \ 322 inout( TILE_C[mi][mi] ) \ 323 shared( TILE_A, TILE_B, TILE_C ) \ 324 firstprivate( mi,ni ) \ 326 cblas_dsyr2k( CblasRowMajor,
327 ( CBLAS_UPLO ) UPLO, ( CBLAS_TRANSPOSE ) TRANS,
330 ALPHA, TILE_A[mi][ni], tile_size_n,
331 TILE_B[mi][ni], tile_size_n,
332 beta, TILE_C[mi][mi], tile_size_m );
334 for ( ki = mi + 1; ki < nt; ki++ )
338 #pragma oss task in( TILE_A[ki][ni] ) \ 339 in( TILE_A[mi][ni] ) \ 340 in( TILE_B[ki][ni] ) \ 341 in( TILE_B[mi][ni] ) \ 342 inout( TILE_C[ki][mi] ) \ 343 shared( TILE_A, TILE_B, TILE_C ) \ 344 firstprivate( mi,ni,ki ) \ 347 cblas_dgemm( CblasRowMajor,
348 CblasNoTrans, CblasTrans,
352 ALPHA, TILE_A[ki][ni], tile_size_n,
353 TILE_B[mi][ni], tile_size_n,
354 beta, TILE_C[ki][mi], tile_size_m );
/* Accumulating GEMM (C scale 1.0), as in the first nest. */
356 cblas_dgemm( CblasRowMajor,
357 CblasNoTrans, CblasTrans,
361 ALPHA, TILE_B[ki][ni], tile_size_n,
362 TILE_A[mi][ni], tile_size_n,
363 1.0, TILE_C[ki][mi], tile_size_m );
/*
 * Remaining two nests: loop bounds are swapped (mi over kt, ni over nt) and
 * the GEMMs use (CblasTrans, CblasNoTrans), so the products are
 * A^T * B and B^T * A.  Presumably this is the else-branch of the
 * TRANS == NoTrans test above -- the "else" itself is not visible.
 * This nest targets TILE_C[ni][ni] and TILE_C[ni][ki] with ki > ni.
 */
376 for ( mi = 0; mi < kt; mi++ )
379 for ( ni = 0; ni < nt; ni++ )
392 #pragma oss task in( TILE_A[mi][ni] ) \ 393 in( TILE_B[mi][ni] ) \ 394 inout( TILE_C[ni][ni] ) \ 395 shared( TILE_A, TILE_B, TILE_C ) \ 396 firstprivate( mi,ni ) \ 398 cblas_dsyr2k( CblasRowMajor,
399 ( CBLAS_UPLO ) UPLO, ( CBLAS_TRANSPOSE ) TRANS,
402 ALPHA, TILE_A[mi][ni], tile_size_n,
403 TILE_B[mi][ni], tile_size_n,
404 beta, TILE_C[ni][ni], tile_size_n );
406 for ( ki = ni + 1; ki < nt; ki++ )
410 #pragma oss task in( TILE_A[mi][ni] ) \ 411 in( TILE_A[mi][ki] ) \ 412 in( TILE_B[mi][ni] ) \ 413 in( TILE_B[mi][ki] ) \ 414 inout( TILE_C[ni][ki] ) \ 415 shared( TILE_A, TILE_B, TILE_C ) \ 416 firstprivate( mi,ni,ki ) \ 419 cblas_dgemm( CblasRowMajor,
420 CblasTrans, CblasNoTrans,
424 ALPHA, TILE_A[mi][ni], tile_size_n,
425 TILE_B[mi][ki], tile_size_k,
426 beta, TILE_C[ni][ki], tile_size_k );
/* Accumulating GEMM (C scale 1.0) adding the B^T * A term. */
428 cblas_dgemm( CblasRowMajor,
429 CblasTrans, CblasNoTrans,
433 ALPHA, TILE_B[mi][ni], tile_size_n,
434 TILE_A[mi][ki], tile_size_k,
435 1.0, TILE_C[ni][ki], tile_size_k );
/*
 * Last nest: same transposed products, but the off-diagonal target is
 * TILE_C[ki][ni] with ki > ni.
 */
444 for ( mi = 0; mi < kt; mi++ )
447 for ( ni = 0; ni < nt; ni++ )
460 #pragma oss task in( TILE_A[mi][ni] ) \ 461 in( TILE_B[mi][ni] ) \ 462 inout( TILE_C[ni][ni] ) \ 463 shared( TILE_A, TILE_B, TILE_C ) \ 464 firstprivate( mi,ni ) \ 466 cblas_dsyr2k( CblasRowMajor,
467 ( CBLAS_UPLO ) UPLO, ( CBLAS_TRANSPOSE ) TRANS,
470 ALPHA, TILE_A[mi][ni], tile_size_n,
471 TILE_B[mi][ni], tile_size_n,
472 beta, TILE_C[ni][ni], tile_size_n );
474 for ( ki = ni + 1; ki < nt; ki++ )
478 #pragma oss task in( TILE_A[mi][ki] ) \ 479 in( TILE_A[mi][ni] ) \ 480 in( TILE_B[mi][ki] ) \ 481 in( TILE_B[mi][ni] ) \ 482 inout( TILE_C[ki][ni] ) \ 483 shared( TILE_A, TILE_B, TILE_C ) \ 484 firstprivate( mi,ni,ki ) \ 487 cblas_dgemm( CblasRowMajor,
488 CblasTrans, CblasNoTrans,
492 ALPHA, TILE_A[mi][ki], tile_size_k,
493 TILE_B[mi][ni], tile_size_n,
494 beta, TILE_C[ki][ni], tile_size_n );
/* Accumulating GEMM (C scale 1.0) adding the B^T * A term. */
496 cblas_dgemm( CblasRowMajor,
497 CblasTrans, CblasNoTrans,
501 ALPHA, TILE_B[mi][ki], tile_size_k,
502 TILE_A[mi][ni], tile_size_n,
503 1.0, TILE_C[ki][ni], tile_size_n );
/*
 * NOTE(review): the five lines below are bare signatures with no body and
 * no terminating ';' -- they look like a cross-reference index from the
 * rendered listing rather than real declarations.  Their behaviour is not
 * visible in this excerpt; the notes are inferred from names and parameter
 * lists only and must be confirmed against the real definitions.
 */
/* Presumably copies the flat M x N matrix A (leading dimension LDA) into an MT x NT grid of tiles -- TODO confirm. */
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
/* Presumably the symmetric variant of the flat-to-tiled copy, restricted by UPLO -- TODO confirm. */
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
/* Presumably the inverse copy (tiles back into flat A) for a symmetric matrix -- TODO confirm. */
void ddss_dsymtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
/* Full kdsyr2k signature, including the N, K, B and LDB parameters. */
enum LASS_RETURN kdsyr2k(enum DDSS_UPLO UPLO, enum DDSS_TRANS TRANS, int N, int K, const double ALPHA, double *A, int LDA, double *B, int LDB, const double BETA, double *C, int LDC)
/* Presumably returns the dimension of tile index MT for a matrix extent of M -- TODO confirm. */
int ddss_tile_size(int M, int MT)