1 #include "../include/lass.h" 117 enum LASS_RETURN
kdsyrk(
enum DDSS_UPLO UPLO,
enum DDSS_TRANS TRANS_A,
119 const double ALPHA,
double *A,
int LDA,
120 const double BETA,
double *C,
int LDC )
135 if ( N % TILE_SIZE == 0 )
141 nt = ( N / TILE_SIZE ) + 1;
144 if ( K % TILE_SIZE == 0 )
150 kt = ( K / TILE_SIZE ) + 1;
157 if ( TRANS_A == NoTrans )
176 double ( *TILE_A )[An][TILE_SIZE * TILE_SIZE] = malloc(
177 Am * An * TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
179 if ( TILE_A == NULL )
181 fprintf( stderr,
"Failure in ddss_dtile_alloc for matrix TILE_A\n" );
185 double ( *TILE_C )[Cn][TILE_SIZE * TILE_SIZE] = malloc(
186 Cm * Cn * TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
188 if ( TILE_C == NULL )
190 fprintf( stderr,
"Failure in ddss_dtile_alloc for matrix TILE_C\n" );
209 if ( TRANS_A == NoTrans )
213 for ( mi = 0; mi < nt; mi++ )
216 for ( ni = 0; ni < kt; ni++ )
229 #pragma oss task in( TILE_A[mi][ni] ) \ 230 inout( TILE_C[mi][mi] ) \ 231 shared( TILE_A, TILE_C ) \ 232 firstprivate( mi, ni ) \ 234 cblas_dsyrk( CblasRowMajor,
236 ( CBLAS_TRANSPOSE ) TRANS_A,
239 ALPHA, TILE_A[mi][ni], tile_size_n,
240 beta, TILE_C[mi][mi], tile_size_m );
242 for ( ki = mi + 1; ki < nt; ki++ )
246 #pragma oss task in( TILE_A[mi][ni] ) \ 247 in( TILE_A[ki][ni] ) \ 248 inout( TILE_C[mi][ki] ) \ 249 shared( TILE_A, TILE_C ) \ 250 firstprivate( mi, ni, ki ) \ 252 cblas_dgemm( CblasRowMajor,
253 CblasNoTrans, CblasTrans,
257 ALPHA, TILE_A[mi][ni], tile_size_n,
258 TILE_A[ki][ni], tile_size_n,
259 beta, TILE_C[mi][ki], tile_size_k );
268 for ( mi = 0; mi < nt; mi++ )
271 for ( ni = 0; ni < kt; ni++ )
284 #pragma oss task in( TILE_A[mi][ni] ) \ 285 inout( TILE_C[mi][mi] ) \ 286 shared( TILE_A, TILE_C ) \ 287 firstprivate( mi, ni ) \ 289 cblas_dsyrk( CblasRowMajor,
291 ( CBLAS_TRANSPOSE ) TRANS_A,
294 ALPHA, TILE_A[mi][ni], tile_size_n,
295 beta, TILE_C[mi][mi], tile_size_m );
297 for ( ki = mi + 1; ki < nt; ki++ )
301 #pragma oss task in( TILE_A[ki][ni] ) \ 302 in( TILE_A[mi][ni] ) \ 303 inout( TILE_C[ki][mi] ) \ 304 shared( TILE_A, TILE_C ) \ 305 firstprivate( mi, ni, ki ) \ 307 cblas_dgemm( CblasRowMajor,
308 CblasNoTrans, CblasTrans,
312 ALPHA, TILE_A[ki][ni], tile_size_n,
313 TILE_A[mi][ni], tile_size_n,
314 beta, TILE_C[ki][mi], tile_size_m );
326 for ( mi = 0; mi < kt; mi++ )
329 for ( ni = 0; ni < nt; ni++ )
342 #pragma oss task in( TILE_A[mi][ni] ) \ 343 inout( TILE_C[ni][ni] ) \ 344 shared( TILE_A, TILE_C ) \ 345 firstprivate( mi, ni ) \ 347 cblas_dsyrk( CblasRowMajor,
349 ( CBLAS_TRANSPOSE ) TRANS_A,
352 ALPHA, TILE_A[mi][ni], tile_size_n,
353 beta, TILE_C[ni][ni], tile_size_n );
355 for ( ki = ni + 1; ki < nt; ki++ )
359 #pragma oss task in( TILE_A[mi][ni] ) \ 360 in( TILE_A[mi][ki] ) \ 361 inout( TILE_C[ni][ki] ) \ 362 shared( TILE_A, TILE_C ) \ 363 firstprivate( mi, ni, ki ) \ 365 cblas_dgemm( CblasRowMajor,
366 CblasTrans, CblasNoTrans,
370 ALPHA, TILE_A[mi][ni], tile_size_n,
371 TILE_A[mi][ki], tile_size_k,
372 beta, TILE_C[ni][ki], tile_size_k );
380 for ( mi = 0; mi < kt; mi++ )
383 for ( ni = 0; ni < nt; ni++ )
396 #pragma oss task in( TILE_A[mi][ni] ) \ 397 inout( TILE_C[ni][ni] ) \ 398 shared( TILE_A, TILE_C ) \ 399 firstprivate( mi, ni ) \ 401 cblas_dsyrk( CblasRowMajor,
403 ( CBLAS_TRANSPOSE ) TRANS_A,
406 ALPHA, TILE_A[mi][ni], tile_size_n,
407 beta, TILE_C[ni][ni], tile_size_n );
409 for ( ki = ni + 1; ki < nt; ki++ )
413 #pragma oss task in( TILE_A[mi][ki] ) \ 414 in( TILE_A[mi][ni] ) \ 415 inout( TILE_C[ki][ni] ) \ 416 shared( TILE_A, TILE_C ) \ 417 firstprivate( mi, ni, ki ) \ 419 cblas_dgemm( CblasRowMajor,
420 CblasTrans, CblasNoTrans,
424 ALPHA, TILE_A[mi][ki], tile_size_k,
425 TILE_A[mi][ni], tile_size_n,
426 beta, TILE_C[ki][ni], tile_size_n );
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
enum LASS_RETURN kdsyrk(enum DDSS_UPLO UPLO, enum DDSS_TRANS TRANS_A, int N, int K, const double ALPHA, double *A, int LDA, const double BETA, double *C, int LDC)
void ddss_dsymtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
int ddss_tile_size(int M, int MT)