1 #include "../include/lass.h" 93 int mi, mmi, ki, ni, nni;
101 if ( N % TILE_SIZE == 0 )
107 nt = ( N / TILE_SIZE ) + 1;
110 if ( NRHS % TILE_SIZE == 0 )
112 nrhst = NRHS / TILE_SIZE;
116 nrhst = ( NRHS / TILE_SIZE ) + 1;
123 double (*TILE_A)[nt][TILE_SIZE * TILE_SIZE] = malloc ( nt * nt *
124 TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
126 if ( TILE_A == NULL )
128 fprintf( stderr,
"Failure in kdnpgesv for matrix TILE_A\n" );
132 double ( *TILE_B )[nrhst][TILE_SIZE * TILE_SIZE] = malloc(
133 nt * nrhst * TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
135 if ( TILE_B == NULL )
137 fprintf( stderr,
"Failure in kdnpgesv for matrix TILE_B\n" );
156 for ( ki = 0; ki < nt; ki++ )
160 #if defined(LASs_WITH_MKL) 162 #pragma oss task inout( TILE_A[ki][ki] ) \ 165 label( mkl_dgetrfnpi ) 166 LAPACKE_mkl_dgetrfnpi( CblasRowMajor,
167 tile_size_k, tile_size_k,
169 TILE_A[ki][ki], tile_size_k );
173 #pragma oss task inout( TILE_A[ki][ki] ) \ 183 for ( mi = ki + 1; mi < nt; mi++ )
187 #pragma oss task in( TILE_A[ki][ki] ) \ 188 inout(TILE_A[mi][ki]) \ 190 firstprivate( mi, ki ) \ 191 label( dnpgetrf_dtrsm_below ) 192 cblas_dtrsm( CblasRowMajor,
193 ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Upper,
194 ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) NonUnit,
195 tile_size_m, tile_size_k,
196 1.0, TILE_A[ki][ki], tile_size_k,
197 TILE_A[mi][ki], tile_size_k );
199 for ( ni = ki + 1; ni < nt; ni++ )
203 #pragma oss task in( TILE_A[ki][ki] ) \ 204 inout(TILE_A[ki][ni]) \ 206 firstprivate( ni, ki ) \ 207 label( dnpgetrf_dtrsm_right ) 208 cblas_dtrsm( CblasRowMajor,
209 ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
210 ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) Unit,
211 tile_size_k, tile_size_n,
212 1.0, TILE_A[ki][ki], tile_size_k,
213 TILE_A[ki][ni], tile_size_n );
215 for ( mmi = ki + 1; mmi < nt; mmi++ )
219 #pragma oss task in( TILE_A[mmi][ki] ) \ 220 in( TILE_A[ki][ni] ) \ 221 inout( TILE_A[mmi][ni] ) \ 223 firstprivate( ni, mmi, ki ) \ 224 label( dnpgetrf_dgemm ) 225 cblas_dgemm( CblasRowMajor,
226 ( CBLAS_TRANSPOSE ) NoTrans,
227 ( CBLAS_TRANSPOSE ) NoTrans,
231 -1.0, TILE_A[mmi][ki], tile_size_k,
232 TILE_A[ki][ni], tile_size_n,
233 1.0, TILE_A[mmi][ni], tile_size_n );
245 for ( ki = 0; ki < nt; ki++ )
249 for ( ni = 0; ni < nrhst; ni++ )
253 #pragma oss task in( TILE_A[ki][ki] ) \ 254 inout( TILE_B[ki][ni] ) \ 255 shared( TILE_A, TILE_B ) \ 256 firstprivate( ki, ni ) \ 257 label( dtrsm_dtrsm1 ) 258 cblas_dtrsm( CblasRowMajor,
259 ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
260 ( CBLAS_TRANSPOSE ) NoTrans,
264 1.0, TILE_A[ki][ki], tile_size_k,
265 TILE_B[ki][ni], tile_size_n );
267 for ( nni = 0; nni < nrhst; nni++ )
271 for ( mi = ki + 1; mi < nt; mi++ )
274 #pragma oss task in( TILE_A[mi][ki] ) \ 275 in( TILE_B[ki][nni] ) \ 276 inout( TILE_B[mi][nni] ) \ 277 shared( TILE_A, TILE_B ) \ 278 firstprivate( ki, mi, nni ) \ 279 label( dtrsm_dgemm1 ) 280 cblas_dgemm( CblasRowMajor,
281 CblasNoTrans, CblasNoTrans,
285 -1.0, TILE_A[mi][ki], tile_size_k,
286 TILE_B[ki][nni], tile_size_nn,
287 1.0, TILE_B[mi][nni], tile_size_nn );
294 for ( ki = nt - 1; ki >= 0; ki-- )
298 for ( ni = 0; ni < nrhst; ni++ )
302 #pragma oss task in( TILE_A[ki][ki] ) \ 303 inout( TILE_B[ki][ni] ) \ 304 shared( TILE_A, TILE_B ) \ 305 firstprivate( ki, ni ) \ 306 label( dtrsmi_dtrsm2 ) 307 cblas_dtrsm( CblasRowMajor,
308 ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Upper,
309 ( CBLAS_TRANSPOSE ) NoTrans,
310 ( CBLAS_DIAG ) NonUnit,
313 1.0, TILE_A[ki][ki], tile_size_k,
314 TILE_B[ki][ni], tile_size_n );
317 for ( mi = ki - 1; mi >= 0; mi-- )
321 for ( nni = 0; nni < nrhst; nni++ )
325 #pragma oss task in( TILE_A[mi][ki] ) \ 326 in( TILE_B[ki][nni] ) \ 327 inout( TILE_B[mi][nni] ) \ 328 shared( TILE_A, TILE_B ) \ 329 firstprivate( ki, mi, nni ) \ 330 label( dtrsm_dgemm2 ) 331 cblas_dgemm( CblasRowMajor,
332 CblasNoTrans, CblasNoTrans,
336 -1.0, TILE_A[mi][ki], tile_size_k,
337 TILE_B[ki][nni], tile_size_nn,
338 1.0, TILE_B[mi][nni], tile_size_nn );
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
enum LASS_RETURN dnpgetrf(int M, int N, double *A, int LDA)
enum LASS_RETURN kdnpgesv(int N, int NRHS, double *A, int LDA, double *B, int LDB)
void ddss_dtiled2flat_nb(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)