1 #include "../include/lass.h" 74 enum LASS_RETURN
kdnpgetrf(
int M,
int N,
double *A,
int LDA )
79 int mi, mmi, ki, ni, nni, li;
89 if ( M % TILE_SIZE == 0 )
95 mt = ( M / TILE_SIZE ) + 1;
98 if ( N % TILE_SIZE == 0 )
104 nt = ( N / TILE_SIZE ) + 1;
111 double (*TILE_A)[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
112 TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
114 if ( TILE_A == NULL )
116 fprintf( stderr,
"Failure in kdnpgetrf for matrix TILE_A\n" );
130 for ( ki = 0; ki < MIN( mt, nt ); ki++ )
135 #if defined(LASs_WITH_MKL) 137 tile_size_k = MIN( tile_size_km, tile_size_kn );
139 #pragma oss task inout( TILE_A[ki][ki] ) \ 143 label( mkl_dgetrfnpi ) 144 LAPACKE_mkl_dgetrfnpi( CblasRowMajor,
145 tile_size_km, tile_size_kn,
147 TILE_A[ki][ki], tile_size_kn );
151 #pragma oss task inout( TILE_A[ki][ki] ) \ 156 dnpgetrf( tile_size_km, tile_size_kn,
162 for ( mi = ki + 1; mi < mt; mi++ )
166 #pragma oss task in( TILE_A[ki][ki] ) \ 167 inout(TILE_A[mi][ki]) \ 169 firstprivate( mi, ki ) \ 172 cblas_dtrsm( CblasRowMajor,
173 ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Upper,
174 ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) NonUnit,
175 tile_size_m, tile_size_kn,
176 1.0, TILE_A[ki][ki], tile_size_kn,
177 TILE_A[mi][ki], tile_size_kn );
179 for ( ni = ki + 1; ni < nt; ni++ )
183 #pragma oss task in( TILE_A[ki][ki] ) \ 184 inout(TILE_A[ki][ni]) \ 186 firstprivate( ni, ki ) \ 189 cblas_dtrsm( CblasRowMajor,
190 ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
191 ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) Unit,
192 tile_size_km, tile_size_n,
193 1.0, TILE_A[ki][ki], tile_size_kn,
194 TILE_A[ki][ni], tile_size_n );
197 for ( nni = ki + 1; nni < nt; nni++ )
201 for ( mmi = ki + 1; mmi < mt; mmi++ )
205 #pragma oss task in( TILE_A[mmi][ki] ) \ 206 in( TILE_A[ki][nni] ) \ 207 inout( TILE_A[mmi][nni] ) \ 209 firstprivate( nni, mmi, ki ) \ 210 priority( nt - nni ) \ 212 cblas_dgemm( CblasRowMajor,
213 ( CBLAS_TRANSPOSE ) NoTrans,
214 ( CBLAS_TRANSPOSE ) NoTrans,
218 -1.0, TILE_A[mmi][ki], tile_size_kn,
219 TILE_A[ki][nni], tile_size_nn,
220 1.0, TILE_A[mmi][nni], tile_size_nn );
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
enum LASS_RETURN dnpgetrf(int M, int N, double *A, int LDA)
enum LASS_RETURN kdnpgetrf(int M, int N, double *A, int LDA)
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)