1 #include "../include/lass.h" 80 enum LASS_RETURN
kdtpgetrf(
int M,
int N,
double *A,
int LDA,
int *IPIV )
98 if ( M % TILE_SIZE == 0 )
104 mt = ( M / TILE_SIZE ) + 1;
107 if ( N % TILE_SIZE == 0 )
113 nt = ( N / TILE_SIZE ) + 1;
120 double ( *TILE_A )[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
121 TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
123 if ( TILE_A == NULL )
125 fprintf( stderr,
"Failure in kdnpgetrf for matrix TILE_A\n" );
129 ipiv_local = (
int* ) malloc( MIN( M, N ) *
sizeof( int ) );
131 if ( ipiv_local == NULL )
133 fprintf( stderr,
"Failure in kdnpgetrf for ipiv_local\n" );
147 current_position = 0;
149 for ( ki = 0; ki < MIN( mt, nt ); ki++ )
154 #pragma oss task inout( TILE_A[ki][ki] ) \ 155 inout( ipiv_local ) \ 156 inout( current_position ) \ 157 out( local_ipiv_size ) \ 159 firstprivate( ki, i ) \ 162 LAPACKE_dgetrf( CblasRowMajor,
163 tile_size_km, tile_size_kn,
164 TILE_A[ki][ki], tile_size_kn,
165 ipiv_local + current_position );
168 local_ipiv_size = MIN( tile_size_km, tile_size_kn );
169 for ( i = 0; i < local_ipiv_size; i++ )
171 ( IPIV + current_position )[i] =
172 ( ipiv_local + current_position )[i] + current_position;
174 current_position += local_ipiv_size;
176 for ( mi = ki + 1; mi < mt; mi++ )
180 #pragma oss task in( TILE_A[ki][ki] ) \ 181 inout( TILE_A[mi][ki] ) \ 183 firstprivate( mi, ki ) \ 185 cblas_dtrsm( CblasRowMajor,
186 ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Upper,
187 ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) NonUnit,
188 tile_size_m, tile_size_kn,
189 1.0, TILE_A[ki][ki], tile_size_kn,
190 TILE_A[mi][ki], tile_size_kn );
192 for ( ni = 0; ni < ki; ni++ )
196 #pragma oss task in( ipiv_local ) \ 197 in( current_position ) \ 198 in( local_ipiv_size ) \ 199 inout( TILE_A[ki][ni] ) \ 201 firstprivate( ki, ni ) \ 203 LAPACKE_dlaswp( CblasRowMajor,
204 tile_size_n, TILE_A[ki][ni], tile_size_n,
206 ipiv_local + current_position - local_ipiv_size,
210 for ( ni = ki + 1; ni < nt; ni++ )
214 #pragma oss task in( ipiv_local ) \ 215 in( current_position ) \ 216 in( local_ipiv_size ) \ 217 inout( TILE_A[ki][ni] ) \ 219 firstprivate( ki, ni ) \ 220 label( dlaswp_right ) 221 LAPACKE_dlaswp( CblasRowMajor,
222 tile_size_n, TILE_A[ki][ni], tile_size_n,
224 ipiv_local + current_position - local_ipiv_size,
227 #pragma oss task in( TILE_A[ki][ki] ) \ 228 inout( TILE_A[ki][ni] ) \ 230 firstprivate( ni, ki ) \ 232 cblas_dtrsm( CblasRowMajor,
233 ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
234 ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) Unit,
235 tile_size_km, tile_size_n,
236 1.0, TILE_A[ki][ki], tile_size_kn,
237 TILE_A[ki][ni], tile_size_n );
239 for ( mmi = ki + 1; mmi < mt; mmi++ )
243 #pragma oss task in( TILE_A[mmi][ki] ) \ 244 in( TILE_A[ki][ni] ) \ 245 inout( TILE_A[mmi][ni] ) \ 247 firstprivate( ni, mmi, ki ) \ 249 cblas_dgemm( CblasRowMajor,
250 ( CBLAS_TRANSPOSE ) NoTrans,
251 ( CBLAS_TRANSPOSE ) NoTrans,
255 -1.0, TILE_A[mmi][ki], tile_size_kn,
256 TILE_A[ki][ni], tile_size_n,
257 1.0, TILE_A[mmi][ni], tile_size_n );
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
enum LASS_RETURN kdtpgetrf(int M, int N, double *A, int LDA, int *IPIV)
int ddss_tile_size(int M, int MT)