1 #include "../include/lass.h" 78 enum LASS_RETURN
kdpotrf(
enum DDSS_UPLO UPLO,
int N,
double *A,
int LDA )
94 if ( N % TILE_SIZE == 0 )
100 nt = ( N / TILE_SIZE ) + 1;
114 double (*TILE_A)[An][TILE_SIZE * TILE_SIZE] = malloc ( Am * An *
115 TILE_SIZE * TILE_SIZE *
sizeof(
double ) );
119 fprintf( stderr,
"Failure in kdpotrf for matrix TILE_A\n" );
136 for ( ki = 0; ki < nt; ki++ )
140 #pragma oss task inout( TILE_A[ki][ki] ) \ 143 LAPACKE_dpotrf_work( LAPACK_ROW_MAJOR,
145 tile_size_k, TILE_A[ki][ki], tile_size_k );
147 for ( mi = ki + 1; mi < nt; mi++ )
151 #pragma oss task in( TILE_A[ki][ki] ) \ 152 inout(TILE_A[mi][ki]) \ 153 firstprivate( mi, ki ) \ 155 cblas_dtrsm( CblasRowMajor,
156 ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Lower,
157 ( CBLAS_TRANSPOSE ) Trans, ( CBLAS_DIAG ) NonUnit,
158 tile_size_m, TILE_SIZE,
159 1.0, TILE_A[ki][ki], TILE_SIZE,
160 TILE_A[mi][ki], TILE_SIZE );
162 for ( mmi = ki + 1; mmi < nt; mmi++ )
166 #pragma oss task in( TILE_A[mmi][ki] ) \ 167 inout( TILE_A[mmi][mmi] ) \ 168 firstprivate( mmi, ki ) \ 170 cblas_dsyrk( CblasRowMajor,
171 ( CBLAS_UPLO ) Lower, ( CBLAS_TRANSPOSE ) NoTrans,
172 tile_size_mm, TILE_SIZE,
173 -1.0, TILE_A[mmi][ki], TILE_SIZE,
174 1.0, TILE_A[mmi][mmi], tile_size_mm );
176 for ( ni = ki + 1; ni < mmi; ni++ )
179 #pragma oss task in( TILE_A[mmi][ki] ) \ 180 in( TILE_A[ni][ki] ) \ 181 inout( TILE_A[mmi][ni] ) \ 182 firstprivate( ni, mmi, ki ) \ 184 cblas_dgemm( CblasRowMajor,
185 ( CBLAS_TRANSPOSE ) NoTrans,
186 ( CBLAS_TRANSPOSE ) Trans,
190 -1.0, TILE_A[mmi][ki], TILE_SIZE,
191 TILE_A[ni][ki], TILE_SIZE,
192 1.0, TILE_A[mmi][ni], TILE_SIZE );
200 for ( ki = 0; ki < nt; ki++ )
204 #pragma oss task inout( TILE_A[ki][ki] ) \ 208 LAPACKE_dpotrf_work( LAPACK_ROW_MAJOR,
210 tile_size_k, TILE_A[ki][ki],
213 for ( mi = ki + 1; mi < nt; mi++ )
217 #pragma oss task in( TILE_A[ki][ki] ) \ 218 inout(TILE_A[ki][mi]) \ 220 firstprivate( mi, ki ) \ 222 cblas_dtrsm( CblasRowMajor,
223 ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Upper,
224 ( CBLAS_TRANSPOSE ) Trans, ( CBLAS_DIAG ) NonUnit,
225 TILE_SIZE, tile_size_m,
226 1.0, TILE_A[ki][ki], TILE_SIZE,
227 TILE_A[ki][mi], tile_size_m );
229 for ( mmi = ki + 1; mmi < nt; mmi++ )
233 #pragma oss task in( TILE_A[ki][mmi] ) \ 234 inout( TILE_A[mmi][mmi] ) \ 236 firstprivate( mmi, ki ) \ 238 cblas_dsyrk( CblasRowMajor,
239 ( CBLAS_UPLO ) Upper, ( CBLAS_TRANSPOSE ) Trans,
240 tile_size_mm, TILE_SIZE,
241 -1.0, TILE_A[ki][mmi], tile_size_mm,
242 1.0, TILE_A[mmi][mmi], tile_size_mm );
244 for ( ni = ki + 1; ni < mmi; ni++ )
246 #pragma oss task in( TILE_A[ki][ni] ) \ 247 in( TILE_A[ki][mmi] ) \ 248 inout( TILE_A[ni][mmi] ) \ 250 firstprivate( ni, mmi, ki ) \ 252 cblas_dgemm( CblasRowMajor,
253 ( CBLAS_TRANSPOSE ) Trans,
254 ( CBLAS_TRANSPOSE ) NoTrans,
258 -1.0, TILE_A[ki][ni], TILE_SIZE,
259 TILE_A[ki][mmi], TILE_SIZE,
260 1.0, TILE_A[ni][mmi], TILE_SIZE );
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dsymtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
enum LASS_RETURN kdpotrf(enum DDSS_UPLO UPLO, int N, double *A, int LDA)
int ddss_tile_size(int M, int MT)