LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
kdpotrf.c
Go to the documentation of this file.
1 #include "../include/lass.h"
2 
3 /**
4  *
5  * @file kdpotrf.c
6  *
7  * @brief LASs-DDSs kdpotrf routine.
8  *
9  * LASs-DDSs is a software package provided by:
10  * Barcelona Supercomputing Center - Centro Nacional de Supercomputacion
11  *
12  * @author Pedro Valero-Lara pedro.valero@bsc.es
13  * @date 2017-24-10
14  * @reviewer
15  * @modified
16  *
17  **/
18 
19 /**
20  *
21  * @ingroup DDSS
22  *
23  * Performs the Cholesky factorization of a symmetric positive definite
24  * matrix A:
25  *
26  * A = L \times L^T
27  * or
28  * A = U^T \times U
29  *
30  * where L is a lower triangular matrix and U is an upper triangular matrix.
31  *
32 **/
33 
34 /**
35  *
36  * @param[in]
37  * UPLO enum DDSS_UPLO.
38  * UPLO specifies the form of A is stored:
39  * - Lower: Lower triangle of A is stored. The upper traingular part is
40  * not referenced.
41  * - Upper: Upper triangle of A is stored. The lower triangular part is
42  * not referenced.
43  *
44  * @param[in]
45  * N int.
46  * N specifies the order of the square matrix A. N >= 0.
47  *
48  * @param[in,out]
49  * A double *.
50  * A is a pointer to a positive definite matrix of dimension N by LDA.
51  * On exit, if return value is Success, the matrix A is overwriten by
52  * the factor U or L.
53  *
54  * @param[in]
55  * LDA int.
56  * LDA specifies the number of columns of A ( row-major order ).
57  * LDA must be at least max( 1, N ).
58  *
59  **/
60 
61 
62 /**
63  *
64  * @retval Success sucessful exit
65  * @retval NoSuccess unsucessful exit
66  *
67  **/
68 
69 /**
70  *
71  * @sa ddss_dpotrf
72  * @sa ddss_tile
73  * @sa ddss_symflat2tiled
74  * @sa ddss_symtiled2flat
75  *
76  **/
77 
78 enum LASS_RETURN kdpotrf( enum DDSS_UPLO UPLO, int N, double *A, int LDA )
79 {
80 
81  // Local variables
82  int nt;
83  int mi, mmi, ki, ni;
84  int Am;
85  int An;
86  int tile_size_m;
87  int tile_size_mm;
88  int tile_size_n;
89  int tile_size_k;
90  int k_check;
91  double betat;
92 
93  // Number of tiles
94  if ( N % TILE_SIZE == 0 )
95  {
96  nt = N / TILE_SIZE;
97  }
98  else
99  {
100  nt = ( N / TILE_SIZE ) + 1;
101  }
102 
103  /****************************
104  --Tile A matrix declaration--
105  ****************************/
106 
107  Am = nt;
108  An = nt;
109 
110  /***************************
111  --Tile A matrix allocation--
112  ***************************/
113 
114  double (*TILE_A)[An][TILE_SIZE * TILE_SIZE] = malloc ( Am * An *
115  TILE_SIZE * TILE_SIZE * sizeof( double ) );
116 
117  if ( TILE_A == NULL)
118  {
119  fprintf( stderr, "Failure in kdpotrf for matrix TILE_A\n" );
120  return NoSuccess;
121  }
122 
123  /************************************************
124  // --From flat data layout to tiled data layout--
125  ************************************************/
126 
127  ddss_dsymflat2tiled( N, N, A, LDA, nt, nt, TILE_A, UPLO );
128 
129  /**************
130  --DPOTRF tile--
131  **************/
132 
133  if ( UPLO == Lower )
134  {
135  // --UPLO = Lower--
136  for ( ki = 0; ki < nt; ki++ )
137  {
138  tile_size_k = ddss_tile_size( N, ki );
139 
140  #pragma oss task inout( TILE_A[ki][ki] ) \
141  firstprivate( ki ) \
142  label( dpotrf )
143  LAPACKE_dpotrf_work( LAPACK_ROW_MAJOR,
144  'L',
145  tile_size_k, TILE_A[ki][ki], tile_size_k );
146 
147  for ( mi = ki + 1; mi < nt; mi++ )
148  {
149  tile_size_m = ddss_tile_size( N, mi );
150 
151  #pragma oss task in( TILE_A[ki][ki] ) \
152  inout(TILE_A[mi][ki]) \
153  firstprivate( mi, ki ) \
154  label( dtrsm )
155  cblas_dtrsm( CblasRowMajor,
156  ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Lower,
157  ( CBLAS_TRANSPOSE ) Trans, ( CBLAS_DIAG ) NonUnit,
158  tile_size_m, TILE_SIZE,
159  1.0, TILE_A[ki][ki], TILE_SIZE,
160  TILE_A[mi][ki], TILE_SIZE );
161  }
162  for ( mmi = ki + 1; mmi < nt; mmi++ )
163  {
164  tile_size_mm = ddss_tile_size( N, mmi );
165 
166  #pragma oss task in( TILE_A[mmi][ki] ) \
167  inout( TILE_A[mmi][mmi] ) \
168  firstprivate( mmi, ki ) \
169  label( dsyrk )
170  cblas_dsyrk( CblasRowMajor,
171  ( CBLAS_UPLO ) Lower, ( CBLAS_TRANSPOSE ) NoTrans,
172  tile_size_mm, TILE_SIZE,
173  -1.0, TILE_A[mmi][ki], TILE_SIZE,
174  1.0, TILE_A[mmi][mmi], tile_size_mm );
175 
176  for ( ni = ki + 1; ni < mmi; ni++ )
177  {
178 
179  #pragma oss task in( TILE_A[mmi][ki] ) \
180  in( TILE_A[ni][ki] ) \
181  inout( TILE_A[mmi][ni] ) \
182  firstprivate( ni, mmi, ki ) \
183  label( dgemm )
184  cblas_dgemm( CblasRowMajor,
185  ( CBLAS_TRANSPOSE ) NoTrans,
186  ( CBLAS_TRANSPOSE ) Trans,
187  TILE_SIZE,
188  TILE_SIZE,
189  TILE_SIZE,
190  -1.0, TILE_A[mmi][ki], TILE_SIZE,
191  TILE_A[ni][ki], TILE_SIZE,
192  1.0, TILE_A[mmi][ni], TILE_SIZE );
193  }
194  }
195  }
196  }
197  else
198  {
199  // --UPLO = Upper--
200  for ( ki = 0; ki < nt; ki++ )
201  {
202  tile_size_k = ddss_tile_size( N, ki );
203 
204  #pragma oss task inout( TILE_A[ki][ki] ) \
205  shared( TILE_A ) \
206  firstprivate( ki ) \
207  label( dpotrf )
208  LAPACKE_dpotrf_work( LAPACK_ROW_MAJOR,
209  'U',
210  tile_size_k, TILE_A[ki][ki],
211  tile_size_k );
212 
213  for ( mi = ki + 1; mi < nt; mi++ )
214  {
215  tile_size_m = ddss_tile_size( N, mi );
216 
217  #pragma oss task in( TILE_A[ki][ki] ) \
218  inout(TILE_A[ki][mi]) \
219  shared( TILE_A ) \
220  firstprivate( mi, ki ) \
221  label( dtrsm )
222  cblas_dtrsm( CblasRowMajor,
223  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Upper,
224  ( CBLAS_TRANSPOSE ) Trans, ( CBLAS_DIAG ) NonUnit,
225  TILE_SIZE, tile_size_m,
226  1.0, TILE_A[ki][ki], TILE_SIZE,
227  TILE_A[ki][mi], tile_size_m );
228  }
229  for ( mmi = ki + 1; mmi < nt; mmi++ )
230  {
231  tile_size_mm = ddss_tile_size( N, mmi );
232 
233  #pragma oss task in( TILE_A[ki][mmi] ) \
234  inout( TILE_A[mmi][mmi] ) \
235  shared( TILE_A ) \
236  firstprivate( mmi, ki ) \
237  label( dsyrk )
238  cblas_dsyrk( CblasRowMajor,
239  ( CBLAS_UPLO ) Upper, ( CBLAS_TRANSPOSE ) Trans,
240  tile_size_mm, TILE_SIZE,
241  -1.0, TILE_A[ki][mmi], tile_size_mm,
242  1.0, TILE_A[mmi][mmi], tile_size_mm );
243 
244  for ( ni = ki + 1; ni < mmi; ni++ )
245  {
246  #pragma oss task in( TILE_A[ki][ni] ) \
247  in( TILE_A[ki][mmi] ) \
248  inout( TILE_A[ni][mmi] ) \
249  shared( TILE_A ) \
250  firstprivate( ni, mmi, ki ) \
251  label( dgemm )
252  cblas_dgemm( CblasRowMajor,
253  ( CBLAS_TRANSPOSE ) Trans,
254  ( CBLAS_TRANSPOSE ) NoTrans,
255  TILE_SIZE,
256  TILE_SIZE,
257  TILE_SIZE,
258  -1.0, TILE_A[ki][ni], TILE_SIZE,
259  TILE_A[ki][mmi], TILE_SIZE,
260  1.0, TILE_A[ni][mmi], TILE_SIZE );
261  }
262  }
263  }
264  }
265 
266  // --From tile data layout to flat data layout--
267  ddss_dsymtiled2flat( N, N, A, LDA, nt, nt, TILE_A, UPLO );
268 
269  // --Tile A matrix free--
270  free( TILE_A );
271 
272  return Success;
273 
274 }
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dsymtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
enum LASS_RETURN kdpotrf(enum DDSS_UPLO UPLO, int N, double *A, int LDA)
Definition: kdpotrf.c:78
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52