LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
kdposv.c
Go to the documentation of this file.
1 #include "../include/lass.h"
2 
3 /**
4  *
5  * @file kdposv.c
6  *
7  * @brief LASs-DDSs kdposv routine.
8  *
9  * LASs-DDSs is a software package provided by:
10  * Barcelona Supercomputing Center - Centro Nacional de Supercomputacion
11  *
12  * @author Pedro Valero-Lara pedro.valero@bsc.es
13  * @date 2018-04-05
14  * @reviewer
15  * @modified
16  *
17  **/
18 
19 /**
20  *
21  * @ingroup DDSS
22  *
23  * Solves a system of linear equations A \times X = B, where A is a M-by-M
24  * simmetric positive definite matrix and X and B are M-by-NRHS matrices.
25  * The matrix A is descomposed as:
26  *
27  * A = L \times L^T
28  * or
29  * A = U^T \times U
30  *
31  * where L is a lower triangular matrix and U is an upper triangular matrix.
32  *
33 **/
34 
35 /**
36  *
37  * @param[in]
38  * UPLO enum DDSS_UPLO.
39  * UPLO specifies the form of A is stored:
40  * - Lower: Lower triangle of A is stored. The upper traingular part is
41  * not referenced.
42  * - Upper: Upper triangle of A is stored. The lower triangular part is
43  * not referenced.
44  *
45  * @param[in]
46  * N int.
47  * N specifies the order of the square matrix A. N >= 0.
48  *
49  * @param[in]
50  * NRHS int.
51  * NRHS specifies the number of right-hand-sides (number
52  * of columns of B). NRHS >= 0.
53  *
54  * @param[in,out]
55  * A double *.
56  * A is a pointer to a positive definite matrix of dimension N by LDA.
57  * On exit, if return value is Success, the matrix A
58  * is overwriten by the factor U or L.
59  *
60  * @param[in]
61  * LDA int.
62  * LDA specifies the number of columns of A ( row-major order ).
63  * LDA must be at least max( 1, N ).
64  *
65  * @param[in,out]
66  * B double *.
67  * B is a pointer to a matrix of dimension N by NRHS, which stores the
68  * right-hand-sides of the systems of linear equations.
69  * (row-major order). On exit, if return value is Success, the matrix B
70  * is overwriten by the solution matrix X.
71  *
72  * @param[in]
73  * LDB int.
74  * LDB specifies the number of columns of B ( row-major order ).
75  * LDB must be at least max( 1, NRHS ).
76  *
77  **/
78 
79 /**
80  *
81  * @retval Success sucessful exit
82  * @retval NoSuccess unsucessful exit
83  *
84  **/
85 
86 /**
87  *
88  * @sa ddss_dpotrf
89  * @sa ddss_tile
90  * @sa ddss_symflat2tiled
91  * @sa ddss_symtiled2flat
92  *
93  **/
94 
95 enum LASS_RETURN kdposv( enum DDSS_UPLO UPLO,
96  int N, int NRHS,
97  double *A, int LDA,
98  double *B, int LDB )
99 {
100 
101  // Local variables
102  int nt, nrhst;
103  int mi, mmi, ki, ni, nni;
104  int Am, An;
105  int Bm, Bn;
106  int tile_size_m;
107  int tile_size_mm;
108  int tile_size_n;
109  int tile_size_nn;
110  int tile_size_k;
111 
112  // Number of tiles
113  if ( N % TILE_SIZE == 0 )
114  {
115  nt = N / TILE_SIZE;
116  }
117  else
118  {
119  nt = ( N / TILE_SIZE ) + 1;
120  }
121  if ( NRHS % TILE_SIZE == 0 )
122  {
123  nrhst = NRHS / TILE_SIZE;
124  }
125  else
126  {
127  nrhst = ( NRHS / TILE_SIZE ) + 1;
128  }
129 
130 
131  /****************************
132  --Tile A matrix declaration--
133  ****************************/
134 
135  Am = nt;
136  An = nt;
137 
138  Bm = nt;
139  Bn = nrhst;
140 
141  /***************************
142  --Tile matrices allocation--
143  ***************************/
144 
145  double (*TILE_A)[An][TILE_SIZE * TILE_SIZE] = malloc ( Am * An *
146  TILE_SIZE * TILE_SIZE * sizeof( double ) );
147 
148  if ( TILE_A == NULL)
149  {
150  fprintf( stderr, "Failure in kdposv for matrix TILE_A\n" );
151  return NoSuccess;
152  }
153 
154  double ( *TILE_B )[Bn][TILE_SIZE * TILE_SIZE] = malloc(
155  Bm * Bn * TILE_SIZE * TILE_SIZE * sizeof( double ) );
156 
157  if ( TILE_B == NULL )
158  {
159  fprintf( stderr, "Failure in kdposv for matrix TILE_B\n" );
160  return NoSuccess;
161  }
162 
163  /************************************************
164  // --From flat data layout to tiled data layout--
165  ************************************************/
166 
167  // From flat matrix A to tile matrix TILE_A
168  ddss_dsymflat2tiled( N, N, A, LDA, nt, nt, TILE_A, UPLO );
169 
170  // From flat matrix B to tile matrix TILE_B
171  ddss_dflat2tiled( N, NRHS, B, LDB, nt, nrhst, TILE_B );
172 
173  /**************
174  --DPOSV tile--
175  **************/
176 
177  if ( UPLO == Lower )
178  {
179  // Cholesky descomposition
180  // --UPLO = Lower--
181  for ( ki = 0; ki < nt; ki++ )
182  {
183  tile_size_k = ddss_tile_size( N, ki );
184 
185  #pragma oss task inout( TILE_A[ki][ki] ) \
186  firstprivate( ki ) \
187  label( dpotrf )
188  LAPACKE_dpotrf_work( LAPACK_ROW_MAJOR,
189  'L',
190  tile_size_k,
191  TILE_A[ki][ki], tile_size_k );
192 
193  for ( mi = ki + 1; mi < nt; mi++ )
194  {
195  tile_size_m = ddss_tile_size( N, mi );
196 
197  #pragma oss task in( TILE_A[ki][ki] ) \
198  inout(TILE_A[mi][ki]) \
199  firstprivate( TILE_A ) \
200  firstprivate( mi, ki ) \
201  label( dpotrf_dtrsm )
202  cblas_dtrsm( CblasRowMajor,
203  ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Lower,
204  ( CBLAS_TRANSPOSE ) Trans, ( CBLAS_DIAG ) NonUnit,
205  tile_size_m, TILE_SIZE,
206  1.0, TILE_A[ki][ki], TILE_SIZE,
207  TILE_A[mi][ki], TILE_SIZE );
208  }
209  for ( mmi = ki + 1; mmi < nt; mmi++ )
210  {
211  tile_size_mm = ddss_tile_size( N, mmi );
212 
213  #pragma oss task in( TILE_A[mmi][ki] ) \
214  inout( TILE_A[mmi][mmi] ) \
215  firstprivate( mmi, ki ) \
216  label( dpotrf_dsyrk )
217  cblas_dsyrk( CblasRowMajor,
218  ( CBLAS_UPLO ) Lower, ( CBLAS_TRANSPOSE ) NoTrans,
219  tile_size_mm, TILE_SIZE,
220  -1.0, TILE_A[mmi][ki], TILE_SIZE,
221  1.0, TILE_A[mmi][mmi], tile_size_mm );
222 
223  for ( ni = ki + 1; ni < mmi; ni++ )
224  {
225 
226  #pragma oss task in( TILE_A[mmi][ki] ) \
227  in( TILE_A[ni][ki] ) \
228  inout( TILE_A[mmi][ni] ) \
229  firstprivate( ni, mmi, ki ) \
230  label( dpotrf_dgemm )
231  cblas_dgemm( CblasRowMajor,
232  ( CBLAS_TRANSPOSE ) NoTrans,
233  ( CBLAS_TRANSPOSE ) Trans,
234  TILE_SIZE,
235  TILE_SIZE,
236  TILE_SIZE,
237  -1.0, TILE_A[mmi][ki], TILE_SIZE,
238  TILE_A[ni][ki], TILE_SIZE,
239  1.0, TILE_A[mmi][ni], TILE_SIZE );
240  }
241  }
242  }
243 
244  /*********************************************
245  --From tiled data layout to flat data layout--
246  *********************************************/
247  ddss_dsymtiled2flat_nb( N, N, A, LDA, nt, nt, TILE_A, UPLO );
248 
249  // Triangular solve
250  // --SIDE = Left & UPLO = Lower & TRANS_A = NoTrans--
251  for ( ki = 0; ki < nt; ki++ )
252  {
253  tile_size_k = ddss_tile_size( N, ki );
254 
255  for ( ni = 0; ni < nrhst; ni++ )
256  {
257  tile_size_n = ddss_tile_size( NRHS, ni );
258 
259  #pragma oss task in( TILE_A[ki][ki] ) \
260  inout( TILE_B[ki][ni] ) \
261  shared( TILE_A, TILE_B ) \
262  firstprivate( ki, ni ) \
263  label( dtrsm_dtrsm1 )
264  cblas_dtrsm( CblasRowMajor,
265  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
266  ( CBLAS_TRANSPOSE ) NoTrans,
267  ( CBLAS_DIAG ) NonUnit,
268  tile_size_k,
269  tile_size_n,
270  1.0, TILE_A[ki][ki], tile_size_k,
271  TILE_B[ki][ni], tile_size_n );
272  }
273  for ( nni = 0; nni < nrhst; nni++ )
274  {
275  tile_size_nn = ddss_tile_size( NRHS, nni );
276 
277  for ( mi = ki + 1; mi < nt; mi++ )
278  {
279 
280  #pragma oss task in( TILE_A[mi][ki] ) \
281  in( TILE_B[ki][nni] ) \
282  inout( TILE_B[mi][nni] ) \
283  shared( TILE_A, TILE_B ) \
284  firstprivate( ki, mi, nni ) \
285  label( dtrsm_dgemm1 )
286  cblas_dgemm( CblasRowMajor,
287  CblasNoTrans, CblasNoTrans,
288  tile_size_k,
289  tile_size_nn,
290  tile_size_k,
291  -1.0, TILE_A[mi][ki], tile_size_k,
292  TILE_B[ki][nni], tile_size_nn,
293  1.0, TILE_B[mi][nni], tile_size_nn );
294  }
295  }
296  }
297 
298  // Triangular solve
299  // --SIDE = Left & UPLO = Lower & TRANS_A = Trans--
300  for ( ki = 0; ki < nt; ki++ )
301  {
302  tile_size_k = ddss_tile_size( N, ki );
303 
304  for ( ni = 0; ni < nrhst; ni++ )
305  {
306  tile_size_n = ddss_tile_size( NRHS, ni );
307 
308  #pragma oss task in( TILE_A[ki][ki] ) \
309  inout( TILE_B[ki][ni] ) \
310  shared( TILE_A, TILE_B ) \
311  firstprivate( ki, ni ) \
312  label( dtrsm_dtrsm2 )
313  cblas_dtrsm( CblasRowMajor,
314  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
315  ( CBLAS_TRANSPOSE ) Trans,
316  ( CBLAS_DIAG ) NonUnit,
317  tile_size_k,
318  tile_size_n,
319  1.0, TILE_A[ki][ki], tile_size_k,
320  TILE_B[ki][ni], tile_size_n );
321  }
322 
323  for ( nni = 0; nni < nrhst; nni++ )
324  {
325  tile_size_nn = ddss_tile_size( NRHS, nni );
326 
327  for ( mi = ki - 1; mi >= 0; mi-- )
328  {
329  tile_size_m = ddss_tile_size( N, mi );
330 
331  #pragma oss task in( TILE_A[ki][mi] ) \
332  in( TILE_B[ki][nni] ) \
333  inout( TILE_B[mi][nni] ) \
334  shared( TILE_A, TILE_B ) \
335  firstprivate( ki, mi, nni ) \
336  label( dtrsm_dgemm2 )
337  cblas_dgemm( CblasRowMajor,
338  CblasTrans, CblasNoTrans,
339  tile_size_m,
340  tile_size_nn,
341  tile_size_k,
342  -1.0, TILE_A[ki][mi], tile_size_m,
343  TILE_B[ki][nni], tile_size_nn,
344  1.0, TILE_B[mi][nni], tile_size_nn );
345  }
346  }
347  }
348  }
349  else
350  {
351  // Cholesky descomposition
352  // --UPLO = Upper--
353  for ( ki = 0; ki < nt; ki++ )
354  {
355  tile_size_k = ddss_tile_size( N, ki );
356 
357  #pragma oss task inout( TILE_A[ki][ki] ) \
358  shared( TILE_A ) \
359  firstprivate( ki ) \
360  label( dpotrf )
361  LAPACKE_dpotrf_work( LAPACK_ROW_MAJOR,
362  'U',
363  tile_size_k, TILE_A[ki][ki],
364  tile_size_k );
365 
366  for ( mi = ki + 1; mi < nt; mi++ )
367  {
368  tile_size_m = ddss_tile_size( N, mi );
369 
370  #pragma oss task in( TILE_A[ki][ki] ) \
371  inout(TILE_A[ki][mi]) \
372  shared( TILE_A ) \
373  firstprivate( mi, ki ) \
374  label( dpotrf_dtrsm )
375  cblas_dtrsm( CblasRowMajor,
376  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Upper,
377  ( CBLAS_TRANSPOSE ) Trans, ( CBLAS_DIAG ) NonUnit,
378  TILE_SIZE, tile_size_m,
379  1.0, TILE_A[ki][ki], TILE_SIZE,
380  TILE_A[ki][mi], tile_size_m );
381  }
382  for ( mmi = ki + 1; mmi < nt; mmi++ )
383  {
384  tile_size_mm = ddss_tile_size( N, mmi );
385 
386  #pragma oss task in( TILE_A[ki][mmi] ) \
387  inout( TILE_A[mmi][mmi] ) \
388  shared( TILE_A ) \
389  firstprivate( mmi, ki ) \
390  label( dpotrf_dsyrk )
391  cblas_dsyrk( CblasRowMajor,
392  ( CBLAS_UPLO ) Upper, ( CBLAS_TRANSPOSE ) Trans,
393  tile_size_mm, TILE_SIZE,
394  -1.0, TILE_A[ki][mmi], tile_size_mm,
395  1.0, TILE_A[mmi][mmi], tile_size_mm );
396 
397  for ( ni = ki + 1; ni < mmi; ni++ )
398  {
399  #pragma oss task in( TILE_A[ki][ni] ) \
400  in( TILE_A[ki][mmi] ) \
401  inout( TILE_A[ni][mmi] ) \
402  shared( TILE_A ) \
403  firstprivate( ni, mmi, ki ) \
404  label( dpotrf_dgemm )
405  cblas_dgemm( CblasRowMajor,
406  ( CBLAS_TRANSPOSE ) Trans,
407  ( CBLAS_TRANSPOSE ) NoTrans,
408  TILE_SIZE,
409  TILE_SIZE,
410  TILE_SIZE,
411  -1.0, TILE_A[ki][ni], TILE_SIZE,
412  TILE_A[ki][mmi], TILE_SIZE,
413  1.0, TILE_A[ni][mmi], TILE_SIZE );
414  }
415  }
416  }
417  /*********************************************
418  --From tiled data layout to flat data layout--
419  *********************************************/
420  ddss_dsymtiled2flat_nb( N, N, A, LDA, nt, nt, TILE_A, UPLO );
421  // Triangular solve
422  // --SIDE = Left & UPLO = Upper & TRANS_A = Trans--
423  for ( ki = 0; ki < nt; ki++ )
424  {
425  tile_size_k = ddss_tile_size( N, ki );
426 
427  for ( ni = 0; ni < nrhst; ni++ )
428  {
429  tile_size_n = ddss_tile_size( NRHS, ni );
430 
431  #pragma oss task in( TILE_A[ki][ki] ) \
432  inout( TILE_B[ki][ni] ) \
433  shared( TILE_A, TILE_B ) \
434  firstprivate( ki, ni ) \
435  label( dtrsm_dtrsm1 )
436  cblas_dtrsm( CblasRowMajor,
437  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
438  ( CBLAS_TRANSPOSE ) Trans,
439  ( CBLAS_DIAG ) NonUnit,
440  tile_size_k,
441  tile_size_n,
442  1.0, TILE_A[ki][ki], tile_size_k,
443  TILE_B[ki][ni], tile_size_n );
444  }
445 
446  for ( mi = ki + 1; mi < nt; mi++ )
447  {
448  tile_size_m = ddss_tile_size( N, mi );
449 
450  for ( nni = 0; nni < nrhst; nni++ )
451  {
452  tile_size_nn = ddss_tile_size( NRHS, nni );
453 
454  #pragma oss task in( TILE_A[ki][mi]) \
455  in( TILE_B[ki][nni] ) \
456  inout( TILE_B[mi][nni] ) \
457  shared( TILE_A, TILE_B ) \
458  firstprivate( ki, mi, nni ) \
459  label( dtrsm_dgemm1 )
460  cblas_dgemm( CblasRowMajor,
461  CblasTrans, CblasNoTrans,
462  tile_size_m,
463  tile_size_nn,
464  tile_size_k,
465  -1.0, TILE_A[ki][mi], tile_size_m,
466  TILE_B[ki][nni], tile_size_nn,
467  1.0, TILE_B[mi][nni], tile_size_nn );
468  }
469  }
470  }
471  // Triangular solve
472  // --SIDE = Left & UPLO = Upper & TRANS_A = NoTrans--
473  for ( ki = 0; ki < nt; ki++ )
474  {
475  tile_size_k = ddss_tile_size( N, ki );
476 
477  for ( ni = 0; ni < nrhst; ni++ )
478  {
479  tile_size_n = ddss_tile_size( NRHS, ni );
480 
481  #pragma oss task in( TILE_A[ki][ki] ) \
482  inout( TILE_B[ki][ni] ) \
483  shared( TILE_A, TILE_B ) \
484  firstprivate( ki, ni ) \
485  label( dtrsm_dtrsm2 )
486  cblas_dtrsm( CblasRowMajor,
487  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
488  ( CBLAS_TRANSPOSE ) NoTrans,
489  ( CBLAS_DIAG ) NonUnit,
490  tile_size_k,
491  tile_size_n,
492  1.0, TILE_A[ki][ki], tile_size_k,
493  TILE_B[ki][ni], tile_size_n );
494  }
495 
496  for ( nni = 0; nni < nrhst; nni++ )
497  {
498  tile_size_nn = ddss_tile_size( NRHS, nni );
499 
500  for ( mi = ki - 1; mi >= 0; mi-- )
501  {
502  tile_size_m = ddss_tile_size( N, mi );
503 
504  #pragma oss task in( TILE_A[mi][ki] ) \
505  in( TILE_B[ki][nni] ) \
506  inout( TILE_B[mi][nni] ) \
507  shared( TILE_A, TILE_B ) \
508  firstprivate( ki, mi, nni ) \
509  label( dtrsm_dgemm2 )
510  cblas_dgemm( CblasRowMajor,
511  CblasNoTrans, CblasNoTrans,
512  tile_size_m,
513  tile_size_nn,
514  tile_size_k,
515  -1.0, TILE_A[mi][ki], tile_size_k,
516  TILE_B[ki][nni], tile_size_nn,
517  1.0, TILE_B[mi][nni], tile_size_nn );
518  }
519  }
520  }
521  }
522 
523  /*********************************************
524  --From tiled data layout to flat data layout--
525  *********************************************/
526  ddss_dtiled2flat( N, NRHS, B, LDB, nt, nrhst, TILE_B );
527 
528  // --Tile A and B matrices free--
529  free( TILE_A );
530  free( TILE_B );
531 
532  return Success;
533 
534 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
enum LASS_RETURN kdposv(enum DDSS_UPLO UPLO, int N, int NRHS, double *A, int LDA, double *B, int LDB)
Definition: kdposv.c:95
void ddss_dsymtiled2flat_nb(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52