LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
kdnpgesv.c
Go to the documentation of this file.
1 #include "../include/lass.h"
2 
3 /**
4  *
5  * @file kdnpgesv.c
6  *
7  * @brief LASs-DDSs kdnpgesv routine.
8  *
9  * LASs-DDSs is a software package provided by:
10  * Barcelona Supercomputing Center - Centro Nacional de Supercomputacion
11  *
12  * @author Pedro Valero-Lara pedro.valero@bsc.es
13  * @date 2018-07-26
14  * @reviewer
15  * @modified
16  *
17  **/
18 
19 /**
20  *
21  * @ingroup DDSS
22  *
23  * Solves a system of linear equations A \times X = B, where A is a N-by-N
24 * general matrix and X and B are N-by-NRHS matrices. The matrix A is
25  * factorized using the LU descomposition without pivoting.
26  * The matrix A is descomposed as:
27  *
28  * A = L * U
29  *
30  * where L is a lower triangular matrix with unit diagonal elements and
31  * U is an upper triangular matrix.
32  *
33 **/
34 
35 /**
36  *
37  * @param[in]
38  * N int.
39  * N specifies the order of the square matrix A. N >= 0.
40  *
41  * NRHS int.
42  * NRHS specifies the number of right-hand-sides (number
43  * of columns of B). NRHS >= 0.
44  *
45  * @param[in,out]
46  * A double *.
47  * A is a pointer to a regular matrix of dimension N-by-LDA.
48  * On exit, if return value is Success, the matrix A is overwriten by
49  * the factors L and U. The unit diagonal elements of L are not stored.
50  *
51  * @param[in]
52  * LDA int.
53  * LDA specifies the number of columns of A ( row-major order ).
54  * LDA must be at least max( 1, N ).
55  *
56  * @param[in,out]
57  * B double *.
58  * B is a pointer to a matrix of dimension N by NRHS, which stores the
59  * right-hand-sides of the systems of linear equations.
60  * (row-major order). On exit, if return value is Success, the matrix B
61  * is overwriten by the solution matrix X.
62  *
63  * @param[in]
64  * LDB int.
65  * LDB specifies the number of columns of B ( row-major order ).
66  * LDB must be at least max( 1, NRHS ).
67  *
68  **/
69 
70 /**
71  *
72  * @retval Success successful exit
73  * @retval NoSuccess unsuccessful exit
74  *
75  **/
76 
77 /**
78  *
79  * @sa dnpgesv
80  * @sa ddss_tile
81  * @sa ddss_flat2tiled
82  * @sa ddss_tiled2flat
83  *
84  **/
85 
86 enum LASS_RETURN kdnpgesv( int N, int NRHS,
87  double *A, int LDA,
88  double *B, int LDB )
89 {
90 
91  // Local variables
92  int nt, nrhst;
93  int mi, mmi, ki, ni, nni;
94  int tile_size_m;
95  int tile_size_mm;
96  int tile_size_n;
97  int tile_size_nn;
98  int tile_size_k;
99 
100  // Number of tiles
101  if ( N % TILE_SIZE == 0 )
102  {
103  nt = N / TILE_SIZE;
104  }
105  else
106  {
107  nt = ( N / TILE_SIZE ) + 1;
108  }
109 
110  if ( NRHS % TILE_SIZE == 0 )
111  {
112  nrhst = NRHS / TILE_SIZE;
113  }
114  else
115  {
116  nrhst = ( NRHS / TILE_SIZE ) + 1;
117  }
118 
119  /***************************
120  --Tile matrices allocation--
121  ***************************/
122 
123  double (*TILE_A)[nt][TILE_SIZE * TILE_SIZE] = malloc ( nt * nt *
124  TILE_SIZE * TILE_SIZE * sizeof( double ) );
125 
126  if ( TILE_A == NULL )
127  {
128  fprintf( stderr, "Failure in kdnpgesv for matrix TILE_A\n" );
129  return NoSuccess;
130  }
131 
132  double ( *TILE_B )[nrhst][TILE_SIZE * TILE_SIZE] = malloc(
133  nt * nrhst * TILE_SIZE * TILE_SIZE * sizeof( double ) );
134 
135  if ( TILE_B == NULL )
136  {
137  fprintf( stderr, "Failure in kdnpgesv for matrix TILE_B\n" );
138  return NoSuccess;
139  }
140 
141  /************************************************
142  // --From flat data layout to tiled data layout--
143  ************************************************/
144 
145  // From flat matrix A to tile matrix TILE_A
146  ddss_dflat2tiled( N, N, A, LDA, nt, nt, TILE_A );
147 
148  // From flat matrix B to tile matrix TILE_B
149  ddss_dflat2tiled( N, NRHS, B, LDB, nt, nrhst, TILE_B );
150 
151  /****************
152  --DNPGESV tile--
153  ****************/
154 
155  // LU descomposition without pivoting
156  for ( ki = 0; ki < nt; ki++ )
157  {
158  tile_size_k = ddss_tile_size( N, ki );
159 
160  #if defined(LASs_WITH_MKL)
161 
162  #pragma oss task inout( TILE_A[ki][ki] ) \
163  shared( TILE_A ) \
164  firstprivate( ki ) \
165  label( mkl_dgetrfnpi )
166  LAPACKE_mkl_dgetrfnpi( CblasRowMajor,
167  tile_size_k, tile_size_k,
168  tile_size_k,
169  TILE_A[ki][ki], tile_size_k );
170 
171  #else
172 
173  #pragma oss task inout( TILE_A[ki][ki] ) \
174  shared( TILE_A ) \
175  firstprivate( ki ) \
176  label( dnpgetrf )
177  dnpgetrf( tile_size_k, tile_size_k,
178  TILE_A[ki][ki],
179  tile_size_k );
180 
181  #endif
182 
183  for ( mi = ki + 1; mi < nt; mi++ )
184  {
185  tile_size_m = ddss_tile_size( N, mi );
186 
187  #pragma oss task in( TILE_A[ki][ki] ) \
188  inout(TILE_A[mi][ki]) \
189  shared( TILE_A ) \
190  firstprivate( mi, ki ) \
191  label( dnpgetrf_dtrsm_below )
192  cblas_dtrsm( CblasRowMajor,
193  ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Upper,
194  ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) NonUnit,
195  tile_size_m, tile_size_k,
196  1.0, TILE_A[ki][ki], tile_size_k,
197  TILE_A[mi][ki], tile_size_k );
198  }
199  for ( ni = ki + 1; ni < nt; ni++ )
200  {
201  tile_size_n = ddss_tile_size( N, ni );
202 
203  #pragma oss task in( TILE_A[ki][ki] ) \
204  inout(TILE_A[ki][ni]) \
205  shared( TILE_A ) \
206  firstprivate( ni, ki ) \
207  label( dnpgetrf_dtrsm_right )
208  cblas_dtrsm( CblasRowMajor,
209  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
210  ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) Unit,
211  tile_size_k, tile_size_n,
212  1.0, TILE_A[ki][ki], tile_size_k,
213  TILE_A[ki][ni], tile_size_n );
214 
215  for ( mmi = ki + 1; mmi < nt; mmi++ )
216  {
217  tile_size_mm = ddss_tile_size( N, mmi );
218 
219  #pragma oss task in( TILE_A[mmi][ki] ) \
220  in( TILE_A[ki][ni] ) \
221  inout( TILE_A[mmi][ni] ) \
222  shared( TILE_A ) \
223  firstprivate( ni, mmi, ki ) \
224  label( dnpgetrf_dgemm )
225  cblas_dgemm( CblasRowMajor,
226  ( CBLAS_TRANSPOSE ) NoTrans,
227  ( CBLAS_TRANSPOSE ) NoTrans,
228  tile_size_mm,
229  tile_size_n,
230  TILE_SIZE,
231  -1.0, TILE_A[mmi][ki], tile_size_k,
232  TILE_A[ki][ni], tile_size_n,
233  1.0, TILE_A[mmi][ni], tile_size_n );
234  }
235  }
236  }
237 
238  /*********************************************
239  --From tiled data layout to flat data layout--
240  *********************************************/
241  ddss_dtiled2flat_nb( N, N, A, LDA, nt, nt, TILE_A );
242 
243  // Triandular solve
244  // --SIDE = Left & UPLO = Lower & TRANS_A = NoTrans & Unit--
245  for ( ki = 0; ki < nt; ki++ )
246  {
247  tile_size_k = ddss_tile_size( N, ki );
248 
249  for ( ni = 0; ni < nrhst; ni++ )
250  {
251  tile_size_n = ddss_tile_size( NRHS, ni );
252 
253  #pragma oss task in( TILE_A[ki][ki] ) \
254  inout( TILE_B[ki][ni] ) \
255  shared( TILE_A, TILE_B ) \
256  firstprivate( ki, ni ) \
257  label( dtrsm_dtrsm1 )
258  cblas_dtrsm( CblasRowMajor,
259  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
260  ( CBLAS_TRANSPOSE ) NoTrans,
261  ( CBLAS_DIAG ) Unit,
262  tile_size_k,
263  tile_size_n,
264  1.0, TILE_A[ki][ki], tile_size_k,
265  TILE_B[ki][ni], tile_size_n );
266  }
267  for ( nni = 0; nni < nrhst; nni++ )
268  {
269  tile_size_nn = ddss_tile_size( NRHS, nni );
270 
271  for ( mi = ki + 1; mi < nt; mi++ )
272  {
273 
274  #pragma oss task in( TILE_A[mi][ki] ) \
275  in( TILE_B[ki][nni] ) \
276  inout( TILE_B[mi][nni] ) \
277  shared( TILE_A, TILE_B ) \
278  firstprivate( ki, mi, nni ) \
279  label( dtrsm_dgemm1 )
280  cblas_dgemm( CblasRowMajor,
281  CblasNoTrans, CblasNoTrans,
282  tile_size_k,
283  tile_size_nn,
284  tile_size_k,
285  -1.0, TILE_A[mi][ki], tile_size_k,
286  TILE_B[ki][nni], tile_size_nn,
287  1.0, TILE_B[mi][nni], tile_size_nn );
288  }
289  }
290  }
291 
292  // Triandular solve
293  // --SIDE = Left & UPLO = Upper & TRANS_A = NoTrans & NonUnit--
294  for ( ki = nt - 1; ki >= 0; ki-- )
295  {
296  tile_size_k = ddss_tile_size( N, ki );
297 
298  for ( ni = 0; ni < nrhst; ni++ )
299  {
300  tile_size_n = ddss_tile_size( NRHS, ni );
301 
302  #pragma oss task in( TILE_A[ki][ki] ) \
303  inout( TILE_B[ki][ni] ) \
304  shared( TILE_A, TILE_B ) \
305  firstprivate( ki, ni ) \
306  label( dtrsmi_dtrsm2 )
307  cblas_dtrsm( CblasRowMajor,
308  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Upper,
309  ( CBLAS_TRANSPOSE ) NoTrans,
310  ( CBLAS_DIAG ) NonUnit,
311  tile_size_k,
312  tile_size_n,
313  1.0, TILE_A[ki][ki], tile_size_k,
314  TILE_B[ki][ni], tile_size_n );
315  }
316 
317  for ( mi = ki - 1; mi >= 0; mi-- )
318  {
319  tile_size_m = ddss_tile_size( N, mi );
320 
321  for ( nni = 0; nni < nrhst; nni++ )
322  {
323  tile_size_nn = ddss_tile_size( NRHS, nni );
324 
325  #pragma oss task in( TILE_A[mi][ki] ) \
326  in( TILE_B[ki][nni] ) \
327  inout( TILE_B[mi][nni] ) \
328  shared( TILE_A, TILE_B ) \
329  firstprivate( ki, mi, nni ) \
330  label( dtrsm_dgemm2 )
331  cblas_dgemm( CblasRowMajor,
332  CblasNoTrans, CblasNoTrans,
333  tile_size_m,
334  tile_size_nn,
335  tile_size_k,
336  -1.0, TILE_A[mi][ki], tile_size_k,
337  TILE_B[ki][nni], tile_size_nn,
338  1.0, TILE_B[mi][nni], tile_size_nn );
339  }
340  }
341  }
342 
343  /*********************************************
344  --From tiled data layout to flat data layout--
345  *********************************************/
346  ddss_dtiled2flat( N, NRHS, B, LDB, nt, nrhst, TILE_B );
347 
348  // --Tile A and B matrices free--
349  free( TILE_A );
350  free( TILE_B );
351 
352  return Success;
353 
354 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
enum LASS_RETURN dnpgetrf(int M, int N, double *A, int LDA)
Definition: dnpgetrf.c:70
enum LASS_RETURN kdnpgesv(int N, int NRHS, double *A, int LDA, double *B, int LDB)
Definition: kdnpgesv.c:86
void ddss_dtiled2flat_nb(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52