LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
kdnpgetrf.c
Go to the documentation of this file.
1 #include "../include/lass.h"
2 
3 /**
4  *
5  * @file kdnpgetrf.c
6  *
7  * @brief sLASs-DDSs kdnpgetrf routine.
8  *
9  * sLASs-DDSs is a software package provided by:
10  * Barcelona Supercomputing Center - Centro Nacional de Supercomputacion
11  *
12  * @author Pedro Valero-Lara pedro.valero@bsc.es
13  * @date 2018-07-24
14  * @reviewer
15  * @modified
16  *
17  **/
18 
19 /**
20  *
21  * @ingroup DDSS
22  *
23  * Performs the LU factorization without pivoting of a general M-by-N matrix A:
24  *
25  * A = L * U
26  *
27  * where L is a lower triangular ( lower trapezoidal if M > N ) matrix with
28  * unit diagonal elements and U is an upper triangular ( upper trapezoidal
29  * if M < N ) matrix.
30  *
31 **/
32 
33 /**
34  *
35  * @param[in]
36  * M int.
37  * M specifies the number of rows of the matrix A. M >= 0.
38  *
39  * @param[in]
40  * N int.
41  * N specifies the number of columns of the matrix A. N >= 0.
42  *
43  * @param[in,out]
44  * A double *.
45  * A is a pointer to a regular matrix of dimension M-by-N.
46  * On exit, if return value is Success, the matrix A is overwriten by
47  * the factors L and U. The unit diagonal elements of L are not stored.
48  *
49  * @param[in]
50  * LDA int.
51  * LDA specifies the number of columns of A ( row-major order ).
52  * LDA must be at least max( 1, N ).
53  *
54  **/
55 
56 /**
57  *
58  * @retval Success successful exit
59  * @retval NoSuccess unsuccessful exit
60  *
61  **/
62 
63 /**
64  *
65  * @sa dnpgetrf
66  * @sa tune_dnpgetrf
67  * @sa smart_dnpgetrf
68  * @sa ddss_tile
69  * @sa ddss_flat2tiled
70  * @sa ddss_tiled2flat
71  *
72  **/
73 
74 enum LASS_RETURN kdnpgetrf( int M, int N, double *A, int LDA )
75 {
76 
77  // Local variables
78  int mt, nt;
79  int mi, mmi, ki, ni, nni, li;
80  int tile_size_m;
81  int tile_size_mm;
82  int tile_size_n;
83  int tile_size_nn;
84  int tile_size_k;
85  int tile_size_km;
86  int tile_size_kn;
87 
88  // Number of tiles
89  if ( M % TILE_SIZE == 0 )
90  {
91  mt = M / TILE_SIZE;
92  }
93  else
94  {
95  mt = ( M / TILE_SIZE ) + 1;
96  }
97 
98  if ( N % TILE_SIZE == 0 )
99  {
100  nt = N / TILE_SIZE;
101  }
102  else
103  {
104  nt = ( N / TILE_SIZE ) + 1;
105  }
106 
107  /***************************
108  --Tile A matrix allocation--
109  ***************************/
110 
111  double (*TILE_A)[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
112  TILE_SIZE * TILE_SIZE * sizeof( double ) );
113 
114  if ( TILE_A == NULL )
115  {
116  fprintf( stderr, "Failure in kdnpgetrf for matrix TILE_A\n" );
117  return NoSuccess;
118  }
119 
120  /*********************************************
121  --From flat data layout to tiled data layout--
122  *********************************************/
123 
124  ddss_dflat2tiled( M, N, A, LDA, mt, nt, TILE_A );
125 
126  /****************
127  --DNPGETRF tile--
128  ****************/
129 
130  for ( ki = 0; ki < MIN( mt, nt ); ki++ )
131  {
132  tile_size_km = ddss_tile_size( M, ki );
133  tile_size_kn = ddss_tile_size( N, ki );
134 
135  #if defined(LASs_WITH_MKL)
136 
137  tile_size_k = MIN( tile_size_km, tile_size_kn );
138 
139  #pragma oss task inout( TILE_A[ki][ki] ) \
140  shared( TILE_A ) \
141  firstprivate( ki ) \
142  priority( nt ) \
143  label( mkl_dgetrfnpi )
144  LAPACKE_mkl_dgetrfnpi( CblasRowMajor,
145  tile_size_km, tile_size_kn,
146  tile_size_k,
147  TILE_A[ki][ki], tile_size_kn );
148 
149  #else
150 
151  #pragma oss task inout( TILE_A[ki][ki] ) \
152  shared( TILE_A ) \
153  firstprivate( ki ) \
154  priority( nt ) \
155  label( dnpgetrf )
156  dnpgetrf( tile_size_km, tile_size_kn,
157  TILE_A[ki][ki],
158  tile_size_kn );
159 
160  #endif
161 
162  for ( mi = ki + 1; mi < mt; mi++ )
163  {
164  tile_size_m = ddss_tile_size( M, mi );
165 
166  #pragma oss task in( TILE_A[ki][ki] ) \
167  inout(TILE_A[mi][ki]) \
168  shared( TILE_A ) \
169  firstprivate( mi, ki ) \
170  priority( nt ) \
171  label( dtrsm_below )
172  cblas_dtrsm( CblasRowMajor,
173  ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Upper,
174  ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) NonUnit,
175  tile_size_m, tile_size_kn,
176  1.0, TILE_A[ki][ki], tile_size_kn,
177  TILE_A[mi][ki], tile_size_kn );
178  }
179  for ( ni = ki + 1; ni < nt; ni++ )
180  {
181  tile_size_n = ddss_tile_size( N, ni );
182 
183  #pragma oss task in( TILE_A[ki][ki] ) \
184  inout(TILE_A[ki][ni]) \
185  shared( TILE_A ) \
186  firstprivate( ni, ki ) \
187  priority( nt ) \
188  label( dtrsm_right )
189  cblas_dtrsm( CblasRowMajor,
190  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
191  ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) Unit,
192  tile_size_km, tile_size_n,
193  1.0, TILE_A[ki][ki], tile_size_kn,
194  TILE_A[ki][ni], tile_size_n );
195  }
196 
197  for ( nni = ki + 1; nni < nt; nni++ )
198  {
199  tile_size_nn = ddss_tile_size( N, nni );
200 
201  for ( mmi = ki + 1; mmi < mt; mmi++ )
202  {
203  tile_size_mm = ddss_tile_size( M, mmi );
204 
205  #pragma oss task in( TILE_A[mmi][ki] ) \
206  in( TILE_A[ki][nni] ) \
207  inout( TILE_A[mmi][nni] ) \
208  shared( TILE_A ) \
209  firstprivate( nni, mmi, ki ) \
210  priority( nt - nni ) \
211  label( dgemm )
212  cblas_dgemm( CblasRowMajor,
213  ( CBLAS_TRANSPOSE ) NoTrans,
214  ( CBLAS_TRANSPOSE ) NoTrans,
215  tile_size_mm,
216  tile_size_nn,
217  TILE_SIZE,
218  -1.0, TILE_A[mmi][ki], tile_size_kn,
219  TILE_A[ki][nni], tile_size_nn,
220  1.0, TILE_A[mmi][nni], tile_size_nn );
221  }
222  }
223  }
224 
225  // --From tile data layout to flat data layout--
226  ddss_dtiled2flat( M, N, A, LDA, mt, nt, TILE_A );
227 
228  // --Tile A matrix free--
229  free( TILE_A );
230 
231  return Success;
232 
233 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
enum LASS_RETURN dnpgetrf(int M, int N, double *A, int LDA)
Definition: dnpgetrf.c:70
enum LASS_RETURN kdnpgetrf(int M, int N, double *A, int LDA)
Definition: kdnpgetrf.c:74
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52