LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
kdtpgetrf.c
Go to the documentation of this file.
1 #include "../include/lass.h"
2 
3 /**
4  *
5  * @file kdtpgetrf.c
6  *
7  * @brief LASs-DDSs kdtpgetrf routine.
8  *
9  * LASs-DDSs is a software package provided by:
10  * Barcelona Supercomputing Center - Centro Nacional de Supercomputacion
11  *
12  * @author Pedro Valero-Lara pedro.valero@bsc.es
13  * @author Boro Sofranac boro.sofranac@bsc.es
14  * @date 2018-04-08
15  * @reviewer
16  * @modified 2018-05-08
17  *
18  **/
19 
20 /**
21  *
22  * @ingroup DDSS
23  *
24  * Performs the LU factorization with tiled pivoting ( row interchanges )
25  * of a general M-by-N matrix A:
26  *
27  * A = P * L * U
28  *
29  * where P is a permutation matrix, L is a lower triangular
30  * ( lower trapezoidal if M > N ) matrix with unit diagonal elements and
31  * U is an upper triangular ( upper trapezoidal if M < N ) matrix.
32  *
33 **/
34 
35 /**
36  *
37  * @param[in]
38  * M int.
39  * M specifies the number of rows of the matrix A. M >= 0.
40  *
41  * @param[in]
42  * N int.
43  * N specifies the number of columns of the matrix A. N >= 0.
44  *
45  * @param[in,out]
46  * A double *.
47  * A is a pointer to a regular matrix of dimension M-by-N.
48  * On exit, if return value is Success, the matrix A is overwriten by
49  * the factors L and U. The unit diagonal elements of L are not stored.
50  *
51  * @param[in]
52  * LDA int.
53  * LDA specifies the number of columns of A ( row-major order ).
54  * LDA must be at least max( 1, N ).
55  *
56  * @param[out]
57  * IPIV int *.
58  * ipiv is a pointer to an array of dimesion at least
59  * max( 1, min ( M, N ) ).
60  * ipiv( i ) = j, 1 <= i <= min( M, N ) implies that rows i and j have
61  * been interchanged.
62  *
63  **/
64 
65 /**
66  *
67  * @retval Success successful exit
68  * @retval NoSuccess unsuccessful exit
69  *
70  **/
71 
72 /**
73  *
74  * @sa ddss_tile
75  * @sa ddss_flat2tiled
76  * @sa ddss_tiled2flat
77  *
78  **/
79 
80 enum LASS_RETURN kdtpgetrf( int M, int N, double *A, int LDA, int *IPIV )
81 {
82 
83  // Local variables
84  int i;
85  int mt, nt;
86  int mi, mmi, ki, ni;
87  int tile_size_m;
88  int tile_size_mm;
89  int tile_size_n;
90  int tile_size_km;
91  int tile_size_kn;
92  int local_ipiv_size;
93  int current_position;
94  enum LASS_RETURN ret;
95  int *ipiv_local;
96 
97  // Number of tiles
98  if ( M % TILE_SIZE == 0 )
99  {
100  mt = M / TILE_SIZE;
101  }
102  else
103  {
104  mt = ( M / TILE_SIZE ) + 1;
105  }
106 
107  if ( N % TILE_SIZE == 0 )
108  {
109  nt = N / TILE_SIZE;
110  }
111  else
112  {
113  nt = ( N / TILE_SIZE ) + 1;
114  }
115 
116  /***************************
117  --Tile A matrix allocation--
118  ***************************/
119 
120  double ( *TILE_A )[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
121  TILE_SIZE * TILE_SIZE * sizeof( double ) );
122 
123  if ( TILE_A == NULL )
124  {
125  fprintf( stderr, "Failure in kdnpgetrf for matrix TILE_A\n" );
126  return NoSuccess;
127  }
128 
129  ipiv_local = ( int* ) malloc( MIN( M, N ) * sizeof( int ) );
130 
131  if ( ipiv_local == NULL )
132  {
133  fprintf( stderr, "Failure in kdnpgetrf for ipiv_local\n" );
134  return NoSuccess;
135  }
136 
137  /************************************************
138  // --From flat data layout to tiled data layout--
139  ************************************************/
140 
141  ddss_dflat2tiled( M, N, A, LDA, mt, nt, TILE_A );
142 
143  /****************
144  --DGETRF tile--
145  ****************/
146 
147  current_position = 0;
148  i = 0;
149  for ( ki = 0; ki < MIN( mt, nt ); ki++ )
150  {
151  tile_size_km = ddss_tile_size( M, ki );
152  tile_size_kn = ddss_tile_size( N, ki );
153 
154  #pragma oss task inout( TILE_A[ki][ki] ) \
155  inout( ipiv_local ) \
156  inout( current_position ) \
157  out( local_ipiv_size ) \
158  shared( TILE_A ) \
159  firstprivate( ki, i ) \
160  label( dgetrf )
161  {
162  LAPACKE_dgetrf( CblasRowMajor,
163  tile_size_km, tile_size_kn,
164  TILE_A[ki][ki], tile_size_kn,
165  ipiv_local + current_position );
166 
167  // Update the global ipiv array
168  local_ipiv_size = MIN( tile_size_km, tile_size_kn );
169  for ( i = 0; i < local_ipiv_size; i++ )
170  {
171  ( IPIV + current_position )[i] =
172  ( ipiv_local + current_position )[i] + current_position;
173  }
174  current_position += local_ipiv_size;
175  }
176  for ( mi = ki + 1; mi < mt; mi++ )
177  {
178  tile_size_m = ddss_tile_size( M, mi );
179 
180  #pragma oss task in( TILE_A[ki][ki] ) \
181  inout( TILE_A[mi][ki] ) \
182  shared( TILE_A ) \
183  firstprivate( mi, ki ) \
184  label( dtrsm_below )
185  cblas_dtrsm( CblasRowMajor,
186  ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Upper,
187  ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) NonUnit,
188  tile_size_m, tile_size_kn,
189  1.0, TILE_A[ki][ki], tile_size_kn,
190  TILE_A[mi][ki], tile_size_kn );
191  }
192  for ( ni = 0; ni < ki; ni++ )
193  {
194  tile_size_n = ddss_tile_size( N, ni );
195  // Swap the tiles to the left of A[ki][ki]
196  #pragma oss task in( ipiv_local ) \
197  in( current_position ) \
198  in( local_ipiv_size ) \
199  inout( TILE_A[ki][ni] ) \
200  shared( TILE_A ) \
201  firstprivate( ki, ni ) \
202  label( dlaswp_left )
203  LAPACKE_dlaswp( CblasRowMajor,
204  tile_size_n, TILE_A[ki][ni], tile_size_n,
205  1, local_ipiv_size,
206  ipiv_local + current_position - local_ipiv_size,
207  1 );
208 
209  }
210  for ( ni = ki + 1; ni < nt; ni++ )
211  {
212  tile_size_n = ddss_tile_size( N, ni );
213  // Swap the tiles to the right of A[ki][ki]
214  #pragma oss task in( ipiv_local ) \
215  in( current_position ) \
216  in( local_ipiv_size ) \
217  inout( TILE_A[ki][ni] ) \
218  shared( TILE_A ) \
219  firstprivate( ki, ni ) \
220  label( dlaswp_right )
221  LAPACKE_dlaswp( CblasRowMajor,
222  tile_size_n, TILE_A[ki][ni], tile_size_n,
223  1, local_ipiv_size,
224  ipiv_local + current_position - local_ipiv_size,
225  1 );
226 
227  #pragma oss task in( TILE_A[ki][ki] ) \
228  inout( TILE_A[ki][ni] ) \
229  shared( TILE_A ) \
230  firstprivate( ni, ki ) \
231  label( dtrsm_right )
232  cblas_dtrsm( CblasRowMajor,
233  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
234  ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) Unit,
235  tile_size_km, tile_size_n,
236  1.0, TILE_A[ki][ki], tile_size_kn,
237  TILE_A[ki][ni], tile_size_n );
238 
239  for ( mmi = ki + 1; mmi < mt; mmi++ )
240  {
241  tile_size_mm = ddss_tile_size( M, mmi );
242 
243  #pragma oss task in( TILE_A[mmi][ki] ) \
244  in( TILE_A[ki][ni] ) \
245  inout( TILE_A[mmi][ni] ) \
246  shared( TILE_A ) \
247  firstprivate( ni, mmi, ki ) \
248  label( dgemm )
249  cblas_dgemm( CblasRowMajor,
250  ( CBLAS_TRANSPOSE ) NoTrans,
251  ( CBLAS_TRANSPOSE ) NoTrans,
252  tile_size_mm,
253  tile_size_n,
254  TILE_SIZE,
255  -1.0, TILE_A[mmi][ki], tile_size_kn,
256  TILE_A[ki][ni], tile_size_n,
257  1.0, TILE_A[mmi][ni], tile_size_n );
258  }
259  }
260  }
261 
262  // --From tile data layout to flat data layout--
263  ddss_dtiled2flat( M, N, A, LDA, mt, nt, TILE_A );
264 
265  // --Free--
266  free( TILE_A );
267  free( ipiv_local );
268 
269  return Success;
270 
271 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
enum LASS_RETURN kdtpgetrf(int M, int N, double *A, int LDA, int *IPIV)
Definition: kdtpgetrf.c:80
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52