LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
Functions
kdtpgesv.c File Reference

LASs-DDSs kdtpgesv routine. More...

#include "../include/lass.h"
Include dependency graph for kdtpgesv.c:

Go to the source code of this file.

Functions

enum LASS_RETURN kdtpgesv (int N, int NRHS, double *A, int LDA, int *IPIV, double *B, int LDB)
 

Detailed Description

LASs-DDSs kdtpgesv routine.

LASs-DDSs is a software package provided by: Barcelona Supercomputing Center - Centro Nacional de Supercomputacion

Author
Pedro Valero-Lara pedro.nosp@m..val.nosp@m.ero@b.nosp@m.sc.e.nosp@m.s
Date
2018-09-20

Definition in file kdtpgesv.c.

Function Documentation

enum LASS_RETURN kdtpgesv ( int  N,
int  NRHS,
double *  A,
int  LDA,
int *  IPIV,
double *  B,
int  LDB 
)

Solves a system of linear equations A X = B, where A is a N-by-N general matrix and X and B are N-by-NRHS matrices. The matrix A is factorized using the LU descomposition with tiled-pivoting. The matrix A is descomposed as:

A = P * L * U

where P is a permutation matrix, L is a lower triangular matrix with unit diagonal elements and U is an upper triangular matrix.

Parameters
[in]Nint. N specifies the order of the square matrix A. N >= 0.

NRHS int. NRHS specifies the number of right-hand-sides (number of columns of B). NRHS >= 0.

Parameters
[in,out]Adouble *. A is a pointer to a regular matrix of dimension N-by-LDA. On exit, if return value is Success, the matrix A is overwriten by the factors L and U. The unit diagonal elements of L are not stored.
[in]LDAint. LDA specifies the number of columns of A ( row-major order ). LDA must be at least max( 1, N ).
[out]IPIVint *. ipiv is a pointer to an array of dimesion at least max( 1, min ( M, N ) ). ipiv( i ) = j, 1 <= i <= min( M, N ) implies that rows i and j have been interchanged.
[in,out]Bdouble *. B is a pointer to a matrix of dimension N by NRHS, which stores the right-hand-sides of the systems of linear equations. (row-major order). On exit, if return value is Success, the matrix B is overwriten by the solution matrix X.
[in]LDBint. LDB specifies the number of columns of B ( row-major order ). LDB must be at least max( 1, NRHS ).
Return values
Successsuccessful exit
NoSuccessunsuccessful exit
See also
ddss_tile
ddss_flat2tiled
ddss_tiled2flat

Definition at line 91 of file kdtpgesv.c.

References ddss_dflat2tiled(), ddss_dtiled2flat(), and ddss_tile_size().

Referenced by ddss_dtpgesv().

95 {
96 
97  // Local variables
98  int i;
99  int nt, nrhst;
100  int ni, mi, nni, ki, nrhsi;
101  int tile_size_n;
102  int tile_size_m;
103  int tile_size_nn;
104  int tile_size_nrhs;
105  int tile_size_k;
106  int local_ipiv_size;
107  int current_position;
108  enum LASS_RETURN ret;
109  int *ipiv_local;
110 
111  // Number of tiles
112  if ( N % TILE_SIZE == 0 )
113  {
114  nt = N / TILE_SIZE;
115  }
116  else
117  {
118  nt = ( N / TILE_SIZE ) + 1;
119  }
120 
121  if ( NRHS % TILE_SIZE == 0 )
122  {
123  nrhst = NRHS / TILE_SIZE;
124  }
125  else
126  {
127  nrhst = ( NRHS / TILE_SIZE ) + 1;
128  }
129 
130  /***************************
131  --Tile matrices allocation--
132  ***************************/
133 
134  double ( *TILE_A )[nt][TILE_SIZE * TILE_SIZE] = malloc ( nt * nt *
135  TILE_SIZE * TILE_SIZE * sizeof( double ) );
136 
137  if ( TILE_A == NULL )
138  {
139  fprintf( stderr, "Failure in kdtpgesv for matrix TILE_A\n" );
140  return NoSuccess;
141  }
142 
143  double ( *TILE_B )[nrhst][TILE_SIZE * TILE_SIZE] = malloc(
144  nt * nrhst * TILE_SIZE * TILE_SIZE * sizeof( double ) );
145 
146  if ( TILE_B == NULL )
147  {
148  fprintf( stderr, "Failure in kdtpgesv for matrix TILE_B\n" );
149  return NoSuccess;
150  }
151 
152  ipiv_local = ( int* ) malloc( N * sizeof( int ) );
153 
154  if ( ipiv_local == NULL )
155  {
156  fprintf( stderr, "Failure in kdtpgesv for ipiv_local\n" );
157  return NoSuccess;
158  }
159 
160  /************************************************
161  // --From flat data layout to tiled data layout--
162  ************************************************/
163 
164  // From flat matrix A to tile matrix TILE_A
165  ddss_dflat2tiled( N, N, A, LDA, nt, nt, TILE_A );
166 
167  // From flat matrix B to tile matrix TILE_B
168  ddss_dflat2tiled( N, NRHS, B, LDB, nt, nrhst, TILE_B );
169 
170  /****************
171  --DGETRF tile--
172  ****************/
173 
174  current_position = 0;
175  i = 0;
176  for ( ki = 0; ki < nt; ki++ )
177  {
178  tile_size_k = ddss_tile_size( N, ki );
179 
180  #pragma oss task inout( TILE_A[ki][ki] ) \
181  inout( ipiv_local ) \
182  inout( current_position ) \
183  out( local_ipiv_size ) \
184  shared( TILE_A ) \
185  firstprivate( ki, i ) \
186  label( dgetrf )
187  {
188  LAPACKE_dgetrf( CblasRowMajor,
189  tile_size_k, tile_size_k,
190  TILE_A[ki][ki], tile_size_k,
191  ipiv_local + current_position );
192 
193  // Update the global ipiv array
194  local_ipiv_size = tile_size_k;
195  for ( i = 0; i < local_ipiv_size; i++ )
196  {
197  ( IPIV + current_position )[i] =
198  ( ipiv_local + current_position )[i] + current_position;
199  }
200  current_position += local_ipiv_size;
201  }
202  for ( mi = ki + 1; mi < nt; mi++ )
203  {
204  tile_size_m = ddss_tile_size( N, mi );
205 
206  #pragma oss task in( TILE_A[ki][ki] ) \
207  inout(TILE_A[mi][ki]) \
208  shared( TILE_A ) \
209  firstprivate( mi, ki ) \
210  label( dtrsm_below )
211  cblas_dtrsm( CblasRowMajor,
212  ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Upper,
213  ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) NonUnit,
214  tile_size_m, tile_size_k,
215  1.0, TILE_A[ki][ki], tile_size_k,
216  TILE_A[mi][ki], tile_size_k );
217  }
218  for ( ni = 0; ni < ki; ni++ )
219  {
220  tile_size_n = ddss_tile_size( N, ni );
221  // Swap the tiles to the left of A[ki][ki]
222  #pragma oss task in ( ipiv_local ) \
223  in( current_position ) \
224  in( local_ipiv_size ) \
225  inout( TILE_A[ki][ni] ) \
226  shared( TILE_A ) \
227  firstprivate( ki, ni ) \
228  label( dlaswp_left )
229  LAPACKE_dlaswp( CblasRowMajor,
230  tile_size_n, TILE_A[ki][ni], tile_size_n,
231  1, local_ipiv_size,
232  ipiv_local + current_position - local_ipiv_size,
233  1 );
234 
235  }
236  for ( nrhsi = 0; nrhsi < nrhst; nrhsi++ )
237  {
238  tile_size_nrhs = ddss_tile_size( NRHS, nrhsi );
239  // Swap on the B[ki][nrhsi] tile
240  #pragma oss task in ( ipiv_local ) \
241  in( current_position ) \
242  in( local_ipiv_size ) \
243  inout( TILE_B[ki][nrhsi] ) \
244  shared( TILE_B ) \
245  firstprivate( ki, nrhsi ) \
246  label( dlaswp_on_B )
247  LAPACKE_dlaswp( CblasRowMajor,
248  tile_size_nrhs, TILE_B[ki][nrhsi], tile_size_nrhs,
249  1, local_ipiv_size,
250  ipiv_local + current_position - local_ipiv_size,
251  1 );
252 
253  }
254  for ( ni = ki + 1; ni < nt; ni++ )
255  {
256  tile_size_n = ddss_tile_size( N, ni );
257  // Swap the tiles to the right of A[ki][ki]
258  #pragma oss task in ( ipiv_local ) \
259  in( current_position ) \
260  in( local_ipiv_size ) \
261  inout( TILE_A[ki][ni] ) \
262  shared( TILE_A ) \
263  firstprivate( ki, ni ) \
264  label( dlaswp_right )
265  LAPACKE_dlaswp( CblasRowMajor,
266  tile_size_n, TILE_A[ki][ni], tile_size_n,
267  1, local_ipiv_size,
268  ipiv_local + current_position - local_ipiv_size,
269  1 );
270 
271  #pragma oss task in( TILE_A[ki][ki] ) \
272  inout(TILE_A[ki][ni]) \
273  shared( TILE_A ) \
274  firstprivate( ni, ki ) \
275  label( dtrsm_right )
276  cblas_dtrsm( CblasRowMajor,
277  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
278  ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) Unit,
279  tile_size_k, tile_size_n,
280  1.0, TILE_A[ki][ki], tile_size_k,
281  TILE_A[ki][ni], tile_size_n );
282 
283  for ( nni = ki + 1; nni < nt; nni++ )
284  {
285  tile_size_nn = ddss_tile_size( N, nni );
286 
287  #pragma oss task in( TILE_A[nni][ki] ) \
288  in( TILE_A[ki][ni] ) \
289  inout( TILE_A[nni][ni] ) \
290  shared( TILE_A ) \
291  firstprivate( ni, nni, ki ) \
292  label( dgemm )
293  cblas_dgemm( CblasRowMajor,
294  ( CBLAS_TRANSPOSE ) NoTrans,
295  ( CBLAS_TRANSPOSE ) NoTrans,
296  tile_size_nn,
297  tile_size_n,
298  TILE_SIZE,
299  -1.0, TILE_A[nni][ki], tile_size_k,
300  TILE_A[ki][ni], tile_size_n,
301  1.0, TILE_A[nni][ni], tile_size_n );
302  }
303  }
304  }
305 
306  // --From tile data layout to flat data layout--
307  ddss_dtiled2flat( N, N, A, LDA, nt, nt, TILE_A );
308 
309  // Triandular solve
310  // --SIDE = Left & UPLO = Lower & TRANS_A = NoTrans & Unit--
311  for ( ki = 0; ki < nt; ki++ )
312  {
313  tile_size_k = ddss_tile_size( N, ki );
314 
315  for ( ni = 0; ni < nrhst; ni++ )
316  {
317  tile_size_n = ddss_tile_size( NRHS, ni );
318 
319  #pragma oss task in( TILE_A[ki][ki] ) \
320  inout( TILE_B[ki][ni] ) \
321  shared( TILE_A, TILE_B ) \
322  firstprivate( ki, ni ) \
323  label( dtrsm_dtrsm1 )
324  cblas_dtrsm( CblasRowMajor,
325  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
326  ( CBLAS_TRANSPOSE ) NoTrans,
327  ( CBLAS_DIAG ) Unit,
328  tile_size_k,
329  tile_size_n,
330  1.0, TILE_A[ki][ki], tile_size_k,
331  TILE_B[ki][ni], tile_size_n );
332  }
333  for ( nni = 0; nni < nrhst; nni++ )
334  {
335  tile_size_nn = ddss_tile_size( NRHS, nni );
336 
337  for ( mi = ki + 1; mi < nt; mi++ )
338  {
339 
340  #pragma oss task in( TILE_A[mi][ki] ) \
341  in( TILE_B[ki][nni] ) \
342  inout( TILE_B[mi][nni] ) \
343  shared( TILE_A, TILE_B ) \
344  firstprivate( ki, mi, nni ) \
345  label( dtrsm_dgemm1 )
346  cblas_dgemm( CblasRowMajor,
347  CblasNoTrans, CblasNoTrans,
348  tile_size_k,
349  tile_size_nn,
350  tile_size_k,
351  -1.0, TILE_A[mi][ki], tile_size_k,
352  TILE_B[ki][nni], tile_size_nn,
353  1.0, TILE_B[mi][nni], tile_size_nn );
354  }
355  }
356  }
357 
358  // Triandular solve
359  // --SIDE = Left & UPLO = Upper & TRANS_A = NoTrans & NonUnit--
360  for ( ki = nt - 1; ki >= 0; ki-- )
361  {
362  tile_size_k = ddss_tile_size( N, ki );
363 
364  for ( ni = 0; ni < nrhst; ni++ )
365  {
366  tile_size_n = ddss_tile_size( NRHS, ni );
367 
368  #pragma oss task in( TILE_A[ki][ki] ) \
369  inout( TILE_B[ki][ni] ) \
370  shared( TILE_A, TILE_B ) \
371  firstprivate( ki, ni ) \
372  label( dtrsmi_dtrsm2 )
373  cblas_dtrsm( CblasRowMajor,
374  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Upper,
375  ( CBLAS_TRANSPOSE ) NoTrans,
376  ( CBLAS_DIAG ) NonUnit,
377  tile_size_k,
378  tile_size_n,
379  1.0, TILE_A[ki][ki], tile_size_k,
380  TILE_B[ki][ni], tile_size_n );
381  }
382 
383  for ( mi = ki - 1; mi >= 0; mi-- )
384  {
385  tile_size_m = ddss_tile_size( N, mi );
386 
387  for ( nni = 0; nni < nrhst; nni++ )
388  {
389  tile_size_nn = ddss_tile_size( NRHS, nni );
390 
391  #pragma oss task in( TILE_A[mi][ki] ) \
392  in( TILE_B[ki][nni] ) \
393  inout( TILE_B[mi][nni] ) \
394  shared( TILE_A, TILE_B ) \
395  firstprivate( ki, mi, nni ) \
396  label( dtrsm_dgemm2 )
397  cblas_dgemm( CblasRowMajor,
398  CblasNoTrans, CblasNoTrans,
399  tile_size_m,
400  tile_size_nn,
401  tile_size_k,
402  -1.0, TILE_A[mi][ki], tile_size_k,
403  TILE_B[ki][nni], tile_size_nn,
404  1.0, TILE_B[mi][nni], tile_size_nn );
405  }
406  }
407  }
408 
409  /*********************************************
410  --From tiled data layout to flat data layout--
411  *********************************************/
412  ddss_dtiled2flat( N, NRHS, B, LDB, nt, nrhst, TILE_B );
413 
414  // --Free--
415  free( TILE_A );
416  free( TILE_B );
417  free( ipiv_local );
418 
419  return Success;
420 
421 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52