LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
Functions
kdposv.c File Reference

LASs-DDSs kdposv routine. More...

#include "../include/lass.h"
Include dependency graph for kdposv.c:

Go to the source code of this file.

Functions

enum LASS_RETURN kdposv (enum DDSS_UPLO UPLO, int N, int NRHS, double *A, int LDA, double *B, int LDB)
 

Detailed Description

LASs-DDSs kdposv routine.

LASs-DDSs is a software package provided by: Barcelona Supercomputing Center - Centro Nacional de Supercomputacion

Author
Pedro Valero-Lara pedro.nosp@m..val.nosp@m.ero@b.nosp@m.sc.e.nosp@m.s
Date
2018-04-05

Definition in file kdposv.c.

Function Documentation

enum LASS_RETURN kdposv ( enum DDSS_UPLO  UPLO,
int  N,
int  NRHS,
double *  A,
int  LDA,
double *  B,
int  LDB 
)

Solves a system of linear equations A X = B, where A is a M-by-M simmetric positive definite matrix and X and B are M-by-NRHS matrices. The matrix A is descomposed as:

A = L \times L^T
or
A = U^T \times U

where L is a lower triangular matrix and U is an upper triangular matrix.

Parameters
[in]UPLOenum DDSS_UPLO. UPLO specifies the form of A is stored:
  • Lower: Lower triangle of A is stored. The upper traingular part is not referenced.
  • Upper: Upper triangle of A is stored. The lower triangular part is not referenced.
[in]Nint. N specifies the order of the square matrix A. N >= 0.
[in]NRHSint. NRHS specifies the number of right-hand-sides (number of columns of B). NRHS >= 0.
[in,out]Adouble *. A is a pointer to a positive definite matrix of dimension N by LDA. On exit, if return value is Success, the matrix A is overwriten by the factor U or L.
[in]LDAint. LDA specifies the number of columns of A ( row-major order ). LDA must be at least max( 1, N ).
[in,out]Bdouble *. B is a pointer to a matrix of dimension N by NRHS, which stores the right-hand-sides of the systems of linear equations. (row-major order). On exit, if return value is Success, the matrix B is overwriten by the solution matrix X.
[in]LDBint. LDB specifies the number of columns of B ( row-major order ). LDB must be at least max( 1, NRHS ).
Return values
Successsucessful exit
NoSuccessunsucessful exit
See also
ddss_dpotrf
ddss_tile
ddss_symflat2tiled
ddss_symtiled2flat

Definition at line 95 of file kdposv.c.

References ddss_dflat2tiled(), ddss_dsymflat2tiled(), ddss_dsymtiled2flat_nb(), ddss_dtiled2flat(), and ddss_tile_size().

Referenced by ddss_dposv().

99 {
100 
101  // Local variables
102  int nt, nrhst;
103  int mi, mmi, ki, ni, nni;
104  int Am, An;
105  int Bm, Bn;
106  int tile_size_m;
107  int tile_size_mm;
108  int tile_size_n;
109  int tile_size_nn;
110  int tile_size_k;
111 
112  // Number of tiles
113  if ( N % TILE_SIZE == 0 )
114  {
115  nt = N / TILE_SIZE;
116  }
117  else
118  {
119  nt = ( N / TILE_SIZE ) + 1;
120  }
121  if ( NRHS % TILE_SIZE == 0 )
122  {
123  nrhst = NRHS / TILE_SIZE;
124  }
125  else
126  {
127  nrhst = ( NRHS / TILE_SIZE ) + 1;
128  }
129 
130 
131  /****************************
132  --Tile A matrix declaration--
133  ****************************/
134 
135  Am = nt;
136  An = nt;
137 
138  Bm = nt;
139  Bn = nrhst;
140 
141  /***************************
142  --Tile matrices allocation--
143  ***************************/
144 
145  double (*TILE_A)[An][TILE_SIZE * TILE_SIZE] = malloc ( Am * An *
146  TILE_SIZE * TILE_SIZE * sizeof( double ) );
147 
148  if ( TILE_A == NULL)
149  {
150  fprintf( stderr, "Failure in kdposv for matrix TILE_A\n" );
151  return NoSuccess;
152  }
153 
154  double ( *TILE_B )[Bn][TILE_SIZE * TILE_SIZE] = malloc(
155  Bm * Bn * TILE_SIZE * TILE_SIZE * sizeof( double ) );
156 
157  if ( TILE_B == NULL )
158  {
159  fprintf( stderr, "Failure in kdposv for matrix TILE_B\n" );
160  return NoSuccess;
161  }
162 
163  /************************************************
164  // --From flat data layout to tiled data layout--
165  ************************************************/
166 
167  // From flat matrix A to tile matrix TILE_A
168  ddss_dsymflat2tiled( N, N, A, LDA, nt, nt, TILE_A, UPLO );
169 
170  // From flat matrix B to tile matrix TILE_B
171  ddss_dflat2tiled( N, NRHS, B, LDB, nt, nrhst, TILE_B );
172 
173  /**************
174  --DPOSV tile--
175  **************/
176 
177  if ( UPLO == Lower )
178  {
179  // Cholesky descomposition
180  // --UPLO = Lower--
181  for ( ki = 0; ki < nt; ki++ )
182  {
183  tile_size_k = ddss_tile_size( N, ki );
184 
185  #pragma oss task inout( TILE_A[ki][ki] ) \
186  firstprivate( ki ) \
187  label( dpotrf )
188  LAPACKE_dpotrf_work( LAPACK_ROW_MAJOR,
189  'L',
190  tile_size_k,
191  TILE_A[ki][ki], tile_size_k );
192 
193  for ( mi = ki + 1; mi < nt; mi++ )
194  {
195  tile_size_m = ddss_tile_size( N, mi );
196 
197  #pragma oss task in( TILE_A[ki][ki] ) \
198  inout(TILE_A[mi][ki]) \
199  firstprivate( TILE_A ) \
200  firstprivate( mi, ki ) \
201  label( dpotrf_dtrsm )
202  cblas_dtrsm( CblasRowMajor,
203  ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Lower,
204  ( CBLAS_TRANSPOSE ) Trans, ( CBLAS_DIAG ) NonUnit,
205  tile_size_m, TILE_SIZE,
206  1.0, TILE_A[ki][ki], TILE_SIZE,
207  TILE_A[mi][ki], TILE_SIZE );
208  }
209  for ( mmi = ki + 1; mmi < nt; mmi++ )
210  {
211  tile_size_mm = ddss_tile_size( N, mmi );
212 
213  #pragma oss task in( TILE_A[mmi][ki] ) \
214  inout( TILE_A[mmi][mmi] ) \
215  firstprivate( mmi, ki ) \
216  label( dpotrf_dsyrk )
217  cblas_dsyrk( CblasRowMajor,
218  ( CBLAS_UPLO ) Lower, ( CBLAS_TRANSPOSE ) NoTrans,
219  tile_size_mm, TILE_SIZE,
220  -1.0, TILE_A[mmi][ki], TILE_SIZE,
221  1.0, TILE_A[mmi][mmi], tile_size_mm );
222 
223  for ( ni = ki + 1; ni < mmi; ni++ )
224  {
225 
226  #pragma oss task in( TILE_A[mmi][ki] ) \
227  in( TILE_A[ni][ki] ) \
228  inout( TILE_A[mmi][ni] ) \
229  firstprivate( ni, mmi, ki ) \
230  label( dpotrf_dgemm )
231  cblas_dgemm( CblasRowMajor,
232  ( CBLAS_TRANSPOSE ) NoTrans,
233  ( CBLAS_TRANSPOSE ) Trans,
234  TILE_SIZE,
235  TILE_SIZE,
236  TILE_SIZE,
237  -1.0, TILE_A[mmi][ki], TILE_SIZE,
238  TILE_A[ni][ki], TILE_SIZE,
239  1.0, TILE_A[mmi][ni], TILE_SIZE );
240  }
241  }
242  }
243 
244  /*********************************************
245  --From tiled data layout to flat data layout--
246  *********************************************/
247  ddss_dsymtiled2flat_nb( N, N, A, LDA, nt, nt, TILE_A, UPLO );
248 
249  // Triangular solve
250  // --SIDE = Left & UPLO = Lower & TRANS_A = NoTrans--
251  for ( ki = 0; ki < nt; ki++ )
252  {
253  tile_size_k = ddss_tile_size( N, ki );
254 
255  for ( ni = 0; ni < nrhst; ni++ )
256  {
257  tile_size_n = ddss_tile_size( NRHS, ni );
258 
259  #pragma oss task in( TILE_A[ki][ki] ) \
260  inout( TILE_B[ki][ni] ) \
261  shared( TILE_A, TILE_B ) \
262  firstprivate( ki, ni ) \
263  label( dtrsm_dtrsm1 )
264  cblas_dtrsm( CblasRowMajor,
265  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
266  ( CBLAS_TRANSPOSE ) NoTrans,
267  ( CBLAS_DIAG ) NonUnit,
268  tile_size_k,
269  tile_size_n,
270  1.0, TILE_A[ki][ki], tile_size_k,
271  TILE_B[ki][ni], tile_size_n );
272  }
273  for ( nni = 0; nni < nrhst; nni++ )
274  {
275  tile_size_nn = ddss_tile_size( NRHS, nni );
276 
277  for ( mi = ki + 1; mi < nt; mi++ )
278  {
279 
280  #pragma oss task in( TILE_A[mi][ki] ) \
281  in( TILE_B[ki][nni] ) \
282  inout( TILE_B[mi][nni] ) \
283  shared( TILE_A, TILE_B ) \
284  firstprivate( ki, mi, nni ) \
285  label( dtrsm_dgemm1 )
286  cblas_dgemm( CblasRowMajor,
287  CblasNoTrans, CblasNoTrans,
288  tile_size_k,
289  tile_size_nn,
290  tile_size_k,
291  -1.0, TILE_A[mi][ki], tile_size_k,
292  TILE_B[ki][nni], tile_size_nn,
293  1.0, TILE_B[mi][nni], tile_size_nn );
294  }
295  }
296  }
297 
298  // Triangular solve
299  // --SIDE = Left & UPLO = Lower & TRANS_A = Trans--
300  for ( ki = 0; ki < nt; ki++ )
301  {
302  tile_size_k = ddss_tile_size( N, ki );
303 
304  for ( ni = 0; ni < nrhst; ni++ )
305  {
306  tile_size_n = ddss_tile_size( NRHS, ni );
307 
308  #pragma oss task in( TILE_A[ki][ki] ) \
309  inout( TILE_B[ki][ni] ) \
310  shared( TILE_A, TILE_B ) \
311  firstprivate( ki, ni ) \
312  label( dtrsm_dtrsm2 )
313  cblas_dtrsm( CblasRowMajor,
314  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
315  ( CBLAS_TRANSPOSE ) Trans,
316  ( CBLAS_DIAG ) NonUnit,
317  tile_size_k,
318  tile_size_n,
319  1.0, TILE_A[ki][ki], tile_size_k,
320  TILE_B[ki][ni], tile_size_n );
321  }
322 
323  for ( nni = 0; nni < nrhst; nni++ )
324  {
325  tile_size_nn = ddss_tile_size( NRHS, nni );
326 
327  for ( mi = ki - 1; mi >= 0; mi-- )
328  {
329  tile_size_m = ddss_tile_size( N, mi );
330 
331  #pragma oss task in( TILE_A[ki][mi] ) \
332  in( TILE_B[ki][nni] ) \
333  inout( TILE_B[mi][nni] ) \
334  shared( TILE_A, TILE_B ) \
335  firstprivate( ki, mi, nni ) \
336  label( dtrsm_dgemm2 )
337  cblas_dgemm( CblasRowMajor,
338  CblasTrans, CblasNoTrans,
339  tile_size_m,
340  tile_size_nn,
341  tile_size_k,
342  -1.0, TILE_A[ki][mi], tile_size_m,
343  TILE_B[ki][nni], tile_size_nn,
344  1.0, TILE_B[mi][nni], tile_size_nn );
345  }
346  }
347  }
348  }
349  else
350  {
351  // Cholesky descomposition
352  // --UPLO = Upper--
353  for ( ki = 0; ki < nt; ki++ )
354  {
355  tile_size_k = ddss_tile_size( N, ki );
356 
357  #pragma oss task inout( TILE_A[ki][ki] ) \
358  shared( TILE_A ) \
359  firstprivate( ki ) \
360  label( dpotrf )
361  LAPACKE_dpotrf_work( LAPACK_ROW_MAJOR,
362  'U',
363  tile_size_k, TILE_A[ki][ki],
364  tile_size_k );
365 
366  for ( mi = ki + 1; mi < nt; mi++ )
367  {
368  tile_size_m = ddss_tile_size( N, mi );
369 
370  #pragma oss task in( TILE_A[ki][ki] ) \
371  inout(TILE_A[ki][mi]) \
372  shared( TILE_A ) \
373  firstprivate( mi, ki ) \
374  label( dpotrf_dtrsm )
375  cblas_dtrsm( CblasRowMajor,
376  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Upper,
377  ( CBLAS_TRANSPOSE ) Trans, ( CBLAS_DIAG ) NonUnit,
378  TILE_SIZE, tile_size_m,
379  1.0, TILE_A[ki][ki], TILE_SIZE,
380  TILE_A[ki][mi], tile_size_m );
381  }
382  for ( mmi = ki + 1; mmi < nt; mmi++ )
383  {
384  tile_size_mm = ddss_tile_size( N, mmi );
385 
386  #pragma oss task in( TILE_A[ki][mmi] ) \
387  inout( TILE_A[mmi][mmi] ) \
388  shared( TILE_A ) \
389  firstprivate( mmi, ki ) \
390  label( dpotrf_dsyrk )
391  cblas_dsyrk( CblasRowMajor,
392  ( CBLAS_UPLO ) Upper, ( CBLAS_TRANSPOSE ) Trans,
393  tile_size_mm, TILE_SIZE,
394  -1.0, TILE_A[ki][mmi], tile_size_mm,
395  1.0, TILE_A[mmi][mmi], tile_size_mm );
396 
397  for ( ni = ki + 1; ni < mmi; ni++ )
398  {
399  #pragma oss task in( TILE_A[ki][ni] ) \
400  in( TILE_A[ki][mmi] ) \
401  inout( TILE_A[ni][mmi] ) \
402  shared( TILE_A ) \
403  firstprivate( ni, mmi, ki ) \
404  label( dpotrf_dgemm )
405  cblas_dgemm( CblasRowMajor,
406  ( CBLAS_TRANSPOSE ) Trans,
407  ( CBLAS_TRANSPOSE ) NoTrans,
408  TILE_SIZE,
409  TILE_SIZE,
410  TILE_SIZE,
411  -1.0, TILE_A[ki][ni], TILE_SIZE,
412  TILE_A[ki][mmi], TILE_SIZE,
413  1.0, TILE_A[ni][mmi], TILE_SIZE );
414  }
415  }
416  }
417  /*********************************************
418  --From tiled data layout to flat data layout--
419  *********************************************/
420  ddss_dsymtiled2flat_nb( N, N, A, LDA, nt, nt, TILE_A, UPLO );
421  // Triangular solve
422  // --SIDE = Left & UPLO = Upper & TRANS_A = Trans--
423  for ( ki = 0; ki < nt; ki++ )
424  {
425  tile_size_k = ddss_tile_size( N, ki );
426 
427  for ( ni = 0; ni < nrhst; ni++ )
428  {
429  tile_size_n = ddss_tile_size( NRHS, ni );
430 
431  #pragma oss task in( TILE_A[ki][ki] ) \
432  inout( TILE_B[ki][ni] ) \
433  shared( TILE_A, TILE_B ) \
434  firstprivate( ki, ni ) \
435  label( dtrsm_dtrsm1 )
436  cblas_dtrsm( CblasRowMajor,
437  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
438  ( CBLAS_TRANSPOSE ) Trans,
439  ( CBLAS_DIAG ) NonUnit,
440  tile_size_k,
441  tile_size_n,
442  1.0, TILE_A[ki][ki], tile_size_k,
443  TILE_B[ki][ni], tile_size_n );
444  }
445 
446  for ( mi = ki + 1; mi < nt; mi++ )
447  {
448  tile_size_m = ddss_tile_size( N, mi );
449 
450  for ( nni = 0; nni < nrhst; nni++ )
451  {
452  tile_size_nn = ddss_tile_size( NRHS, nni );
453 
454  #pragma oss task in( TILE_A[ki][mi]) \
455  in( TILE_B[ki][nni] ) \
456  inout( TILE_B[mi][nni] ) \
457  shared( TILE_A, TILE_B ) \
458  firstprivate( ki, mi, nni ) \
459  label( dtrsm_dgemm1 )
460  cblas_dgemm( CblasRowMajor,
461  CblasTrans, CblasNoTrans,
462  tile_size_m,
463  tile_size_nn,
464  tile_size_k,
465  -1.0, TILE_A[ki][mi], tile_size_m,
466  TILE_B[ki][nni], tile_size_nn,
467  1.0, TILE_B[mi][nni], tile_size_nn );
468  }
469  }
470  }
471  // Triangular solve
472  // --SIDE = Left & UPLO = Upper & TRANS_A = NoTrans--
473  for ( ki = 0; ki < nt; ki++ )
474  {
475  tile_size_k = ddss_tile_size( N, ki );
476 
477  for ( ni = 0; ni < nrhst; ni++ )
478  {
479  tile_size_n = ddss_tile_size( NRHS, ni );
480 
481  #pragma oss task in( TILE_A[ki][ki] ) \
482  inout( TILE_B[ki][ni] ) \
483  shared( TILE_A, TILE_B ) \
484  firstprivate( ki, ni ) \
485  label( dtrsm_dtrsm2 )
486  cblas_dtrsm( CblasRowMajor,
487  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
488  ( CBLAS_TRANSPOSE ) NoTrans,
489  ( CBLAS_DIAG ) NonUnit,
490  tile_size_k,
491  tile_size_n,
492  1.0, TILE_A[ki][ki], tile_size_k,
493  TILE_B[ki][ni], tile_size_n );
494  }
495 
496  for ( nni = 0; nni < nrhst; nni++ )
497  {
498  tile_size_nn = ddss_tile_size( NRHS, nni );
499 
500  for ( mi = ki - 1; mi >= 0; mi-- )
501  {
502  tile_size_m = ddss_tile_size( N, mi );
503 
504  #pragma oss task in( TILE_A[mi][ki] ) \
505  in( TILE_B[ki][nni] ) \
506  inout( TILE_B[mi][nni] ) \
507  shared( TILE_A, TILE_B ) \
508  firstprivate( ki, mi, nni ) \
509  label( dtrsm_dgemm2 )
510  cblas_dgemm( CblasRowMajor,
511  CblasNoTrans, CblasNoTrans,
512  tile_size_m,
513  tile_size_nn,
514  tile_size_k,
515  -1.0, TILE_A[mi][ki], tile_size_k,
516  TILE_B[ki][nni], tile_size_nn,
517  1.0, TILE_B[mi][nni], tile_size_nn );
518  }
519  }
520  }
521  }
522 
523  /*********************************************
524  --From tiled data layout to flat data layout--
525  *********************************************/
526  ddss_dtiled2flat( N, NRHS, B, LDB, nt, nrhst, TILE_B );
527 
528  // --Tile A and B matrices free--
529  free( TILE_A );
530  free( TILE_B );
531 
532  return Success;
533 
534 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dsymtiled2flat_nb(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52