LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
Functions
kdpotrf.c File Reference

LASs-DDSs kdpotrf routine. More...

#include "../include/lass.h"
Include dependency graph for kdpotrf.c:

Go to the source code of this file.

Functions

enum LASS_RETURN kdpotrf (enum DDSS_UPLO UPLO, int N, double *A, int LDA)
 

Detailed Description

LASs-DDSs kdpotrf routine.

LASs-DDSs is a software package provided by: Barcelona Supercomputing Center - Centro Nacional de Supercomputacion

Author
Pedro Valero-Lara pedro.nosp@m..val.nosp@m.ero@b.nosp@m.sc.e.nosp@m.s
Date
2017-24-10

Definition in file kdpotrf.c.

Function Documentation

enum LASS_RETURN kdpotrf ( enum DDSS_UPLO  UPLO,
int  N,
double *  A,
int  LDA 
)

Performs the Cholesky factorization of a symmetric positive definite matrix A:

A = L \times L^T
or
A = U^T \times U

where L is a lower triangular matrix and U is an upper triangular matrix.

Parameters
[in]UPLOenum DDSS_UPLO. UPLO specifies the form of A is stored:
  • Lower: Lower triangle of A is stored. The upper traingular part is not referenced.
  • Upper: Upper triangle of A is stored. The lower triangular part is not referenced.
[in]Nint. N specifies the order of the square matrix A. N >= 0.
[in,out]Adouble *. A is a pointer to a positive definite matrix of dimension N by LDA. On exit, if return value is Success, the matrix A is overwriten by the factor U or L.
[in]LDAint. LDA specifies the number of columns of A ( row-major order ). LDA must be at least max( 1, N ).
Return values
Successsucessful exit
NoSuccessunsucessful exit
See also
ddss_dpotrf
ddss_tile
ddss_symflat2tiled
ddss_symtiled2flat

Definition at line 78 of file kdpotrf.c.

References ddss_dsymflat2tiled(), ddss_dsymtiled2flat(), and ddss_tile_size().

Referenced by ddss_dpotrf().

79 {
80 
81  // Local variables
82  int nt;
83  int mi, mmi, ki, ni;
84  int Am;
85  int An;
86  int tile_size_m;
87  int tile_size_mm;
88  int tile_size_n;
89  int tile_size_k;
90  int k_check;
91  double betat;
92 
93  // Number of tiles
94  if ( N % TILE_SIZE == 0 )
95  {
96  nt = N / TILE_SIZE;
97  }
98  else
99  {
100  nt = ( N / TILE_SIZE ) + 1;
101  }
102 
103  /****************************
104  --Tile A matrix declaration--
105  ****************************/
106 
107  Am = nt;
108  An = nt;
109 
110  /***************************
111  --Tile A matrix allocation--
112  ***************************/
113 
114  double (*TILE_A)[An][TILE_SIZE * TILE_SIZE] = malloc ( Am * An *
115  TILE_SIZE * TILE_SIZE * sizeof( double ) );
116 
117  if ( TILE_A == NULL)
118  {
119  fprintf( stderr, "Failure in kdpotrf for matrix TILE_A\n" );
120  return NoSuccess;
121  }
122 
123  /************************************************
124  // --From flat data layout to tiled data layout--
125  ************************************************/
126 
127  ddss_dsymflat2tiled( N, N, A, LDA, nt, nt, TILE_A, UPLO );
128 
129  /**************
130  --DPOTRF tile--
131  **************/
132 
133  if ( UPLO == Lower )
134  {
135  // --UPLO = Lower--
136  for ( ki = 0; ki < nt; ki++ )
137  {
138  tile_size_k = ddss_tile_size( N, ki );
139 
140  #pragma oss task inout( TILE_A[ki][ki] ) \
141  firstprivate( ki ) \
142  label( dpotrf )
143  LAPACKE_dpotrf_work( LAPACK_ROW_MAJOR,
144  'L',
145  tile_size_k, TILE_A[ki][ki], tile_size_k );
146 
147  for ( mi = ki + 1; mi < nt; mi++ )
148  {
149  tile_size_m = ddss_tile_size( N, mi );
150 
151  #pragma oss task in( TILE_A[ki][ki] ) \
152  inout(TILE_A[mi][ki]) \
153  firstprivate( mi, ki ) \
154  label( dtrsm )
155  cblas_dtrsm( CblasRowMajor,
156  ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Lower,
157  ( CBLAS_TRANSPOSE ) Trans, ( CBLAS_DIAG ) NonUnit,
158  tile_size_m, TILE_SIZE,
159  1.0, TILE_A[ki][ki], TILE_SIZE,
160  TILE_A[mi][ki], TILE_SIZE );
161  }
162  for ( mmi = ki + 1; mmi < nt; mmi++ )
163  {
164  tile_size_mm = ddss_tile_size( N, mmi );
165 
166  #pragma oss task in( TILE_A[mmi][ki] ) \
167  inout( TILE_A[mmi][mmi] ) \
168  firstprivate( mmi, ki ) \
169  label( dsyrk )
170  cblas_dsyrk( CblasRowMajor,
171  ( CBLAS_UPLO ) Lower, ( CBLAS_TRANSPOSE ) NoTrans,
172  tile_size_mm, TILE_SIZE,
173  -1.0, TILE_A[mmi][ki], TILE_SIZE,
174  1.0, TILE_A[mmi][mmi], tile_size_mm );
175 
176  for ( ni = ki + 1; ni < mmi; ni++ )
177  {
178 
179  #pragma oss task in( TILE_A[mmi][ki] ) \
180  in( TILE_A[ni][ki] ) \
181  inout( TILE_A[mmi][ni] ) \
182  firstprivate( ni, mmi, ki ) \
183  label( dgemm )
184  cblas_dgemm( CblasRowMajor,
185  ( CBLAS_TRANSPOSE ) NoTrans,
186  ( CBLAS_TRANSPOSE ) Trans,
187  TILE_SIZE,
188  TILE_SIZE,
189  TILE_SIZE,
190  -1.0, TILE_A[mmi][ki], TILE_SIZE,
191  TILE_A[ni][ki], TILE_SIZE,
192  1.0, TILE_A[mmi][ni], TILE_SIZE );
193  }
194  }
195  }
196  }
197  else
198  {
199  // --UPLO = Upper--
200  for ( ki = 0; ki < nt; ki++ )
201  {
202  tile_size_k = ddss_tile_size( N, ki );
203 
204  #pragma oss task inout( TILE_A[ki][ki] ) \
205  shared( TILE_A ) \
206  firstprivate( ki ) \
207  label( dpotrf )
208  LAPACKE_dpotrf_work( LAPACK_ROW_MAJOR,
209  'U',
210  tile_size_k, TILE_A[ki][ki],
211  tile_size_k );
212 
213  for ( mi = ki + 1; mi < nt; mi++ )
214  {
215  tile_size_m = ddss_tile_size( N, mi );
216 
217  #pragma oss task in( TILE_A[ki][ki] ) \
218  inout(TILE_A[ki][mi]) \
219  shared( TILE_A ) \
220  firstprivate( mi, ki ) \
221  label( dtrsm )
222  cblas_dtrsm( CblasRowMajor,
223  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Upper,
224  ( CBLAS_TRANSPOSE ) Trans, ( CBLAS_DIAG ) NonUnit,
225  TILE_SIZE, tile_size_m,
226  1.0, TILE_A[ki][ki], TILE_SIZE,
227  TILE_A[ki][mi], tile_size_m );
228  }
229  for ( mmi = ki + 1; mmi < nt; mmi++ )
230  {
231  tile_size_mm = ddss_tile_size( N, mmi );
232 
233  #pragma oss task in( TILE_A[ki][mmi] ) \
234  inout( TILE_A[mmi][mmi] ) \
235  shared( TILE_A ) \
236  firstprivate( mmi, ki ) \
237  label( dsyrk )
238  cblas_dsyrk( CblasRowMajor,
239  ( CBLAS_UPLO ) Upper, ( CBLAS_TRANSPOSE ) Trans,
240  tile_size_mm, TILE_SIZE,
241  -1.0, TILE_A[ki][mmi], tile_size_mm,
242  1.0, TILE_A[mmi][mmi], tile_size_mm );
243 
244  for ( ni = ki + 1; ni < mmi; ni++ )
245  {
246  #pragma oss task in( TILE_A[ki][ni] ) \
247  in( TILE_A[ki][mmi] ) \
248  inout( TILE_A[ni][mmi] ) \
249  shared( TILE_A ) \
250  firstprivate( ni, mmi, ki ) \
251  label( dgemm )
252  cblas_dgemm( CblasRowMajor,
253  ( CBLAS_TRANSPOSE ) Trans,
254  ( CBLAS_TRANSPOSE ) NoTrans,
255  TILE_SIZE,
256  TILE_SIZE,
257  TILE_SIZE,
258  -1.0, TILE_A[ki][ni], TILE_SIZE,
259  TILE_A[ki][mmi], TILE_SIZE,
260  1.0, TILE_A[ni][mmi], TILE_SIZE );
261  }
262  }
263  }
264  }
265 
266  // --From tile data layout to flat data layout--
267  ddss_dsymtiled2flat( N, N, A, LDA, nt, nt, TILE_A, UPLO );
268 
269  // --Tile A matrix free--
270  free( TILE_A );
271 
272  return Success;
273 
274 }
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dsymtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52