LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
kdsyrk.c
Go to the documentation of this file.
1 #include "../include/lass.h"
2 
3 /**
4  *
5  * @file kdsyrk.c
6  *
7  * @brief LASs-DDSs kdsyrk routine.
8  *
9  * LASs-DDSs is a software package provided by:
10  * Barcelona Supercomputing Center - Centro Nacional de Supercomputacion
11  *
12  * @author Pedro Valero-Lara pedro.valero@bsc.es
13  * @author Boro Sofranac boro.sofranac@bsc.es
14  * @date 2018-02-13
15  * @reviewer
16  * @modified
17  *
18  **/
19 
20 /**
21  *
22  * @ingroup DDSS
23  *
24  * Performs one of the symmetric rank k operations:
25  *
26  * C = ALPHA * A * op( A ) + BETA * C or
27  * C = ALPHA * op( A ) * A + BETA * C
28  *
29  * where op( X ) is:
30  *
31  * op( X ) = X**T
32  *
33  * ALPHA and BETA are scalars, C is an N by N symmetric matrix and A is an
34  * N by K matrix in the first case and a K by N matrix in the second case.
35  *
36  **/
37 
38 /**
39  * @param[in]
40  * UPLO enum DDSS_UPLO.
41  * UPLO specifies the form in which C is stored:
42  * - Lower: Lower triangular part of C is stored. The upper traingular
43  * part is not referenced.
44  * - Upper: Upper triangular part of C is stored. The lower triangular
45  * part is not referenced.
46  *
47  * @param[in]
48  * TRANS_A enum DDSS_TRANS.
49  * TRANS_A specifies the operation to be performed as follows:
50  * - NoTrans: C = ALPHA * A * A**T + BETA * C
51  * - Trans: C = ALPHA * A**T * A + BETA * C
52  *
53  * @param[in]
54  * N int.
55  * N specifies the order of matrix C. N must be at least zero.
56  *
57  * @param[in]
58  * K int.
59  * With TRANS_A = NoTrans, K specifies the number of columns of the
60  * matrix A, and with TRANS_A = Trans, K specifies the number of rows
61  * of the matrix A. K must be at least zero.
62  *
63  * @param[in]
64  * ALPHA double.
65  * ALPHA specifies the scalar alpha.
66  *
67  * @param[in]
68  * A double *.
69  * A is a pointer to a matrix of dimension Na ( rows ) by Ka
70  * ( columns ), where Na is N and Ka is K when TRANS_A = NoTrans,
71  * and Na is K and Ka is N otherwise.
72  *
73  * @param[in]
74  * LDA int.
75  * LDA specifies the number of columns of A ( row-major order ).
76  * When TRANS_A = NoTrans then LDA must be at least max( 1, K ),
77  * otherwise LDA must be at least max( 1, N ).
78  *
79  * @param[in]
80  * BETA double.
81  * BETA specifies the scalar beta.
82  *
83  * @param[in, out]
84  * C double *.
85  * C is a pointer to a matrix of dimension N by N.
86  * When UPLO = Uppper the strictly lower triangular part of C is not
87  * referenced. On exit, the upper triangular part of C is overwritten
88  * by the upper triangular part of the updated solution matrix C.
89  * When UPLO = Lower the strictly upper triangular part of C is not
90  * referenced. On exit, the lower triangular part of C is overwritten
91  * by the lower triangular part of the updated solution matrix C.
92  *
93  * @param[in]
94  * LDC int.
95  * LDC specifies the number of columns of C ( row-major order ).
96  * LDC must be at least max( 1, N ).
97  *
98  **/
99 
100 
101 /**
102  *
103  * @retval Success successful exit
104  * @retval NoSuccess unsuccessful exit
105  *
106  **/
107 
108 /**
109  *
110  * @sa ddss_tile
111  * @sa ddss_flat2tiled
112  * @sa ddss_dsymflat2tiled
113  * @sa ddss_dsymtiled2flat
114  *
115  **/
116 
117 enum LASS_RETURN kdsyrk( enum DDSS_UPLO UPLO, enum DDSS_TRANS TRANS_A,
118  int N, int K,
119  const double ALPHA, double *A, int LDA,
120  const double BETA, double *C, int LDC )
121 {
122 
123  // Local variables
124  int nt, kt;
125  int mi, ni, ki;
126  int Am, An;
127  int Cm, Cn;
128  int tile_size_m;
129  int tile_size_n;
130  int tile_size_k;
131  int ka;
132  double beta;
133 
134  // Number of tiles
135  if ( N % TILE_SIZE == 0 )
136  {
137  nt = N / TILE_SIZE;
138  }
139  else
140  {
141  nt = ( N / TILE_SIZE ) + 1;
142  }
143 
144  if ( K % TILE_SIZE == 0 )
145  {
146  kt = K / TILE_SIZE;
147  }
148  else
149  {
150  kt = ( K / TILE_SIZE ) + 1;
151  }
152 
153  /****************************
154  --Tile matrices declaration--
155  ****************************/
156 
157  if ( TRANS_A == NoTrans )
158  {
159  Am = nt;
160  An = kt;
161  ka = N;
162  }
163  else
164  {
165  Am = kt;
166  An = nt;
167  ka = K;
168  }
169 
170  Cm = Cn = nt;
171 
172  /***************************
173  --Tile matrices allocation--
174  ***************************/
175 
176  double ( *TILE_A )[An][TILE_SIZE * TILE_SIZE] = malloc(
177  Am * An * TILE_SIZE * TILE_SIZE * sizeof( double ) );
178 
179  if ( TILE_A == NULL )
180  {
181  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_A\n" );
182  return NoSuccess;
183  }
184 
185  double ( *TILE_C )[Cn][TILE_SIZE * TILE_SIZE] = malloc(
186  Cm * Cn * TILE_SIZE * TILE_SIZE * sizeof( double ) );
187 
188  if ( TILE_C == NULL )
189  {
190  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_C\n" );
191  return NoSuccess;
192  }
193 
194  /*********************************************
195  --From flat data layout to tiled data layout--
196  *********************************************/
197 
198  // From flat matrix A to tile matrix TILE_A
199  ddss_dflat2tiled( ka, LDA, A, LDA, Am, An, TILE_A );
200 
201  // From flat matrix C to tile matrix TILE_C
202  ddss_dsymflat2tiled( N, N, C, LDC, Cm, Cn, TILE_C, UPLO );
203 
204  /*************
205  --DSYRK tile--
206  *************/
207 
208  // --TRANS_A = NoTrans & UPLO = Upper--
209  if ( TRANS_A == NoTrans )
210  {
211  if ( UPLO == Upper )
212  {
213  for ( mi = 0; mi < nt; mi++ )
214  {
215  tile_size_m = ddss_tile_size( N, mi );
216  for ( ni = 0; ni < kt; ni++ )
217  {
218  tile_size_n = ddss_tile_size( K, ni );
219 
220  if ( ni == 0 )
221  {
222  beta = BETA;
223  }
224  else
225  {
226  beta = 1.0;
227  }
228 
229  #pragma oss task in( TILE_A[mi][ni] ) \
230  inout( TILE_C[mi][mi] ) \
231  shared( TILE_A, TILE_C ) \
232  firstprivate( mi, ni ) \
233  label( dsyrk )
234  cblas_dsyrk( CblasRowMajor,
235  ( CBLAS_UPLO ) UPLO,
236  ( CBLAS_TRANSPOSE ) TRANS_A,
237  tile_size_m,
238  tile_size_n,
239  ALPHA, TILE_A[mi][ni], tile_size_n,
240  beta, TILE_C[mi][mi], tile_size_m );
241 
242  for ( ki = mi + 1; ki < nt; ki++ )
243  {
244  tile_size_k = ddss_tile_size( N, ki );
245 
246  #pragma oss task in( TILE_A[mi][ni] ) \
247  in( TILE_A[ki][ni] ) \
248  inout( TILE_C[mi][ki] ) \
249  shared( TILE_A, TILE_C ) \
250  firstprivate( mi, ni, ki ) \
251  label( dgemm )
252  cblas_dgemm( CblasRowMajor,
253  CblasNoTrans, CblasTrans,
254  tile_size_m,
255  tile_size_k,
256  tile_size_n,
257  ALPHA, TILE_A[mi][ni], tile_size_n,
258  TILE_A[ki][ni], tile_size_n,
259  beta, TILE_C[mi][ki], tile_size_k );
260  }
261 
262  }
263  }
264  }
265  // --TRANS_A = NoTrans & UPLO = Lower--
266  else
267  {
268  for ( mi = 0; mi < nt; mi++ )
269  {
270  tile_size_m = ddss_tile_size( N, mi );
271  for ( ni = 0; ni < kt; ni++ )
272  {
273  tile_size_n = ddss_tile_size( K, ni );
274 
275  if ( ni == 0 )
276  {
277  beta = BETA;
278  }
279  else
280  {
281  beta = 1.0;
282  }
283 
284  #pragma oss task in( TILE_A[mi][ni] ) \
285  inout( TILE_C[mi][mi] ) \
286  shared( TILE_A, TILE_C ) \
287  firstprivate( mi, ni ) \
288  label( dsyrk )
289  cblas_dsyrk( CblasRowMajor,
290  ( CBLAS_UPLO ) UPLO,
291  ( CBLAS_TRANSPOSE ) TRANS_A,
292  tile_size_m,
293  tile_size_n,
294  ALPHA, TILE_A[mi][ni], tile_size_n,
295  beta, TILE_C[mi][mi], tile_size_m );
296 
297  for ( ki = mi + 1; ki < nt; ki++ )
298  {
299  tile_size_k = ddss_tile_size( N, ki );
300 
301  #pragma oss task in( TILE_A[ki][ni] ) \
302  in( TILE_A[mi][ni] ) \
303  inout( TILE_C[ki][mi] ) \
304  shared( TILE_A, TILE_C ) \
305  firstprivate( mi, ni, ki ) \
306  label( dgemm )
307  cblas_dgemm( CblasRowMajor,
308  CblasNoTrans, CblasTrans,
309  tile_size_k,
310  tile_size_m,
311  tile_size_n,
312  ALPHA, TILE_A[ki][ni], tile_size_n,
313  TILE_A[mi][ni], tile_size_n,
314  beta, TILE_C[ki][mi], tile_size_m );
315  }
316 
317  }
318  }
319  }
320  }
321  // --TRANS_A = Trans & UPLO = Upper--
322  else
323  {
324  if ( UPLO == Upper )
325  {
326  for ( mi = 0; mi < kt; mi++ )
327  {
328  tile_size_m = ddss_tile_size( K, mi );
329  for ( ni = 0; ni < nt; ni++ )
330  {
331  tile_size_n = ddss_tile_size( N, ni );
332 
333  if ( mi == 0 )
334  {
335  beta = BETA;
336  }
337  else
338  {
339  beta = 1.0;
340  }
341 
342  #pragma oss task in( TILE_A[mi][ni] ) \
343  inout( TILE_C[ni][ni] ) \
344  shared( TILE_A, TILE_C ) \
345  firstprivate( mi, ni ) \
346  label( dsyrk )
347  cblas_dsyrk( CblasRowMajor,
348  ( CBLAS_UPLO ) UPLO,
349  ( CBLAS_TRANSPOSE ) TRANS_A,
350  tile_size_n,
351  tile_size_m,
352  ALPHA, TILE_A[mi][ni], tile_size_n,
353  beta, TILE_C[ni][ni], tile_size_n );
354 
355  for ( ki = ni + 1; ki < nt; ki++ )
356  {
357  tile_size_k = ddss_tile_size( N, ki );
358 
359  #pragma oss task in( TILE_A[mi][ni] ) \
360  in( TILE_A[mi][ki] ) \
361  inout( TILE_C[ni][ki] ) \
362  shared( TILE_A, TILE_C ) \
363  firstprivate( mi, ni, ki ) \
364  label( dgemm )
365  cblas_dgemm( CblasRowMajor,
366  CblasTrans, CblasNoTrans,
367  tile_size_n,
368  tile_size_k,
369  tile_size_m,
370  ALPHA, TILE_A[mi][ni], tile_size_n,
371  TILE_A[mi][ki], tile_size_k,
372  beta, TILE_C[ni][ki], tile_size_k );
373  }
374  }
375  }
376  }
377  // --TRANS_A = Trans & UPLO = Lower--
378  else
379  {
380  for ( mi = 0; mi < kt; mi++ )
381  {
382  tile_size_m = ddss_tile_size( K, mi );
383  for ( ni = 0; ni < nt; ni++ )
384  {
385  tile_size_n = ddss_tile_size( N, ni );
386 
387  if ( mi == 0 )
388  {
389  beta = BETA;
390  }
391  else
392  {
393  beta = 1.0;
394  }
395 
396  #pragma oss task in( TILE_A[mi][ni] ) \
397  inout( TILE_C[ni][ni] ) \
398  shared( TILE_A, TILE_C ) \
399  firstprivate( mi, ni ) \
400  label( dsyrk )
401  cblas_dsyrk( CblasRowMajor,
402  ( CBLAS_UPLO ) UPLO,
403  ( CBLAS_TRANSPOSE ) TRANS_A,
404  tile_size_n,
405  tile_size_m,
406  ALPHA, TILE_A[mi][ni], tile_size_n,
407  beta, TILE_C[ni][ni], tile_size_n );
408 
409  for ( ki = ni + 1; ki < nt; ki++ )
410  {
411  tile_size_k = ddss_tile_size( N, ki );
412 
413  #pragma oss task in( TILE_A[mi][ki] ) \
414  in( TILE_A[mi][ni] ) \
415  inout( TILE_C[ki][ni] ) \
416  shared( TILE_A, TILE_C ) \
417  firstprivate( mi, ni, ki ) \
418  label( dgemm )
419  cblas_dgemm( CblasRowMajor,
420  CblasTrans, CblasNoTrans,
421  tile_size_k,
422  tile_size_n,
423  tile_size_m,
424  ALPHA, TILE_A[mi][ki], tile_size_k,
425  TILE_A[mi][ni], tile_size_n,
426  beta, TILE_C[ki][ni], tile_size_n );
427  }
428  }
429  }
430  }
431  }
432 
433  /*********************************************
434  --From tiled data layout to flat data layout--
435  *********************************************/
436 
437  ddss_dsymtiled2flat( N, N, C, LDC, Cm, Cn, TILE_C, UPLO );
438 
439  // --Tile matrices free--
440  free( TILE_A );
441  free( TILE_C );
442 
443  return Success;
444 
445 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
enum LASS_RETURN kdsyrk(enum DDSS_UPLO UPLO, enum DDSS_TRANS TRANS_A, int N, int K, const double ALPHA, double *A, int LDA, const double BETA, double *C, int LDC)
Definition: kdsyrk.c:117
void ddss_dsymtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52