LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
kdsyr2k.c
Go to the documentation of this file.
1 #include "../include/lass.h"
2 
3 /**
4  *
5  * @file kdsyr2k.c
6  *
7  * @brief LASs-DDSs kdsyr2k routine.
8  *
9  * LASs-DDSs is a software package provided by:
10  * Barcelona Supercomputing Center - Centro Nacional de Supercomputacion
11  *
12  * @author Pedro Valero-Lara pedro.valero@bsc.es
13  * @author Boro Sofranac boro.sofranac@bsc.es
14  * @date 2018-02-21
15  * @reviewer
16  * @modified
17  *
18  **/
19 
20 /**
21  *
22  * @ingroup DDSS
23  *
24  * Performs one of the symmetric rank 2k operations:
25  *
26  * C = ALPHA * A * B**T + ALPHA * B * A**T + BETA * C or
27  * C = ALPHA * A**T * B + ALPHA * B**T * A + BETA * C
28  *
29  * ALPHA and BETA are scalars, C is an N by N symmetric matrix and A and B are
30  * N by K matrices in the first case and K by N matrices in the second case.
31  *
32  **/
33 
34 /**
35  * @param[in]
36  * UPLO enum DDSS_UPLO.
37  * UPLO specifies the form in which C is stored:
38  * - Lower: Lower triangular part of C is stored. The upper traingular
39  * part is not referenced.
40  * - Upper: Upper triangular part of C is stored. The lower triangular
41  * part is not referenced.
42  *
43  * @param[in]
44  * TRANS enum DDSS_TRANS.
45  * TRANS specifies the operation to be performed as follows:
46  * - NoTrans: C = ALPHA * A * B**T + ALPHA * B * A**T + BETA * C
47  * - Trans: C = ALPHA * A**T * B + ALPHA * B**T * A + BETA * C
48  *
49  * @param[in]
50  * N int.
51  * N specifies the order of matrix C. N must be at least zero.
52  *
53  * @param[in]
54  * K int.
55  * With TRANS = NoTrans, K specifies the number of columns of the
56  * matrices A and B, and with TRANS = Trans, K specifies the number of
57  * rows of the matrices A and B. K must be at least zero.
58  *
59  * @param[in]
60  * ALPHA double.
61  * ALPHA specifies the scalar alpha.
62  *
63  * @param[in]
64  * A double *.
65  * A is a pointer to a matrix of dimension Na ( rows ) by Ka
66  * ( columns ), where Na is N and Ka is K when TRANS = NoTrans,
67  * and Na is K and Ka is N otherwise.
68  *
69  * @param[in]
70  * LDA int.
71  * LDA specifies the number of columns of A ( row-major order ).
72  * When TRANS = NoTrans then LDA must be at least max( 1, K ),
73  * otherwise LDA must be at least max( 1, N ).
74  *
75  * @param[in]
76  * B double *.
77  * B is a pointer to a matrix of dimension Nb ( rows ) by Kb
78  * ( columns ), where Nb is N and Kb is K when TRANS = NoTrans,
79  * and Nb is K and Kb is N otherwise.
80  *
81  * @param[in]
82  * LDB int.
83  * LDB specifies the number of columns of B ( row-major order ).
84  * When TRANS = NoTrans then LDB must be at least max( 1, K ),
85  * otherwise LDB must be at least max( 1, N ).
86  *
87  * @param[in]
88  * BETA double.
89  * BETA specifies the scalar beta.
90  *
91  * @param[in, out]
92  * C double *.
93  * C is a pointer to a matrix of dimension N by N.
94  * When UPLO = Uppper the strictly lower triangular part of C is not
95  * referenced. On exit, the upper triangular part of C is overwritten
96  * by the upper triangular part of the updated solution matrix C.
97  * When UPLO = Lower the strictly upper triangular part of C is not
98  * referenced. On exit, the lower triangular part of C is overwritten
99  * by the lower triangular part of the updated solution matrix C.
100  *
101  * @param[in]
102  * LDC int.
103  * LDC specifies the number of columns of C ( row-major order ).
104  * LDC must be at least max( 1, N ).
105  *
106  **/
107 
108 /**
109  *
110  * @retval Success successful exit
111  * @retval NoSuccess unsuccessful exit
112  *
113  **/
114 
115 /**
116  *
117  * @sa ddss_tile
118  * @sa ddss_flat2tiled
119  * @sa ddss_dsymflat2tiled
120  * @sa ddss_dsymtiled2flat
121  *
122  **/
123 
124 enum LASS_RETURN kdsyr2k( enum DDSS_UPLO UPLO, enum DDSS_TRANS TRANS,
125  int N, int K,
126  const double ALPHA, double *A, int LDA,
127  double *B, int LDB,
128  const double BETA, double *C, int LDC )
129 {
130 
131  // Local variables
132  int nt, kt;
133  int mi, ni, ki;
134  int Am, An;
135  int Bm, Bn;
136  int Cm, Cn;
137  int tile_size_m;
138  int tile_size_n;
139  int tile_size_k;
140  int ka;
141  int kb;
142  double beta;
143 
144  // Number of tiles
145  if ( N % TILE_SIZE == 0 )
146  {
147  nt = N / TILE_SIZE;
148  }
149  else
150  {
151  nt = ( N / TILE_SIZE ) + 1;
152  }
153 
154  if ( K % TILE_SIZE == 0 )
155  {
156  kt = K / TILE_SIZE;
157  }
158  else
159  {
160  kt = ( K / TILE_SIZE ) + 1;
161  }
162 
163  /****************************
164  --Tile matrices declaration--
165  ****************************/
166 
167  if ( TRANS == NoTrans )
168  {
169  Am = Bm = nt;
170  An = Bn = kt;
171  ka = kb = N;
172  }
173  else
174  {
175  Am = Bm = kt;
176  An = Bn = nt;
177  ka = kb = K;
178  }
179 
180  Cm = Cn = nt;
181 
182  /***************************
183  --Tile matrices allocation--
184  ***************************/
185 
186  double ( *TILE_A )[An][TILE_SIZE * TILE_SIZE] = malloc(
187  Am * An * TILE_SIZE * TILE_SIZE * sizeof( double ) );
188 
189  if ( TILE_A == NULL )
190  {
191  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_A\n" );
192  return NoSuccess;
193  }
194 
195  double ( *TILE_B )[Bn][TILE_SIZE * TILE_SIZE] = malloc(
196  Bm * Bn * TILE_SIZE * TILE_SIZE * sizeof( double ) );
197 
198  if ( TILE_B == NULL )
199  {
200  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_B\n" );
201  return NoSuccess;
202  }
203 
204  double ( *TILE_C )[Cn][TILE_SIZE * TILE_SIZE] = malloc(
205  Cm * Cn * TILE_SIZE * TILE_SIZE * sizeof( double ) );
206 
207  if ( TILE_C == NULL )
208  {
209  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_C\n" );
210  return NoSuccess;
211  }
212 
213  /*********************************************
214  --From flat data layout to tiled data layout--
215  *********************************************/
216 
217  // From flat matrix A to tile matrix TILE_A
218  ddss_dflat2tiled( ka, LDA, A, LDA, Am, An, TILE_A );
219 
220  // From flat matrix B to tile matrix TILE_B
221  ddss_dflat2tiled( kb, LDB, B, LDB, Bm, Bn, TILE_B );
222 
223  // From flat matrix C to tile matrix TILE_C
224  ddss_dsymflat2tiled( N, N, C, LDC, Cm, Cn, TILE_C, UPLO );
225 
226  /*************
227  --DSYRK tile--
228  *************/
229 
230  // --TRANS = NoTrans & UPLO = Upper--
231  if ( TRANS == NoTrans )
232  {
233  if ( UPLO == Upper )
234  {
235  for ( mi = 0; mi < nt; mi++ )
236  {
237  tile_size_m = ddss_tile_size( N, mi );
238  for ( ni = 0; ni < kt; ni++ )
239  {
240  tile_size_n = ddss_tile_size( K, ni );
241 
242  if ( ni == 0 )
243  {
244  beta = BETA;
245  }
246  else
247  {
248  beta = 1.0;
249  }
250 
251  #pragma oss task in( TILE_A[mi][ni] ) \
252  in( TILE_B[mi][ni] ) \
253  inout( TILE_C[mi][mi] ) \
254  shared( TILE_A, TILE_B, TILE_C ) \
255  firstprivate( mi,ni ) \
256  label( dsyr2k )
257  cblas_dsyr2k( CblasRowMajor,
258  ( CBLAS_UPLO ) UPLO, ( CBLAS_TRANSPOSE ) TRANS,
259  tile_size_m,
260  tile_size_n,
261  ALPHA, TILE_A[mi][ni], tile_size_n,
262  TILE_B[mi][ni], tile_size_n,
263  beta, TILE_C[mi][mi], tile_size_m );
264 
265  for ( ki = mi + 1; ki < nt; ki++ )
266  {
267  tile_size_k = ddss_tile_size( N, ki );
268 
269  #pragma oss task in( TILE_A[mi][ni] ) \
270  in( TILE_A[ki][ni] ) \
271  in( TILE_B[mi][ni] ) \
272  in( TILE_B[ki][ni] ) \
273  inout( TILE_C[mi][ki] ) \
274  shared( TILE_A, TILE_B, TILE_C ) \
275  firstprivate( mi,ni,ki ) \
276  label( dgemm )
277  {
278  cblas_dgemm( CblasRowMajor,
279  CblasNoTrans, CblasTrans,
280  tile_size_m,
281  tile_size_k,
282  tile_size_n,
283  ALPHA, TILE_A[mi][ni], tile_size_n,
284  TILE_B[ki][ni], tile_size_n,
285  beta, TILE_C[mi][ki], tile_size_k );
286 
287  cblas_dgemm( CblasRowMajor,
288  CblasNoTrans, CblasTrans,
289  tile_size_m,
290  tile_size_k,
291  tile_size_n,
292  ALPHA, TILE_B[mi][ni], tile_size_n,
293  TILE_A[ki][ni], tile_size_n,
294  1.0, TILE_C[mi][ki], tile_size_k );
295  }
296  }
297 
298  }
299  }
300  }
301  // --TRANS = NoTrans & UPLO = Lower--
302  else
303  {
304  for ( mi = 0; mi < nt; mi++ )
305  {
306  tile_size_m = ddss_tile_size( N, mi );
307  for ( ni = 0; ni < kt; ni++ )
308  {
309  tile_size_n = ddss_tile_size( K, ni );
310 
311  if ( ni == 0 )
312  {
313  beta = BETA;
314  }
315  else
316  {
317  beta = 1.0;
318  }
319 
320  #pragma oss task in( TILE_A[mi][ni] ) \
321  in( TILE_B[mi][ni] ) \
322  inout( TILE_C[mi][mi] ) \
323  shared( TILE_A, TILE_B, TILE_C ) \
324  firstprivate( mi,ni ) \
325  label( dsyr2k )
326  cblas_dsyr2k( CblasRowMajor,
327  ( CBLAS_UPLO ) UPLO, ( CBLAS_TRANSPOSE ) TRANS,
328  tile_size_m,
329  tile_size_n,
330  ALPHA, TILE_A[mi][ni], tile_size_n,
331  TILE_B[mi][ni], tile_size_n,
332  beta, TILE_C[mi][mi], tile_size_m );
333 
334  for ( ki = mi + 1; ki < nt; ki++ )
335  {
336  tile_size_k = ddss_tile_size( N, ki );
337 
338  #pragma oss task in( TILE_A[ki][ni] ) \
339  in( TILE_A[mi][ni] ) \
340  in( TILE_B[ki][ni] ) \
341  in( TILE_B[mi][ni] ) \
342  inout( TILE_C[ki][mi] ) \
343  shared( TILE_A, TILE_B, TILE_C ) \
344  firstprivate( mi,ni,ki ) \
345  label( dgemm )
346  {
347  cblas_dgemm( CblasRowMajor,
348  CblasNoTrans, CblasTrans,
349  tile_size_k,
350  tile_size_m,
351  tile_size_n,
352  ALPHA, TILE_A[ki][ni], tile_size_n,
353  TILE_B[mi][ni], tile_size_n,
354  beta, TILE_C[ki][mi], tile_size_m );
355 
356  cblas_dgemm( CblasRowMajor,
357  CblasNoTrans, CblasTrans,
358  tile_size_k,
359  tile_size_m,
360  tile_size_n,
361  ALPHA, TILE_B[ki][ni], tile_size_n,
362  TILE_A[mi][ni], tile_size_n,
363  1.0, TILE_C[ki][mi], tile_size_m );
364  }
365  }
366 
367  }
368  }
369  }
370  }
371  // --TRANS = Trans & UPLO = Upper--
372  else
373  {
374  if ( UPLO == Upper )
375  {
376  for ( mi = 0; mi < kt; mi++ )
377  {
378  tile_size_m = ddss_tile_size( K, mi );
379  for ( ni = 0; ni < nt; ni++ )
380  {
381  tile_size_n = ddss_tile_size( N, ni );
382 
383  if ( mi == 0 )
384  {
385  beta = BETA;
386  }
387  else
388  {
389  beta = 1.0;
390  }
391 
392  #pragma oss task in( TILE_A[mi][ni] ) \
393  in( TILE_B[mi][ni] ) \
394  inout( TILE_C[ni][ni] ) \
395  shared( TILE_A, TILE_B, TILE_C ) \
396  firstprivate( mi,ni ) \
397  label( dsyr2k )
398  cblas_dsyr2k( CblasRowMajor,
399  ( CBLAS_UPLO ) UPLO, ( CBLAS_TRANSPOSE ) TRANS,
400  tile_size_n,
401  tile_size_m,
402  ALPHA, TILE_A[mi][ni], tile_size_n,
403  TILE_B[mi][ni], tile_size_n,
404  beta, TILE_C[ni][ni], tile_size_n );
405 
406  for ( ki = ni + 1; ki < nt; ki++ )
407  {
408  tile_size_k = ddss_tile_size( N, ki );
409 
410  #pragma oss task in( TILE_A[mi][ni] ) \
411  in( TILE_A[mi][ki] ) \
412  in( TILE_B[mi][ni] ) \
413  in( TILE_B[mi][ki] ) \
414  inout( TILE_C[ni][ki] ) \
415  shared( TILE_A, TILE_B, TILE_C ) \
416  firstprivate( mi,ni,ki ) \
417  label( dgemm )
418  {
419  cblas_dgemm( CblasRowMajor,
420  CblasTrans, CblasNoTrans,
421  tile_size_n,
422  tile_size_k,
423  tile_size_m,
424  ALPHA, TILE_A[mi][ni], tile_size_n,
425  TILE_B[mi][ki], tile_size_k,
426  beta, TILE_C[ni][ki], tile_size_k );
427 
428  cblas_dgemm( CblasRowMajor,
429  CblasTrans, CblasNoTrans,
430  tile_size_n,
431  tile_size_k,
432  tile_size_m,
433  ALPHA, TILE_B[mi][ni], tile_size_n,
434  TILE_A[mi][ki], tile_size_k,
435  1.0, TILE_C[ni][ki], tile_size_k );
436  }
437  }
438  }
439  }
440  }
441  // --TRANS = Trans & UPLO = Lower--
442  else
443  {
444  for ( mi = 0; mi < kt; mi++ )
445  {
446  tile_size_m = ddss_tile_size( K, mi );
447  for ( ni = 0; ni < nt; ni++ )
448  {
449  tile_size_n = ddss_tile_size( N, ni );
450 
451  if ( mi == 0 )
452  {
453  beta = BETA;
454  }
455  else
456  {
457  beta = 1.0;
458  }
459 
460  #pragma oss task in( TILE_A[mi][ni] ) \
461  in( TILE_B[mi][ni] ) \
462  inout( TILE_C[ni][ni] ) \
463  shared( TILE_A, TILE_B, TILE_C ) \
464  firstprivate( mi,ni ) \
465  label( dsyr2k )
466  cblas_dsyr2k( CblasRowMajor,
467  ( CBLAS_UPLO ) UPLO, ( CBLAS_TRANSPOSE ) TRANS,
468  tile_size_n,
469  tile_size_m,
470  ALPHA, TILE_A[mi][ni], tile_size_n,
471  TILE_B[mi][ni], tile_size_n,
472  beta, TILE_C[ni][ni], tile_size_n );
473 
474  for ( ki = ni + 1; ki < nt; ki++ )
475  {
476  tile_size_k = ddss_tile_size( N, ki );
477 
478  #pragma oss task in( TILE_A[mi][ki] ) \
479  in( TILE_A[mi][ni] ) \
480  in( TILE_B[mi][ki] ) \
481  in( TILE_B[mi][ni] ) \
482  inout( TILE_C[ki][ni] ) \
483  shared( TILE_A, TILE_B, TILE_C ) \
484  firstprivate( mi,ni,ki ) \
485  label( dgemm )
486  {
487  cblas_dgemm( CblasRowMajor,
488  CblasTrans, CblasNoTrans,
489  tile_size_k,
490  tile_size_n,
491  tile_size_m,
492  ALPHA, TILE_A[mi][ki], tile_size_k,
493  TILE_B[mi][ni], tile_size_n,
494  beta, TILE_C[ki][ni], tile_size_n );
495 
496  cblas_dgemm( CblasRowMajor,
497  CblasTrans, CblasNoTrans,
498  tile_size_k,
499  tile_size_n,
500  tile_size_m,
501  ALPHA, TILE_B[mi][ki], tile_size_k,
502  TILE_A[mi][ni], tile_size_n,
503  1.0, TILE_C[ki][ni], tile_size_n );
504  }
505  }
506  }
507  }
508  }
509  }
510 
511  /*********************************************
512  --From tiled data layout to flat data layout--
513  *********************************************/
514 
515  ddss_dsymtiled2flat( N, N, C, LDC, Cm, Cn, TILE_C, UPLO );
516 
517  // --Tile matrices free--
518  free( TILE_A );
519  free( TILE_B );
520  free( TILE_C );
521 
522  return Success;
523 
524 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dsymtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
enum LASS_RETURN kdsyr2k(enum DDSS_UPLO UPLO, enum DDSS_TRANS TRANS, int N, int K, const double ALPHA, double *A, int LDA, double *B, int LDB, const double BETA, double *C, int LDC)
Definition: kdsyr2k.c:124
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52