LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
kdsymm.c
Go to the documentation of this file.
1 #include "../include/lass.h"
2 
3 /**
4  *
5  * @file kdsymm.c
6  *
7  * @brief LASs-DDSs kdsymm routine.
8  *
9  * LASs-DDSs is a software package provided by:
10  * Barcelona Supercomputing Center - Centro Nacional de Supercomputacion
11  *
12  * @author Pedro Valero-Lara pedro.valero@bsc.es
13  * @date 2017-08-11
14  * @reviewer
15  * @modified
16  *
17  **/
18 
19 /**
20  *
21  * @ingroup DDSS
22  *
23  * Performs one of the matrix-matrix operations:
24  *
25  * C = ALPHA * A * B + BETA * C
26  * or
27  *
28  * C = ALPHA * B * A + BETA * C
29  *
30  *
31  * where op( X ) is one of:
32  *
33  * op( X ) = X or
34  * op( X ) = X**T
35  *
36  * ALPHA and BETA are scalars, A is a symmetric matrix, and B and C
37  * are M by N matrices.
38  *
39 **/
40 
41 /**
42  *
43  * @param[in]
44  * SIDE enum DDSS_SIDE.
45  * UPLO specifies the position of the symmetric A matrix in
46  * the operation:
47  * - Left: C = ALPHA * A * B + BETA * C
48  * - Right: C = ALPHA * B * A + BETA * C
49  *
50  * @param[in]
51  * UPLO enum DDSS_UPLO.
52  * UPLO specifies the form of A is stored:
53  * - Lower: Lower triangle of A is stored. The upper traingular part is
54  * not referenced.
55  * - Upper: Upper triangle of A is stored. The lower triangular part is
56  * not referenced.
57  *
58  * @param[in]
59  * M int.
60  * M specifies the number of rows of the matrix C.
61  * M must be equal or greater than zero.
62  *
63  * @param[in]
64  * N int.
65  * N specifies the number of columns of the matrix C.
66  * N must be equal or greater than zero.
67  *
68  * @param[in]
69  * ALPHA double.
70  *
71  * @param[in]
72  * A double *.
73  * A is a pointer to a matrix of dimension Ma ( rows ) by Na
74  * (columns), where Ma is M and Na is M when SIDE = Left, and
75  * Ma is N and Na is N when SIDE = Right
76  *
77  *
78  * @param[in]
79  * LDA int.
80  * LDA specifies the number of columns of A ( row-major order ).
81  * LDA must be at least max( 1, Na ).
82  *
83  * @param[in]
84  * B double *.
85  * B is a pointer to a matrix of dimension M by N.
86  *
87  * @param[in]
88  * LDB int.
89  * LDB specifies the number of columns of B ( row-major order ).
90  * LDB must be at least max( 1, N ).
91  *
92  * @param[in]
93  * BETA double.
94  *
95  * @param[in,out]
96  * C double *.
97  * C is a pointer to a matrix of dimension M by N.
98  * On exit, C is overwritten by the M by N matrix.
99  *
100  * @param[in]
101  * LDC int.
102  * LDC specifies the number of columns of C ( row-major order ).
103  * LDC must be at least max( 1, N ).
104  *
105  **/
106 
107 /**
108  *
109  * @retval Success sucessful exit
110  * @retval NoSuccess unsucessful exit
111  *
112  **/
113 
114 /**
115  *
116  * @sa ddss_dgemm
117  * @sa ddss_tile
118  * @sa ddss_flat2tiled
119  * @sa ddss_symflat2tiled
120  * @sa ddss_tiled2flat
121  * @sa ddss_symtiled2flat
122  *
123  **/
124 
125 enum LASS_RETURN kdsymm( enum DDSS_SIDE SIDE, enum DDSS_UPLO UPLO,
126  int M, int N,
127  const double ALPHA, double *A, int LDA,
128  double *B, int LDB,
129  const double BETA, double *C, int LDC )
130 {
131 
132  // Local variables
133  int mt, kt, nt;
134  int mi, ki, ni;
135  int Am, An;
136  int Amt, Ant;
137  int tile_size_m;
138  int tile_size_n;
139  int tile_size_k;
140  int k_check;
141  double betat;
142 
143  // Number of tiles
144  if ( M % TILE_SIZE == 0 )
145  {
146  mt = M / TILE_SIZE;
147  }
148  else
149  {
150  mt = ( M / TILE_SIZE ) + 1;
151  }
152 
153  if ( N % TILE_SIZE == 0 )
154  {
155  nt = N / TILE_SIZE;
156  }
157  else
158  {
159  nt = ( N / TILE_SIZE ) + 1;
160  }
161 
162  if ( SIDE == Left )
163  {
164  if ( M % TILE_SIZE == 0 )
165  {
166  kt = M / TILE_SIZE;
167  }
168  else
169  {
170  kt = ( M / TILE_SIZE ) + 1;
171  }
172  }
173  else
174  {
175  if ( N % TILE_SIZE == 0 )
176  {
177  kt = N / TILE_SIZE;
178  }
179  else
180  {
181  kt = ( N / TILE_SIZE ) + 1;
182  }
183  }
184 
185  /****************************
186  --Tile matrices declaration--
187  ****************************/
188 
189  if ( SIDE == Left )
190  {
191  Am = M;
192  An = M;
193  Amt = mt;
194  Ant = mt;
195  }
196  else
197  {
198  Am = N;
199  An = N;
200  Amt = nt;
201  Ant = nt;
202  }
203 
204  /***************************
205  --Tile matrices allocation--
206  ***************************/
207 
208  double (*TILE_A)[Ant][TILE_SIZE * TILE_SIZE] = malloc ( Amt * Ant *
209  TILE_SIZE * TILE_SIZE * sizeof( double ) );
210 
211  if ( TILE_A == NULL)
212  {
213  fprintf( stderr, "Failure in kdsymm for matrix TILE_A\n" );
214  return NoSuccess;
215  }
216 
217  double (*TILE_B)[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
218  TILE_SIZE * TILE_SIZE * sizeof( double ) );
219 
220  if ( TILE_B == NULL)
221  {
222  fprintf( stderr, "Failure in kdsymm for matrix TILE_B\n" );
223  return NoSuccess;
224  }
225 
226  double (*TILE_C)[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
227  TILE_SIZE * TILE_SIZE * sizeof( double ) );
228 
229  if ( TILE_C == NULL)
230  {
231  fprintf( stderr, "Failure in kdsymm for matrix TILE_C\n" );
232  return NoSuccess;
233  }
234 
235  /************************************************
236  // --From flat data layout to tiled data layout--
237  ************************************************/
238 
239  // From flat and symmetric matrix A to tile matrix TILE_A
240  ddss_dsymflat2tiled( Am, An, A, LDA, Amt, Ant, TILE_A, UPLO );
241 
242  // From flat matrix B to tile matrix TILE_B
243  ddss_dflat2tiled( M, N, B, LDB, mt, nt, TILE_B );
244 
245  // From flat matrix C to tile matrix TILE_C
246  ddss_dflat2tiled( M, N, C, LDC, mt, nt, TILE_C );
247 
248  /*************
249  --DSYMM tile--
250  *************/
251 
252  for ( mi = 0; mi < mt; mi++ )
253  {
254  tile_size_m = ddss_tile_size( M, mi );
255  for ( ni = 0; ni < nt; ni++ )
256  {
257  tile_size_n = ddss_tile_size( N, ni );
258  if ( SIDE == Left )
259  {
260  // --SIDE = Left & UPLO = Lower--
261  if ( UPLO == Lower)
262  {
263  for ( ki = 0; ki < kt; ki++ )
264  {
265  if (ki == 0)
266  {
267  betat = BETA;
268  }
269  else
270  {
271  betat = 1.0;
272  }
273  tile_size_k = ddss_tile_size( M, ki );
274 
275  if ( ki < mi )
276  {
277  #pragma oss task in( TILE_A[mi][ki] ) \
278  in( TILE_B[ki][ni] ) \
279  inout( TILE_C[mi][ni] ) \
280  shared( TILE_A, TILE_B, TILE_C ) \
281  firstprivate( mi, ni, ki, betat )
282  cblas_dgemm( CblasRowMajor,
283  ( CBLAS_TRANSPOSE ) NoTrans,
284  ( CBLAS_TRANSPOSE ) NoTrans,
285  tile_size_m,
286  tile_size_n,
287  tile_size_k,
288  ALPHA, TILE_A[mi][ki], tile_size_k,
289  TILE_B[ki][ni], tile_size_n,
290  betat, TILE_C[mi][ni], tile_size_n );
291  }
292  else
293  {
294  if ( ki == mi )
295  {
296  #pragma oss task in( TILE_A[ki][ki] ) \
297  in( TILE_B[ki][ni] ) \
298  inout( TILE_C[mi][ni] ) \
299  shared( TILE_A, TILE_B, TILE_C ) \
300  firstprivate( mi, ni, ki, betat )
301  cblas_dsymm( CblasRowMajor,
302  ( CBLAS_SIDE ) SIDE,
303  ( CBLAS_UPLO ) UPLO,
304  tile_size_m, tile_size_n,
305  ALPHA, TILE_A[ki][ki], tile_size_k,
306  TILE_B[ki][ni], tile_size_n,
307  betat, TILE_C[mi][ni],
308  tile_size_n );
309  }
310  else
311  {
312  #pragma oss task in( TILE_A[ki][mi] ) \
313  in( TILE_B[ki][ni] ) \
314  inout( TILE_C[mi][ni] ) \
315  shared( TILE_A, TILE_B, TILE_C ) \
316  firstprivate( mi, ni, ki, betat )
317  cblas_dgemm( CblasRowMajor,
318  ( CBLAS_TRANSPOSE ) Trans,
319  ( CBLAS_TRANSPOSE ) NoTrans,
320  tile_size_m,
321  tile_size_n,
322  tile_size_k,
323  ALPHA, TILE_A[ki][mi], tile_size_m,
324  TILE_B[ki][ni], tile_size_n,
325  betat, TILE_C[mi][ni],
326  tile_size_n );
327  }
328  }
329  }
330  }
331  // --SIDE = Left & UPLO = Upper--
332  else
333  {
334  for ( ki = 0; ki < kt; ki++ )
335  {
336  if (ki == 0)
337  {
338  betat = BETA;
339  }
340  else
341  {
342  betat = 1.0;
343  }
344  tile_size_k = ddss_tile_size( M, ki );
345 
346  if ( ki < mi )
347  {
348  #pragma oss task in( TILE_A[ki][mi] ) \
349  in( TILE_B[ki][ni] ) \
350  inout( TILE_C[mi][ni] ) \
351  shared( TILE_A, TILE_B, TILE_C ) \
352  firstprivate( mi, ni, ki, betat )
353  cblas_dgemm( CblasRowMajor,
354  ( CBLAS_TRANSPOSE ) Trans,
355  ( CBLAS_TRANSPOSE ) NoTrans,
356  tile_size_m,
357  tile_size_n,
358  tile_size_k,
359  ALPHA, TILE_A[ki][mi], tile_size_m,
360  TILE_B[ki][ni], tile_size_n,
361  betat, TILE_C[mi][ni], tile_size_n );
362  }
363  else
364  {
365  if ( ki == mi )
366  {
367  #pragma oss task in( TILE_A[ki][ki] ) \
368  in( TILE_B[ki][ni] ) \
369  inout( TILE_C[mi][ni] ) \
370  shared( TILE_A, TILE_B, TILE_C ) \
371  firstprivate( mi, ni, ki, betat )
372  cblas_dsymm( CblasRowMajor,
373  ( CBLAS_SIDE ) SIDE,
374  ( CBLAS_UPLO ) UPLO,
375  tile_size_m, tile_size_n,
376  ALPHA, TILE_A[ki][ki], tile_size_k,
377  TILE_B[ki][ni], tile_size_n,
378  betat, TILE_C[mi][ni],
379  tile_size_n );
380  }
381  else
382  {
383  #pragma oss task in( TILE_A[mi][ki] ) \
384  in( TILE_B[ki][ni] ) \
385  inout( TILE_C[mi][ni] ) \
386  shared( TILE_A, TILE_B, TILE_C ) \
387  firstprivate( mi, ni, ki, betat )
388  cblas_dgemm( CblasRowMajor,
389  ( CBLAS_TRANSPOSE ) NoTrans,
390  ( CBLAS_TRANSPOSE ) NoTrans,
391  tile_size_m,
392  tile_size_n,
393  tile_size_k,
394  ALPHA, TILE_A[mi][ki], tile_size_k,
395  TILE_B[ki][ni], tile_size_n,
396  betat, TILE_C[mi][ni],
397  tile_size_n );
398  }
399  }
400  }
401  }
402  }
403  else
404  {
405  // --SIDE = Right & UPLO = Lower--
406  if ( UPLO == Lower)
407  {
408  for ( ki = 0; ki < kt; ki++ )
409  {
410  if (ki == 0)
411  {
412  betat = BETA;
413  }
414  else
415  {
416  betat = 1.0;
417  }
418  tile_size_k = ddss_tile_size( N, ki );
419 
420  if ( ki < ni )
421  {
422  #pragma oss task in( TILE_B[mi][ki] ) \
423  in( TILE_A[ni][ki] ) \
424  inout( TILE_C[mi][ni] ) \
425  shared( TILE_A, TILE_B, TILE_C ) \
426  firstprivate( mi, ni, ki, betat )
427  cblas_dgemm( CblasRowMajor,
428  ( CBLAS_TRANSPOSE ) NoTrans,
429  ( CBLAS_TRANSPOSE ) Trans,
430  tile_size_m,
431  tile_size_n,
432  tile_size_k,
433  ALPHA, TILE_B[mi][ki], tile_size_k,
434  TILE_A[ni][ki], tile_size_k,
435  betat, TILE_C[mi][ni], tile_size_n );
436  }
437  else
438  {
439  if ( ki == ni )
440  {
441  #pragma oss task in( TILE_A[ki][ki] ) \
442  in( TILE_B[mi][ki] ) \
443  inout( TILE_C[mi][ni] ) \
444  shared( TILE_A, TILE_B, TILE_C ) \
445  firstprivate( mi, ni, ki, betat )
446  cblas_dsymm( CblasRowMajor,
447  ( CBLAS_SIDE ) SIDE,
448  ( CBLAS_UPLO ) UPLO,
449  tile_size_m, tile_size_n,
450  ALPHA, TILE_A[ki][ki], tile_size_k,
451  TILE_B[mi][ki], tile_size_k,
452  betat, TILE_C[mi][ni],
453  tile_size_n );
454  }
455  else
456  {
457  #pragma oss task in( TILE_B[mi][ki] ) \
458  in( TILE_A[ki][ni] ) \
459  inout( TILE_C[mi][ni] ) \
460  shared( TILE_A, TILE_B, TILE_C ) \
461  firstprivate( mi, ni, ki, betat )
462  cblas_dgemm( CblasRowMajor,
463  ( CBLAS_TRANSPOSE ) NoTrans,
464  ( CBLAS_TRANSPOSE ) NoTrans,
465  tile_size_m,
466  tile_size_n,
467  tile_size_k,
468  ALPHA, TILE_B[mi][ki], tile_size_k,
469  TILE_A[ki][ni], tile_size_n,
470  betat, TILE_C[mi][ni],
471  tile_size_n );
472  }
473  }
474  }
475  }
476  // --SIDE = Right & UPLO = Upper--
477  else
478  {
479  for ( ki = 0; ki < kt; ki++ )
480  {
481  if (ki == 0)
482  {
483  betat = BETA;
484  }
485  else
486  {
487  betat = 1.0;
488  }
489  tile_size_k = ddss_tile_size( N, ki );
490 
491  if ( ki < ni )
492  {
493  #pragma oss task in( TILE_B[mi][ki] ) \
494  in( TILE_A[ki][ni] ) \
495  inout( TILE_C[mi][ni] ) \
496  shared( TILE_A, TILE_B, TILE_C ) \
497  firstprivate( mi, ni, ki, betat )
498  cblas_dgemm( CblasRowMajor,
499  ( CBLAS_TRANSPOSE ) NoTrans,
500  ( CBLAS_TRANSPOSE ) NoTrans,
501  tile_size_m,
502  tile_size_n,
503  tile_size_k,
504  ALPHA, TILE_B[mi][ki], tile_size_k,
505  TILE_A[ki][ni], tile_size_n,
506  betat, TILE_C[mi][ni], tile_size_n );
507  }
508  else
509  {
510  if ( ki == ni )
511  {
512  #pragma oss task in( TILE_A[ki][ki] ) \
513  in( TILE_B[ki][ni] ) \
514  inout( TILE_C[mi][ni] ) \
515  shared( TILE_A, TILE_B, TILE_C ) \
516  firstprivate( mi, ni, ki, betat )
517  cblas_dsymm( CblasRowMajor,
518  ( CBLAS_SIDE ) SIDE,
519  ( CBLAS_UPLO ) UPLO,
520  tile_size_m, tile_size_n,
521  ALPHA, TILE_A[ki][ki], tile_size_k,
522  TILE_B[mi][ki], tile_size_k,
523  betat, TILE_C[mi][ni],
524  tile_size_n );
525  }
526  else
527  {
528  #pragma oss task in( TILE_B[mi][ki] ) \
529  in( TILE_A[ni][ki] ) \
530  inout( TILE_C[mi][ni] ) \
531  shared( TILE_A, TILE_B, TILE_C ) \
532  firstprivate( mi, ni, ki, betat )
533  cblas_dgemm( CblasRowMajor,
534  ( CBLAS_TRANSPOSE ) NoTrans,
535  ( CBLAS_TRANSPOSE ) Trans,
536  tile_size_m,
537  tile_size_n,
538  tile_size_k,
539  ALPHA, TILE_B[mi][ki], tile_size_k,
540  TILE_A[ni][ki], tile_size_k,
541  betat, TILE_C[mi][ni],
542  tile_size_n );
543  }
544  }
545  }
546  }
547  }
548  }
549  }
550 
551  // From tile matrix TILE_C to flat matrix C
552  ddss_dtiled2flat( M, N, C, LDC, mt, nt, TILE_C );
553 
554  // --Tile matrices free--
555  free( TILE_A );
556  free( TILE_B );
557  free( TILE_C );
558 
559  return Success;
560 
561 }
562 
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
enum LASS_RETURN kdsymm(enum DDSS_SIDE SIDE, enum DDSS_UPLO UPLO, int M, int N, const double ALPHA, double *A, int LDA, double *B, int LDB, const double BETA, double *C, int LDC)
Definition: kdsymm.c:125
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52