LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
kdtrmm.c
Go to the documentation of this file.
1 #include "../include/lass.h"
2 
3 /**
4  *
5  * @file kdtrmm.c
6  *
7  * @brief LASs-DDSs kdtrmm routine.
8  *
9  * LASs-DDSs is a software package provided by:
10  * Barcelona Supercomputing Center - Centro Nacional de Supercomputacion
11  *
12  * @author Pedro Valero-Lara pedro.valero@bsc.es
13  * @author Boro Sofranac boro.sofranac@bsc.es
14  * @date 2018-03-19
15  * @reviewer
16  * @modified
17  *
18  **/
19 
20 /**
21  *
22  * @ingroup DDSS
23  *
24  * Performs one of the matrix-matrix operations:
25  *
26  * B = ALPHA * op( A ) * B, or B = ALPHA * B * op( A )
27  *
28  * where op( A ) is one of:
29  *
30  * op( A ) = A or
31  * op( A ) = A**T
32  *
33  * ALPHA is a scalar, B is a M by N matrix and A is a unit, or non-unit,
34  * upper or lower triangular matrix.
35  *
36  **/
37 
38 /**
39  *
40  * @param[in]
41  * SIDE enum DDSS_SIDE.
42  * SIDE specifies the position of the triangular A matrix in
43  * the operations:
44  * - Left: B = ALPHA * op( A ) * B.
45  * - Right: B = ALPHA * B * op( A ).
46  *
47  * @param[in]
48  * UPLO enum DDSS_UPLO.
49  * UPLO specifies the form in which A is stored:
50  * - Lower: Lower triangle of A is stored. The upper traingular part is
51  * not referenced.
52  * - Upper: Upper triangle of A is stored. The lower triangular part is
53  * not referenced.
54  *
55  * @param[in]
56  * TRANS_A enum DDSS_TRANS.
57  * TRANS_A specifies the form of op( A ) to be used:
58  * - NoTrans: op( A ) = A.
59  * - Trans: op( A ) = A**T.
60  *
61  * @param[in]
62  * DIAG enum DDSS_DIAG.
63  * DIAG specifies whether or not A is unit triangular as follows:
64  * - Unit: A is assumed to be unit triangular.
65  * - NonUnit: A is not assumed to be unit triangular.
66  *
67  * @param[in]
68  * M int.
69  * M specifies the number of rows of B. M must be at least zero.
70  *
71  * @param[in]
72  * N int.
73  * N specifies the number of columns of B. N must be at least zero.
74  *
75  * @param[in]
76  * ALPHA double.
77  * ALPHA specifies the scalar alpha.
78  *
79  * @param[in]
80  * A double *.
81  * A is a pointer to a matrix of dimension K by K, where K is M when
82  * SIDE = Left and is N otherwise.
83  * When UPLO = Uppper the strictly lower triangular part of A is not
84  * referenced and when UPLO = Lower the strictly upper triangular part
85  * of A is not referenced.
86  * Note that when DIAG = Unit, the diagonal elements of A are not
87  * referenced either, but are assumed to be unity.
88  *
89  * @param[in]
90  * LDA int.
91  * LDA specifies the number of columns of A ( row-major order ).
92  * When SIDE = Left then LDA must be at least max( 1, M ), otherwise
93  * LDA must be at least max( 1, N ).
94  *
95  * @param[in, out]
96  * B double *.
97  * B is a pointer to a matrix of dimension M by N.
98  * On exit the matrix B is overwritten by the transformed matrix.
99  *
100  * @param[in]
101  * LDB int.
102  * LDB specifies the number of columns of B ( row-major order ).
103  * LDB must be at least max( 1, N ).
104  *
105  **/
106 
107 /**
108  *
109  * @retval Success successful exit
110  * @retval NoSuccess unsuccessful exit
111  *
112  **/
113 
114 /**
115  *
116  * @sa ddss_tile
117  * @sa ddss_flat2tiled
118  * @sa ddss_dsymflat2tiled
119  * @sa ddss_tiled2flat
120  *
121  **/
122 
123 enum LASS_RETURN kdtrmm( enum DDSS_SIDE SIDE, enum DDSS_UPLO UPLO,
124  enum DDSS_TRANS TRANS_A, enum DDSS_DIAG DIAG,
125  int M, int N,
126  const double ALPHA, double *A, int LDA,
127  double *B, int LDB )
128 {
129 
130  // Local variables
131  int k;
132  int mt, nt;
133  int ki, ni, mi, nni, mmi;
134  int Am, An;
135  int Bm, Bn;
136  int tile_size_m;
137  int tile_size_n;
138  int tile_size_k;
139 
140  // Number of tiles
141  if ( M % TILE_SIZE == 0 )
142  {
143  mt = M / TILE_SIZE;
144  }
145  else
146  {
147  mt = ( M / TILE_SIZE ) + 1;
148  }
149 
150  if ( N % TILE_SIZE == 0 )
151  {
152  nt = N / TILE_SIZE;
153  }
154  else
155  {
156  nt = ( N / TILE_SIZE ) + 1;
157  }
158 
159  /****************************
160  --Tile matrices declaration--
161  ****************************/
162 
163  if ( SIDE == Left )
164  {
165  Am = An = mt;
166  k = M;
167  }
168  else
169  {
170  Am = An = nt;
171  k = N;
172  }
173 
174  Bm = mt;
175  Bn = nt;
176 
177  /***************************
178  --Tile matrices allocation--
179  ***************************/
180 
181  double ( *TILE_A )[An][TILE_SIZE * TILE_SIZE] = malloc(
182  Am * An * TILE_SIZE * TILE_SIZE * sizeof( double ) );
183 
184  if ( TILE_A == NULL )
185  {
186  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_A\n" );
187  return NoSuccess;
188  }
189 
190  double ( *TILE_B )[Bn][TILE_SIZE * TILE_SIZE] = malloc(
191  Bm * Bn * TILE_SIZE * TILE_SIZE * sizeof( double ) );
192 
193  if ( TILE_B == NULL )
194  {
195  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_B\n" );
196  return NoSuccess;
197  }
198 
199  /*********************************************
200  --From flat data layout to tiled data layout--
201  *********************************************/
202 
203  // From flat matrix A to tile matrix TILE_A
204  ddss_dsymflat2tiled( k, k, A, LDA, Am, An, TILE_A, UPLO );
205 
206  // From flat matrix B to tile matrix TILE_B
207  ddss_dflat2tiled( M, N, B, LDB, Bm, Bn, TILE_B );
208 
209  /*************
210  --DTRMM tile--
211  *************/
212 
213  if ( SIDE == Left )
214  {
215  if ( UPLO == Upper )
216  {
217  // --SIDE = Left & UPLO = Upper & TRANS_A = NoTrans--
218  if ( TRANS_A == NoTrans )
219  {
220  for ( ki = 0; ki < mt; ki++ )
221  {
222  tile_size_k = ddss_tile_size( M, ki );
223 
224  for ( ni = 0; ni < nt; ni++ )
225  {
226  tile_size_n = ddss_tile_size( N, ni );
227 
228  #pragma oss task in( TILE_A[ki][ki] ) \
229  inout( TILE_B[ki][ni] ) \
230  shared( TILE_A, TILE_B ) \
231  firstprivate( ki, ni ) \
232  label( dtrmm )
233  cblas_dtrmm( CblasRowMajor,
234  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
235  ( CBLAS_TRANSPOSE ) TRANS_A,
236  ( CBLAS_DIAG ) DIAG,
237  tile_size_k,
238  tile_size_n,
239  ALPHA, TILE_A[ki][ki], tile_size_k,
240  TILE_B[ki][ni], tile_size_n );
241 
242  for ( mi = ki + 1; mi < mt; mi++ )
243  {
244  tile_size_m = ddss_tile_size( M, mi );
245 
246  #pragma oss task in( TILE_A[ki][mi] ) \
247  in( TILE_B[mi][ni] ) \
248  inout( TILE_B[ki][ni] ) \
249  shared( TILE_A, TILE_B ) \
250  firstprivate( ki, mi, ni ) \
251  label( dgemm )
252  cblas_dgemm( CblasRowMajor,
253  CblasNoTrans, CblasNoTrans,
254  tile_size_k,
255  tile_size_n,
256  tile_size_m,
257  ALPHA, TILE_A[ki][mi], tile_size_m,
258  TILE_B[mi][ni], tile_size_n,
259  1.0, TILE_B[ki][ni], tile_size_n );
260  }
261  }
262  }
263  }
264  // --SIDE = Left & UPLO = Upper & TRANS_A = Trans--
265  else
266  {
267  for ( ki = mt-1; ki > -1; ki-- )
268  {
269  tile_size_k = ddss_tile_size( M, ki );
270 
271  for ( ni = 0; ni < nt; ni++ )
272  {
273  tile_size_n = ddss_tile_size( N, ni );
274 
275  #pragma oss task in( TILE_A[ki][ki] ) \
276  inout( TILE_B[ki][ni] ) \
277  shared( TILE_A, TILE_B ) \
278  firstprivate( ki, ni ) \
279  label( dtrmm )
280  cblas_dtrmm( CblasRowMajor,
281  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
282  ( CBLAS_TRANSPOSE ) TRANS_A,
283  ( CBLAS_DIAG ) DIAG,
284  tile_size_k,
285  tile_size_n,
286  ALPHA, TILE_A[ki][ki], tile_size_k,
287  TILE_B[ki][ni], tile_size_n );
288 
289  for ( mi = 0; mi < ki; mi++ )
290  {
291  tile_size_m = ddss_tile_size( M, mi );
292 
293  #pragma oss task in( TILE_A[mi][ki] ) \
294  in( TILE_B[mi][ni] ) \
295  inout( TILE_B[ki][ni] ) \
296  shared( TILE_A, TILE_B ) \
297  firstprivate( ki, mi, ni ) \
298  label( dgemm )
299  cblas_dgemm( CblasRowMajor,
300  CblasTrans, CblasNoTrans,
301  tile_size_k,
302  tile_size_n,
303  tile_size_m,
304  ALPHA, TILE_A[mi][ki], tile_size_k,
305  TILE_B[mi][ni], tile_size_n,
306  1.0, TILE_B[ki][ni], tile_size_n );
307  }
308  }
309  }
310 
311  }
312  }
313  // --SIDE = Left & UPLO = Lower & TRANS_A = NoTrans--
314  else
315  {
316  if ( TRANS_A == NoTrans )
317  {
318  for ( ki = mt-1; ki > -1; ki-- )
319  {
320  tile_size_k = ddss_tile_size( M, ki );
321 
322  for ( ni = 0; ni < nt; ni++ )
323  {
324  tile_size_n = ddss_tile_size( N, ni );
325 
326  #pragma oss task in( TILE_A[ki][ki] ) \
327  inout( TILE_B[ki][ni] ) \
328  shared( TILE_A, TILE_B ) \
329  firstprivate( ki, ni ) \
330  label( dtrmm )
331  cblas_dtrmm( CblasRowMajor,
332  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
333  ( CBLAS_TRANSPOSE ) TRANS_A,
334  ( CBLAS_DIAG ) DIAG,
335  tile_size_k,
336  tile_size_n,
337  ALPHA, TILE_A[ki][ki], tile_size_k,
338  TILE_B[ki][ni], tile_size_n );
339 
340  for ( mi = 0; mi < ki; mi++ )
341  {
342  tile_size_m = ddss_tile_size( M, mi );
343 
344  #pragma oss task in( TILE_A[ki][mi] ) \
345  in( TILE_B[mi][ni] ) \
346  inout( TILE_B[ki][ni] ) \
347  shared( TILE_A, TILE_B ) \
348  firstprivate( ki, mi, ni ) \
349  label( dgemm )
350  cblas_dgemm( CblasRowMajor,
351  CblasNoTrans, CblasNoTrans,
352  tile_size_k,
353  tile_size_n,
354  tile_size_m,
355  ALPHA, TILE_A[ki][mi], tile_size_m,
356  TILE_B[mi][ni], tile_size_n,
357  1.0, TILE_B[ki][ni], tile_size_n );
358  }
359  }
360  }
361 
362  }
363  // --SIDE = Left & UPLO = Lower & TRANS_A = Trans--
364  else
365  {
366  for ( ki = 0; ki < mt; ki++ )
367  {
368  tile_size_k = ddss_tile_size( M, ki );
369 
370  for ( ni = 0; ni < nt; ni++ )
371  {
372  tile_size_n = ddss_tile_size( N, ni );
373 
374  #pragma oss task in( TILE_A[ki][ki] ) \
375  inout( TILE_B[ki][ni] ) \
376  shared( TILE_A, TILE_B ) \
377  firstprivate( ki, ni ) \
378  label( dtrmm )
379  cblas_dtrmm( CblasRowMajor,
380  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
381  ( CBLAS_TRANSPOSE ) TRANS_A,
382  ( CBLAS_DIAG ) DIAG,
383  tile_size_k,
384  tile_size_n,
385  ALPHA, TILE_A[ki][ki], tile_size_k,
386  TILE_B[ki][ni], tile_size_n );
387 
388  for ( mi = ki + 1; mi < mt; mi++ )
389  {
390  tile_size_m = ddss_tile_size( M, mi );
391 
392  #pragma oss task in( TILE_A[mi][ki] ) \
393  in( TILE_B[mi][ni] ) \
394  inout( TILE_B[ki][ni] ) \
395  shared( TILE_A, TILE_B ) \
396  firstprivate( ki, mi, ni ) \
397  label( dgemm )
398  cblas_dgemm( CblasRowMajor,
399  CblasTrans, CblasNoTrans,
400  tile_size_k,
401  tile_size_n,
402  tile_size_m,
403  ALPHA, TILE_A[mi][ki], tile_size_k,
404  TILE_B[mi][ni], tile_size_n,
405  1.0, TILE_B[ki][ni], tile_size_n );
406  }
407  }
408  }
409  }
410  }
411  }
412  // --SIDE = Right & UPLO = Upper & TRANS_A = NoTrans--
413  else
414  {
415  if ( UPLO == Upper )
416  {
417  if ( TRANS_A == NoTrans )
418  {
419  for ( ki = nt-1; ki > -1; ki-- )
420  {
421  tile_size_k = ddss_tile_size( N, ki );
422 
423  for ( ni = 0; ni < mt; ni++ )
424  {
425  tile_size_n = ddss_tile_size( M, ni );
426 
427  #pragma oss task in( TILE_A[ki][ki] ) \
428  inout( TILE_B[ni][ki] ) \
429  shared( TILE_A, TILE_B ) \
430  firstprivate( ki, ni ) \
431  label( dtrmm )
432  cblas_dtrmm( CblasRowMajor,
433  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
434  ( CBLAS_TRANSPOSE ) TRANS_A,
435  ( CBLAS_DIAG ) DIAG,
436  tile_size_n,
437  tile_size_k,
438  ALPHA, TILE_A[ki][ki], tile_size_k,
439  TILE_B[ni][ki], tile_size_k );
440 
441  for ( mi = 0; mi < ki; mi++ )
442  {
443  tile_size_m = ddss_tile_size( N, mi );
444 
445  #pragma oss task in( TILE_B[ni][mi] ) \
446  in( TILE_A[mi][ki] ) \
447  inout( TILE_B[ni][ki] ) \
448  shared( TILE_A, TILE_B ) \
449  firstprivate( ki, mi, ni ) \
450  label( dgemm )
451  cblas_dgemm( CblasRowMajor,
452  CblasNoTrans, CblasNoTrans,
453  tile_size_n,
454  tile_size_k,
455  tile_size_m,
456  ALPHA, TILE_B[ni][mi], tile_size_m,
457  TILE_A[mi][ki], tile_size_k,
458  1.0, TILE_B[ni][ki], tile_size_k );
459  }
460  }
461  }
462  }
463  // --SIDE = Right & UPLO = Upper & TRANS_A = Trans--
464  else
465  {
466  for ( ki = 0; ki < nt; ki++ )
467  {
468  tile_size_k = ddss_tile_size( N, ki );
469 
470  for ( ni = 0; ni < mt; ni++ )
471  {
472  tile_size_n = ddss_tile_size( M, ni );
473 
474  #pragma oss task in( TILE_A[ki][ki] ) \
475  inout( TILE_B[ni][ki] ) \
476  shared( TILE_A, TILE_B ) \
477  firstprivate( ki, ni ) \
478  label( dtrmm )
479  cblas_dtrmm( CblasRowMajor,
480  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
481  ( CBLAS_TRANSPOSE ) TRANS_A,
482  ( CBLAS_DIAG ) DIAG,
483  tile_size_n,
484  tile_size_k,
485  ALPHA, TILE_A[ki][ki], tile_size_k,
486  TILE_B[ni][ki], tile_size_k );
487 
488  for ( mi = ki+1; mi < nt; mi++ )
489  {
490  tile_size_m = ddss_tile_size( N, mi );
491 
492  #pragma oss task in( TILE_B[ni][mi] ) \
493  in( TILE_A[ki][mi] ) \
494  inout( TILE_B[ni][ki] ) \
495  shared( TILE_A, TILE_B ) \
496  firstprivate( ki, mi, ni ) \
497  label( dgemm )
498  cblas_dgemm( CblasRowMajor,
499  CblasNoTrans, CblasTrans,
500  tile_size_n,
501  tile_size_k,
502  tile_size_m,
503  ALPHA, TILE_B[ni][mi], tile_size_m,
504  TILE_A[ki][mi], tile_size_m,
505  1.0, TILE_B[ni][ki], tile_size_k );
506  }
507  }
508  }
509  }
510  }
511  // --SIDE = Right & UPLO = Lower & TRANS_A = NoTrans--
512  else
513  {
514  if ( TRANS_A == NoTrans )
515  {
516  for ( ki = 0; ki < nt; ki++ )
517  {
518  tile_size_k = ddss_tile_size( N, ki );
519 
520  for ( ni = 0; ni < mt; ni++ )
521  {
522  tile_size_n = ddss_tile_size( M, ni );
523 
524  #pragma oss task in( TILE_A[ki][ki] ) \
525  inout( TILE_B[ni][ki] ) \
526  shared( TILE_A, TILE_B ) \
527  firstprivate( ki, ni ) \
528  label( dtrmm )
529  cblas_dtrmm( CblasRowMajor,
530  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
531  ( CBLAS_TRANSPOSE ) TRANS_A,
532  ( CBLAS_DIAG ) DIAG,
533  tile_size_n,
534  tile_size_k,
535  ALPHA, TILE_A[ki][ki], tile_size_k,
536  TILE_B[ni][ki], tile_size_k );
537 
538  for ( mi = ki+1; mi < nt; mi++ )
539  {
540  tile_size_m = ddss_tile_size( N, mi );
541 
542  #pragma oss task in( TILE_B[ni][mi] ) \
543  in( TILE_A[mi][ki] ) \
544  inout( TILE_B[ni][ki] ) \
545  shared( TILE_A, TILE_B ) \
546  firstprivate( ki, mi, ni ) \
547  label( dgemm )
548  cblas_dgemm( CblasRowMajor,
549  CblasNoTrans, CblasNoTrans,
550  tile_size_n,
551  tile_size_k,
552  tile_size_m,
553  ALPHA, TILE_B[ni][mi], tile_size_m,
554  TILE_A[mi][ki], tile_size_k,
555  1.0, TILE_B[ni][ki], tile_size_k );
556  }
557  }
558  }
559  }
560  // --SIDE = Right & UPLO = Lower & TRANS_A = Trans--
561  else
562  {
563  for ( ki = nt-1; ki > -1; ki-- )
564  {
565  tile_size_k = ddss_tile_size( N, ki );
566 
567  for ( ni = 0; ni < mt; ni++ )
568  {
569  tile_size_n = ddss_tile_size( M, ni );
570 
571  #pragma oss task in( TILE_A[ki][ki] ) \
572  inout( TILE_B[ni][ki] ) \
573  shared( TILE_A, TILE_B ) \
574  firstprivate( ki, ni ) \
575  label( dtrmm )
576  cblas_dtrmm( CblasRowMajor,
577  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
578  ( CBLAS_TRANSPOSE ) TRANS_A,
579  ( CBLAS_DIAG ) DIAG,
580  tile_size_n,
581  tile_size_k,
582  ALPHA, TILE_A[ki][ki], tile_size_k,
583  TILE_B[ni][ki], tile_size_k );
584 
585  for ( mi = 0; mi < ki; mi++ )
586  {
587  tile_size_m = ddss_tile_size( N, mi );
588 
589  #pragma oss task in( TILE_B[ni][mi] ) \
590  in( TILE_A[ki][mi] ) \
591  inout( TILE_B[ni][ki] ) \
592  shared( TILE_A, TILE_B ) \
593  firstprivate( ki, mi, ni ) \
594  label( dgemm )
595  cblas_dgemm( CblasRowMajor,
596  CblasNoTrans, CblasTrans,
597  tile_size_n,
598  tile_size_k,
599  tile_size_m,
600  ALPHA, TILE_B[ni][mi], tile_size_m,
601  TILE_A[ki][mi], tile_size_m,
602  1.0, TILE_B[ni][ki], tile_size_k );
603  }
604  }
605  }
606  }
607  }
608  }
609 
610  /*********************************************
611  --From tiled data layout to flat data layout--
612  *********************************************/
613 
614  ddss_dtiled2flat( M, N, B, LDB, Bm, Bn, TILE_B );
615 
616  // --Tile matrices free--
617  free( TILE_A );
618  free( TILE_B );
619 
620  return Success;
621 
622 }
623 
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
enum LASS_RETURN kdtrmm(enum DDSS_SIDE SIDE, enum DDSS_UPLO UPLO, enum DDSS_TRANS TRANS_A, enum DDSS_DIAG DIAG, int M, int N, const double ALPHA, double *A, int LDA, double *B, int LDB)
Definition: kdtrmm.c:123
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52