LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
Functions
kdtrmm.c File Reference

LASs-DDSs kdtrmm routine. More...

#include "../include/lass.h"
Include dependency graph for kdtrmm.c:

Go to the source code of this file.

Functions

enum LASS_RETURN kdtrmm (enum DDSS_SIDE SIDE, enum DDSS_UPLO UPLO, enum DDSS_TRANS TRANS_A, enum DDSS_DIAG DIAG, int M, int N, const double ALPHA, double *A, int LDA, double *B, int LDB)
 

Detailed Description

LASs-DDSs kdtrmm routine.

LASs-DDSs is a software package provided by: Barcelona Supercomputing Center - Centro Nacional de Supercomputacion

Author
Pedro Valero-Lara pedro.nosp@m..val.nosp@m.ero@b.nosp@m.sc.e.nosp@m.s
Boro Sofranac boro..nosp@m.sofr.nosp@m.anac@.nosp@m.bsc..nosp@m.es
Date
2018-03-19

Definition in file kdtrmm.c.

Function Documentation

enum LASS_RETURN kdtrmm ( enum DDSS_SIDE  SIDE,
enum DDSS_UPLO  UPLO,
enum DDSS_TRANS  TRANS_A,
enum DDSS_DIAG  DIAG,
int  M,
int  N,
const double  ALPHA,
double *  A,
int  LDA,
double *  B,
int  LDB 
)

Performs one of the matrix-matrix operations:

B = ALPHA * op( A ) * B, or B = ALPHA * B * op( A )

where op( A ) is one of:

op( A ) = A      or
op( A ) = A**T 

ALPHA is a scalar, B is a M by N matrix and A is a unit, or non-unit, upper or lower triangular matrix.

Parameters
[in]SIDEenum DDSS_SIDE. SIDE specifies the position of the triangular A matrix in the operations:
  • Left: B = ALPHA * op( A ) * B.
  • Right: B = ALPHA * B * op( A ).
[in]UPLOenum DDSS_UPLO. UPLO specifies the form in which A is stored:
  • Lower: Lower triangle of A is stored. The upper traingular part is not referenced.
  • Upper: Upper triangle of A is stored. The lower triangular part is not referenced.
[in]TRANS_Aenum DDSS_TRANS. TRANS_A specifies the form of op( A ) to be used:
  • NoTrans: op( A ) = A.
  • Trans: op( A ) = A**T.
[in]DIAGenum DDSS_DIAG. DIAG specifies whether or not A is unit triangular as follows:
  • Unit: A is assumed to be unit triangular.
  • NonUnit: A is not assumed to be unit triangular.
[in]Mint. M specifies the number of rows of B. M must be at least zero.
[in]Nint. N specifies the number of columns of B. N must be at least zero.
[in]ALPHAdouble. ALPHA specifies the scalar alpha.
[in]Adouble *. A is a pointer to a matrix of dimension K by K, where K is M when SIDE = Left and is N otherwise. When UPLO = Uppper the strictly lower triangular part of A is not referenced and when UPLO = Lower the strictly upper triangular part of A is not referenced. Note that when DIAG = Unit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]LDAint. LDA specifies the number of columns of A ( row-major order ). When SIDE = Left then LDA must be at least max( 1, M ), otherwise LDA must be at least max( 1, N ).
[in,out]Bdouble *. B is a pointer to a matrix of dimension M by N. On exit the matrix B is overwritten by the transformed matrix.
[in]LDBint. LDB specifies the number of columns of B ( row-major order ). LDB must be at least max( 1, N ).
Return values
Successsuccessful exit
NoSuccessunsuccessful exit
See also
ddss_tile
ddss_flat2tiled
ddss_dsymflat2tiled
ddss_tiled2flat

Definition at line 123 of file kdtrmm.c.

References ddss_dflat2tiled(), ddss_dsymflat2tiled(), ddss_dtiled2flat(), and ddss_tile_size().

Referenced by ddss_dtrmm().

128 {
129 
130  // Local variables
131  int k;
132  int mt, nt;
133  int ki, ni, mi, nni, mmi;
134  int Am, An;
135  int Bm, Bn;
136  int tile_size_m;
137  int tile_size_n;
138  int tile_size_k;
139 
140  // Number of tiles
141  if ( M % TILE_SIZE == 0 )
142  {
143  mt = M / TILE_SIZE;
144  }
145  else
146  {
147  mt = ( M / TILE_SIZE ) + 1;
148  }
149 
150  if ( N % TILE_SIZE == 0 )
151  {
152  nt = N / TILE_SIZE;
153  }
154  else
155  {
156  nt = ( N / TILE_SIZE ) + 1;
157  }
158 
159  /****************************
160  --Tile matrices declaration--
161  ****************************/
162 
163  if ( SIDE == Left )
164  {
165  Am = An = mt;
166  k = M;
167  }
168  else
169  {
170  Am = An = nt;
171  k = N;
172  }
173 
174  Bm = mt;
175  Bn = nt;
176 
177  /***************************
178  --Tile matrices allocation--
179  ***************************/
180 
181  double ( *TILE_A )[An][TILE_SIZE * TILE_SIZE] = malloc(
182  Am * An * TILE_SIZE * TILE_SIZE * sizeof( double ) );
183 
184  if ( TILE_A == NULL )
185  {
186  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_A\n" );
187  return NoSuccess;
188  }
189 
190  double ( *TILE_B )[Bn][TILE_SIZE * TILE_SIZE] = malloc(
191  Bm * Bn * TILE_SIZE * TILE_SIZE * sizeof( double ) );
192 
193  if ( TILE_B == NULL )
194  {
195  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_B\n" );
196  return NoSuccess;
197  }
198 
199  /*********************************************
200  --From flat data layout to tiled data layout--
201  *********************************************/
202 
203  // From flat matrix A to tile matrix TILE_A
204  ddss_dsymflat2tiled( k, k, A, LDA, Am, An, TILE_A, UPLO );
205 
206  // From flat matrix B to tile matrix TILE_B
207  ddss_dflat2tiled( M, N, B, LDB, Bm, Bn, TILE_B );
208 
209  /*************
210  --DTRMM tile--
211  *************/
212 
213  if ( SIDE == Left )
214  {
215  if ( UPLO == Upper )
216  {
217  // --SIDE = Left & UPLO = Upper & TRANS_A = NoTrans--
218  if ( TRANS_A == NoTrans )
219  {
220  for ( ki = 0; ki < mt; ki++ )
221  {
222  tile_size_k = ddss_tile_size( M, ki );
223 
224  for ( ni = 0; ni < nt; ni++ )
225  {
226  tile_size_n = ddss_tile_size( N, ni );
227 
228  #pragma oss task in( TILE_A[ki][ki] ) \
229  inout( TILE_B[ki][ni] ) \
230  shared( TILE_A, TILE_B ) \
231  firstprivate( ki, ni ) \
232  label( dtrmm )
233  cblas_dtrmm( CblasRowMajor,
234  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
235  ( CBLAS_TRANSPOSE ) TRANS_A,
236  ( CBLAS_DIAG ) DIAG,
237  tile_size_k,
238  tile_size_n,
239  ALPHA, TILE_A[ki][ki], tile_size_k,
240  TILE_B[ki][ni], tile_size_n );
241 
242  for ( mi = ki + 1; mi < mt; mi++ )
243  {
244  tile_size_m = ddss_tile_size( M, mi );
245 
246  #pragma oss task in( TILE_A[ki][mi] ) \
247  in( TILE_B[mi][ni] ) \
248  inout( TILE_B[ki][ni] ) \
249  shared( TILE_A, TILE_B ) \
250  firstprivate( ki, mi, ni ) \
251  label( dgemm )
252  cblas_dgemm( CblasRowMajor,
253  CblasNoTrans, CblasNoTrans,
254  tile_size_k,
255  tile_size_n,
256  tile_size_m,
257  ALPHA, TILE_A[ki][mi], tile_size_m,
258  TILE_B[mi][ni], tile_size_n,
259  1.0, TILE_B[ki][ni], tile_size_n );
260  }
261  }
262  }
263  }
264  // --SIDE = Left & UPLO = Upper & TRANS_A = Trans--
265  else
266  {
267  for ( ki = mt-1; ki > -1; ki-- )
268  {
269  tile_size_k = ddss_tile_size( M, ki );
270 
271  for ( ni = 0; ni < nt; ni++ )
272  {
273  tile_size_n = ddss_tile_size( N, ni );
274 
275  #pragma oss task in( TILE_A[ki][ki] ) \
276  inout( TILE_B[ki][ni] ) \
277  shared( TILE_A, TILE_B ) \
278  firstprivate( ki, ni ) \
279  label( dtrmm )
280  cblas_dtrmm( CblasRowMajor,
281  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
282  ( CBLAS_TRANSPOSE ) TRANS_A,
283  ( CBLAS_DIAG ) DIAG,
284  tile_size_k,
285  tile_size_n,
286  ALPHA, TILE_A[ki][ki], tile_size_k,
287  TILE_B[ki][ni], tile_size_n );
288 
289  for ( mi = 0; mi < ki; mi++ )
290  {
291  tile_size_m = ddss_tile_size( M, mi );
292 
293  #pragma oss task in( TILE_A[mi][ki] ) \
294  in( TILE_B[mi][ni] ) \
295  inout( TILE_B[ki][ni] ) \
296  shared( TILE_A, TILE_B ) \
297  firstprivate( ki, mi, ni ) \
298  label( dgemm )
299  cblas_dgemm( CblasRowMajor,
300  CblasTrans, CblasNoTrans,
301  tile_size_k,
302  tile_size_n,
303  tile_size_m,
304  ALPHA, TILE_A[mi][ki], tile_size_k,
305  TILE_B[mi][ni], tile_size_n,
306  1.0, TILE_B[ki][ni], tile_size_n );
307  }
308  }
309  }
310 
311  }
312  }
313  // --SIDE = Left & UPLO = Lower & TRANS_A = NoTrans--
314  else
315  {
316  if ( TRANS_A == NoTrans )
317  {
318  for ( ki = mt-1; ki > -1; ki-- )
319  {
320  tile_size_k = ddss_tile_size( M, ki );
321 
322  for ( ni = 0; ni < nt; ni++ )
323  {
324  tile_size_n = ddss_tile_size( N, ni );
325 
326  #pragma oss task in( TILE_A[ki][ki] ) \
327  inout( TILE_B[ki][ni] ) \
328  shared( TILE_A, TILE_B ) \
329  firstprivate( ki, ni ) \
330  label( dtrmm )
331  cblas_dtrmm( CblasRowMajor,
332  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
333  ( CBLAS_TRANSPOSE ) TRANS_A,
334  ( CBLAS_DIAG ) DIAG,
335  tile_size_k,
336  tile_size_n,
337  ALPHA, TILE_A[ki][ki], tile_size_k,
338  TILE_B[ki][ni], tile_size_n );
339 
340  for ( mi = 0; mi < ki; mi++ )
341  {
342  tile_size_m = ddss_tile_size( M, mi );
343 
344  #pragma oss task in( TILE_A[ki][mi] ) \
345  in( TILE_B[mi][ni] ) \
346  inout( TILE_B[ki][ni] ) \
347  shared( TILE_A, TILE_B ) \
348  firstprivate( ki, mi, ni ) \
349  label( dgemm )
350  cblas_dgemm( CblasRowMajor,
351  CblasNoTrans, CblasNoTrans,
352  tile_size_k,
353  tile_size_n,
354  tile_size_m,
355  ALPHA, TILE_A[ki][mi], tile_size_m,
356  TILE_B[mi][ni], tile_size_n,
357  1.0, TILE_B[ki][ni], tile_size_n );
358  }
359  }
360  }
361 
362  }
363  // --SIDE = Left & UPLO = Lower & TRANS_A = Trans--
364  else
365  {
366  for ( ki = 0; ki < mt; ki++ )
367  {
368  tile_size_k = ddss_tile_size( M, ki );
369 
370  for ( ni = 0; ni < nt; ni++ )
371  {
372  tile_size_n = ddss_tile_size( N, ni );
373 
374  #pragma oss task in( TILE_A[ki][ki] ) \
375  inout( TILE_B[ki][ni] ) \
376  shared( TILE_A, TILE_B ) \
377  firstprivate( ki, ni ) \
378  label( dtrmm )
379  cblas_dtrmm( CblasRowMajor,
380  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
381  ( CBLAS_TRANSPOSE ) TRANS_A,
382  ( CBLAS_DIAG ) DIAG,
383  tile_size_k,
384  tile_size_n,
385  ALPHA, TILE_A[ki][ki], tile_size_k,
386  TILE_B[ki][ni], tile_size_n );
387 
388  for ( mi = ki + 1; mi < mt; mi++ )
389  {
390  tile_size_m = ddss_tile_size( M, mi );
391 
392  #pragma oss task in( TILE_A[mi][ki] ) \
393  in( TILE_B[mi][ni] ) \
394  inout( TILE_B[ki][ni] ) \
395  shared( TILE_A, TILE_B ) \
396  firstprivate( ki, mi, ni ) \
397  label( dgemm )
398  cblas_dgemm( CblasRowMajor,
399  CblasTrans, CblasNoTrans,
400  tile_size_k,
401  tile_size_n,
402  tile_size_m,
403  ALPHA, TILE_A[mi][ki], tile_size_k,
404  TILE_B[mi][ni], tile_size_n,
405  1.0, TILE_B[ki][ni], tile_size_n );
406  }
407  }
408  }
409  }
410  }
411  }
412  // --SIDE = Right & UPLO = Upper & TRANS_A = NoTrans--
413  else
414  {
415  if ( UPLO == Upper )
416  {
417  if ( TRANS_A == NoTrans )
418  {
419  for ( ki = nt-1; ki > -1; ki-- )
420  {
421  tile_size_k = ddss_tile_size( N, ki );
422 
423  for ( ni = 0; ni < mt; ni++ )
424  {
425  tile_size_n = ddss_tile_size( M, ni );
426 
427  #pragma oss task in( TILE_A[ki][ki] ) \
428  inout( TILE_B[ni][ki] ) \
429  shared( TILE_A, TILE_B ) \
430  firstprivate( ki, ni ) \
431  label( dtrmm )
432  cblas_dtrmm( CblasRowMajor,
433  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
434  ( CBLAS_TRANSPOSE ) TRANS_A,
435  ( CBLAS_DIAG ) DIAG,
436  tile_size_n,
437  tile_size_k,
438  ALPHA, TILE_A[ki][ki], tile_size_k,
439  TILE_B[ni][ki], tile_size_k );
440 
441  for ( mi = 0; mi < ki; mi++ )
442  {
443  tile_size_m = ddss_tile_size( N, mi );
444 
445  #pragma oss task in( TILE_B[ni][mi] ) \
446  in( TILE_A[mi][ki] ) \
447  inout( TILE_B[ni][ki] ) \
448  shared( TILE_A, TILE_B ) \
449  firstprivate( ki, mi, ni ) \
450  label( dgemm )
451  cblas_dgemm( CblasRowMajor,
452  CblasNoTrans, CblasNoTrans,
453  tile_size_n,
454  tile_size_k,
455  tile_size_m,
456  ALPHA, TILE_B[ni][mi], tile_size_m,
457  TILE_A[mi][ki], tile_size_k,
458  1.0, TILE_B[ni][ki], tile_size_k );
459  }
460  }
461  }
462  }
463  // --SIDE = Right & UPLO = Upper & TRANS_A = Trans--
464  else
465  {
466  for ( ki = 0; ki < nt; ki++ )
467  {
468  tile_size_k = ddss_tile_size( N, ki );
469 
470  for ( ni = 0; ni < mt; ni++ )
471  {
472  tile_size_n = ddss_tile_size( M, ni );
473 
474  #pragma oss task in( TILE_A[ki][ki] ) \
475  inout( TILE_B[ni][ki] ) \
476  shared( TILE_A, TILE_B ) \
477  firstprivate( ki, ni ) \
478  label( dtrmm )
479  cblas_dtrmm( CblasRowMajor,
480  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
481  ( CBLAS_TRANSPOSE ) TRANS_A,
482  ( CBLAS_DIAG ) DIAG,
483  tile_size_n,
484  tile_size_k,
485  ALPHA, TILE_A[ki][ki], tile_size_k,
486  TILE_B[ni][ki], tile_size_k );
487 
488  for ( mi = ki+1; mi < nt; mi++ )
489  {
490  tile_size_m = ddss_tile_size( N, mi );
491 
492  #pragma oss task in( TILE_B[ni][mi] ) \
493  in( TILE_A[ki][mi] ) \
494  inout( TILE_B[ni][ki] ) \
495  shared( TILE_A, TILE_B ) \
496  firstprivate( ki, mi, ni ) \
497  label( dgemm )
498  cblas_dgemm( CblasRowMajor,
499  CblasNoTrans, CblasTrans,
500  tile_size_n,
501  tile_size_k,
502  tile_size_m,
503  ALPHA, TILE_B[ni][mi], tile_size_m,
504  TILE_A[ki][mi], tile_size_m,
505  1.0, TILE_B[ni][ki], tile_size_k );
506  }
507  }
508  }
509  }
510  }
511  // --SIDE = Right & UPLO = Lower & TRANS_A = NoTrans--
512  else
513  {
514  if ( TRANS_A == NoTrans )
515  {
516  for ( ki = 0; ki < nt; ki++ )
517  {
518  tile_size_k = ddss_tile_size( N, ki );
519 
520  for ( ni = 0; ni < mt; ni++ )
521  {
522  tile_size_n = ddss_tile_size( M, ni );
523 
524  #pragma oss task in( TILE_A[ki][ki] ) \
525  inout( TILE_B[ni][ki] ) \
526  shared( TILE_A, TILE_B ) \
527  firstprivate( ki, ni ) \
528  label( dtrmm )
529  cblas_dtrmm( CblasRowMajor,
530  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
531  ( CBLAS_TRANSPOSE ) TRANS_A,
532  ( CBLAS_DIAG ) DIAG,
533  tile_size_n,
534  tile_size_k,
535  ALPHA, TILE_A[ki][ki], tile_size_k,
536  TILE_B[ni][ki], tile_size_k );
537 
538  for ( mi = ki+1; mi < nt; mi++ )
539  {
540  tile_size_m = ddss_tile_size( N, mi );
541 
542  #pragma oss task in( TILE_B[ni][mi] ) \
543  in( TILE_A[mi][ki] ) \
544  inout( TILE_B[ni][ki] ) \
545  shared( TILE_A, TILE_B ) \
546  firstprivate( ki, mi, ni ) \
547  label( dgemm )
548  cblas_dgemm( CblasRowMajor,
549  CblasNoTrans, CblasNoTrans,
550  tile_size_n,
551  tile_size_k,
552  tile_size_m,
553  ALPHA, TILE_B[ni][mi], tile_size_m,
554  TILE_A[mi][ki], tile_size_k,
555  1.0, TILE_B[ni][ki], tile_size_k );
556  }
557  }
558  }
559  }
560  // --SIDE = Right & UPLO = Lower & TRANS_A = Trans--
561  else
562  {
563  for ( ki = nt-1; ki > -1; ki-- )
564  {
565  tile_size_k = ddss_tile_size( N, ki );
566 
567  for ( ni = 0; ni < mt; ni++ )
568  {
569  tile_size_n = ddss_tile_size( M, ni );
570 
571  #pragma oss task in( TILE_A[ki][ki] ) \
572  inout( TILE_B[ni][ki] ) \
573  shared( TILE_A, TILE_B ) \
574  firstprivate( ki, ni ) \
575  label( dtrmm )
576  cblas_dtrmm( CblasRowMajor,
577  ( CBLAS_SIDE ) SIDE, ( CBLAS_UPLO ) UPLO,
578  ( CBLAS_TRANSPOSE ) TRANS_A,
579  ( CBLAS_DIAG ) DIAG,
580  tile_size_n,
581  tile_size_k,
582  ALPHA, TILE_A[ki][ki], tile_size_k,
583  TILE_B[ni][ki], tile_size_k );
584 
585  for ( mi = 0; mi < ki; mi++ )
586  {
587  tile_size_m = ddss_tile_size( N, mi );
588 
589  #pragma oss task in( TILE_B[ni][mi] ) \
590  in( TILE_A[ki][mi] ) \
591  inout( TILE_B[ni][ki] ) \
592  shared( TILE_A, TILE_B ) \
593  firstprivate( ki, mi, ni ) \
594  label( dgemm )
595  cblas_dgemm( CblasRowMajor,
596  CblasNoTrans, CblasTrans,
597  tile_size_n,
598  tile_size_k,
599  tile_size_m,
600  ALPHA, TILE_B[ni][mi], tile_size_m,
601  TILE_A[ki][mi], tile_size_m,
602  1.0, TILE_B[ni][ki], tile_size_k );
603  }
604  }
605  }
606  }
607  }
608  }
609 
610  /*********************************************
611  --From tiled data layout to flat data layout--
612  *********************************************/
613 
614  ddss_dtiled2flat( M, N, B, LDB, Bm, Bn, TILE_B );
615 
616  // --Tile matrices free--
617  free( TILE_A );
618  free( TILE_B );
619 
620  return Success;
621 
622 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52