LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
Functions
kdsymm.c File Reference

LASs-DDSs kdsymm routine. More...

#include "../include/lass.h"
Include dependency graph for kdsymm.c:

Go to the source code of this file.

Functions

enum LASS_RETURN kdsymm (enum DDSS_SIDE SIDE, enum DDSS_UPLO UPLO, int M, int N, const double ALPHA, double *A, int LDA, double *B, int LDB, const double BETA, double *C, int LDC)
 

Detailed Description

LASs-DDSs kdsymm routine.

LASs-DDSs is a software package provided by: Barcelona Supercomputing Center - Centro Nacional de Supercomputacion

Author
Pedro Valero-Lara pedro.nosp@m..val.nosp@m.ero@b.nosp@m.sc.e.nosp@m.s
Date
2017-08-11

Definition in file kdsymm.c.

Function Documentation

enum LASS_RETURN kdsymm ( enum DDSS_SIDE  SIDE,
enum DDSS_UPLO  UPLO,
int  M,
int  N,
const double  ALPHA,
double *  A,
int  LDA,
double *  B,
int  LDB,
const double  BETA,
double *  C,
int  LDC 
)

Performs one of the matrix-matrix operations:

C = ALPHA * A * B + BETA * C

or

C = ALPHA * B * A + BETA * C

where op( X ) is one of:

op( X ) = X      or
op( X ) = X**T 

ALPHA and BETA are scalars, A is a symmetric matrix, and B and C are M by N matrices.

Parameters
[in]SIDEenum DDSS_SIDE. UPLO specifies the position of the symmetric A matrix in the operation:
  • Left: C = ALPHA * A * B + BETA * C
  • Right: C = ALPHA * B * A + BETA * C
[in]UPLOenum DDSS_UPLO. UPLO specifies the form of A is stored:
  • Lower: Lower triangle of A is stored. The upper traingular part is not referenced.
  • Upper: Upper triangle of A is stored. The lower triangular part is not referenced.
[in]Mint. M specifies the number of rows of the matrix C. M must be equal or greater than zero.
[in]Nint. N specifies the number of columns of the matrix C. N must be equal or greater than zero.
[in]ALPHAdouble.
[in]Adouble *. A is a pointer to a matrix of dimension Ma ( rows ) by Na (columns), where Ma is M and Na is M when SIDE = Left, and Ma is N and Na is N when SIDE = Right
[in]LDAint. LDA specifies the number of columns of A ( row-major order ). LDA must be at least max( 1, Na ).
[in]Bdouble *. B is a pointer to a matrix of dimension M by N.
[in]LDBint. LDB specifies the number of columns of B ( row-major order ). LDB must be at least max( 1, N ).
[in]BETAdouble.
[in,out]Cdouble *. C is a pointer to a matrix of dimension M by N. On exit, C is overwritten by the M by N matrix.
[in]LDCint. LDC specifies the number of columns of C ( row-major order ). LDC must be at least max( 1, N ).
Return values
Successsucessful exit
NoSuccessunsucessful exit
See also
ddss_dgemm
ddss_tile
ddss_flat2tiled
ddss_symflat2tiled
ddss_tiled2flat
ddss_symtiled2flat

Definition at line 125 of file kdsymm.c.

References ddss_dflat2tiled(), ddss_dsymflat2tiled(), ddss_dtiled2flat(), and ddss_tile_size().

Referenced by ddss_dsymm().

130 {
131 
132  // Local variables
133  int mt, kt, nt;
134  int mi, ki, ni;
135  int Am, An;
136  int Amt, Ant;
137  int tile_size_m;
138  int tile_size_n;
139  int tile_size_k;
140  int k_check;
141  double betat;
142 
143  // Number of tiles
144  if ( M % TILE_SIZE == 0 )
145  {
146  mt = M / TILE_SIZE;
147  }
148  else
149  {
150  mt = ( M / TILE_SIZE ) + 1;
151  }
152 
153  if ( N % TILE_SIZE == 0 )
154  {
155  nt = N / TILE_SIZE;
156  }
157  else
158  {
159  nt = ( N / TILE_SIZE ) + 1;
160  }
161 
162  if ( SIDE == Left )
163  {
164  if ( M % TILE_SIZE == 0 )
165  {
166  kt = M / TILE_SIZE;
167  }
168  else
169  {
170  kt = ( M / TILE_SIZE ) + 1;
171  }
172  }
173  else
174  {
175  if ( N % TILE_SIZE == 0 )
176  {
177  kt = N / TILE_SIZE;
178  }
179  else
180  {
181  kt = ( N / TILE_SIZE ) + 1;
182  }
183  }
184 
185  /****************************
186  --Tile matrices declaration--
187  ****************************/
188 
189  if ( SIDE == Left )
190  {
191  Am = M;
192  An = M;
193  Amt = mt;
194  Ant = mt;
195  }
196  else
197  {
198  Am = N;
199  An = N;
200  Amt = nt;
201  Ant = nt;
202  }
203 
204  /***************************
205  --Tile matrices allocation--
206  ***************************/
207 
208  double (*TILE_A)[Ant][TILE_SIZE * TILE_SIZE] = malloc ( Amt * Ant *
209  TILE_SIZE * TILE_SIZE * sizeof( double ) );
210 
211  if ( TILE_A == NULL)
212  {
213  fprintf( stderr, "Failure in kdsymm for matrix TILE_A\n" );
214  return NoSuccess;
215  }
216 
217  double (*TILE_B)[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
218  TILE_SIZE * TILE_SIZE * sizeof( double ) );
219 
220  if ( TILE_B == NULL)
221  {
222  fprintf( stderr, "Failure in kdsymm for matrix TILE_B\n" );
223  return NoSuccess;
224  }
225 
226  double (*TILE_C)[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
227  TILE_SIZE * TILE_SIZE * sizeof( double ) );
228 
229  if ( TILE_C == NULL)
230  {
231  fprintf( stderr, "Failure in kdsymm for matrix TILE_C\n" );
232  return NoSuccess;
233  }
234 
235  /************************************************
236  // --From flat data layout to tiled data layout--
237  ************************************************/
238 
239  // From flat and symmetric matrix A to tile matrix TILE_A
240  ddss_dsymflat2tiled( Am, An, A, LDA, Amt, Ant, TILE_A, UPLO );
241 
242  // From flat matrix B to tile matrix TILE_B
243  ddss_dflat2tiled( M, N, B, LDB, mt, nt, TILE_B );
244 
245  // From flat matrix C to tile matrix TILE_C
246  ddss_dflat2tiled( M, N, C, LDC, mt, nt, TILE_C );
247 
248  /*************
249  --DSYMM tile--
250  *************/
251 
252  for ( mi = 0; mi < mt; mi++ )
253  {
254  tile_size_m = ddss_tile_size( M, mi );
255  for ( ni = 0; ni < nt; ni++ )
256  {
257  tile_size_n = ddss_tile_size( N, ni );
258  if ( SIDE == Left )
259  {
260  // --SIDE = Left & UPLO = Lower--
261  if ( UPLO == Lower)
262  {
263  for ( ki = 0; ki < kt; ki++ )
264  {
265  if (ki == 0)
266  {
267  betat = BETA;
268  }
269  else
270  {
271  betat = 1.0;
272  }
273  tile_size_k = ddss_tile_size( M, ki );
274 
275  if ( ki < mi )
276  {
277  #pragma oss task in( TILE_A[mi][ki] ) \
278  in( TILE_B[ki][ni] ) \
279  inout( TILE_C[mi][ni] ) \
280  shared( TILE_A, TILE_B, TILE_C ) \
281  firstprivate( mi, ni, ki, betat )
282  cblas_dgemm( CblasRowMajor,
283  ( CBLAS_TRANSPOSE ) NoTrans,
284  ( CBLAS_TRANSPOSE ) NoTrans,
285  tile_size_m,
286  tile_size_n,
287  tile_size_k,
288  ALPHA, TILE_A[mi][ki], tile_size_k,
289  TILE_B[ki][ni], tile_size_n,
290  betat, TILE_C[mi][ni], tile_size_n );
291  }
292  else
293  {
294  if ( ki == mi )
295  {
296  #pragma oss task in( TILE_A[ki][ki] ) \
297  in( TILE_B[ki][ni] ) \
298  inout( TILE_C[mi][ni] ) \
299  shared( TILE_A, TILE_B, TILE_C ) \
300  firstprivate( mi, ni, ki, betat )
301  cblas_dsymm( CblasRowMajor,
302  ( CBLAS_SIDE ) SIDE,
303  ( CBLAS_UPLO ) UPLO,
304  tile_size_m, tile_size_n,
305  ALPHA, TILE_A[ki][ki], tile_size_k,
306  TILE_B[ki][ni], tile_size_n,
307  betat, TILE_C[mi][ni],
308  tile_size_n );
309  }
310  else
311  {
312  #pragma oss task in( TILE_A[ki][mi] ) \
313  in( TILE_B[ki][ni] ) \
314  inout( TILE_C[mi][ni] ) \
315  shared( TILE_A, TILE_B, TILE_C ) \
316  firstprivate( mi, ni, ki, betat )
317  cblas_dgemm( CblasRowMajor,
318  ( CBLAS_TRANSPOSE ) Trans,
319  ( CBLAS_TRANSPOSE ) NoTrans,
320  tile_size_m,
321  tile_size_n,
322  tile_size_k,
323  ALPHA, TILE_A[ki][mi], tile_size_m,
324  TILE_B[ki][ni], tile_size_n,
325  betat, TILE_C[mi][ni],
326  tile_size_n );
327  }
328  }
329  }
330  }
331  // --SIDE = Left & UPLO = Upper--
332  else
333  {
334  for ( ki = 0; ki < kt; ki++ )
335  {
336  if (ki == 0)
337  {
338  betat = BETA;
339  }
340  else
341  {
342  betat = 1.0;
343  }
344  tile_size_k = ddss_tile_size( M, ki );
345 
346  if ( ki < mi )
347  {
348  #pragma oss task in( TILE_A[ki][mi] ) \
349  in( TILE_B[ki][ni] ) \
350  inout( TILE_C[mi][ni] ) \
351  shared( TILE_A, TILE_B, TILE_C ) \
352  firstprivate( mi, ni, ki, betat )
353  cblas_dgemm( CblasRowMajor,
354  ( CBLAS_TRANSPOSE ) Trans,
355  ( CBLAS_TRANSPOSE ) NoTrans,
356  tile_size_m,
357  tile_size_n,
358  tile_size_k,
359  ALPHA, TILE_A[ki][mi], tile_size_m,
360  TILE_B[ki][ni], tile_size_n,
361  betat, TILE_C[mi][ni], tile_size_n );
362  }
363  else
364  {
365  if ( ki == mi )
366  {
367  #pragma oss task in( TILE_A[ki][ki] ) \
368  in( TILE_B[ki][ni] ) \
369  inout( TILE_C[mi][ni] ) \
370  shared( TILE_A, TILE_B, TILE_C ) \
371  firstprivate( mi, ni, ki, betat )
372  cblas_dsymm( CblasRowMajor,
373  ( CBLAS_SIDE ) SIDE,
374  ( CBLAS_UPLO ) UPLO,
375  tile_size_m, tile_size_n,
376  ALPHA, TILE_A[ki][ki], tile_size_k,
377  TILE_B[ki][ni], tile_size_n,
378  betat, TILE_C[mi][ni],
379  tile_size_n );
380  }
381  else
382  {
383  #pragma oss task in( TILE_A[mi][ki] ) \
384  in( TILE_B[ki][ni] ) \
385  inout( TILE_C[mi][ni] ) \
386  shared( TILE_A, TILE_B, TILE_C ) \
387  firstprivate( mi, ni, ki, betat )
388  cblas_dgemm( CblasRowMajor,
389  ( CBLAS_TRANSPOSE ) NoTrans,
390  ( CBLAS_TRANSPOSE ) NoTrans,
391  tile_size_m,
392  tile_size_n,
393  tile_size_k,
394  ALPHA, TILE_A[mi][ki], tile_size_k,
395  TILE_B[ki][ni], tile_size_n,
396  betat, TILE_C[mi][ni],
397  tile_size_n );
398  }
399  }
400  }
401  }
402  }
403  else
404  {
405  // --SIDE = Right & UPLO = Lower--
406  if ( UPLO == Lower)
407  {
408  for ( ki = 0; ki < kt; ki++ )
409  {
410  if (ki == 0)
411  {
412  betat = BETA;
413  }
414  else
415  {
416  betat = 1.0;
417  }
418  tile_size_k = ddss_tile_size( N, ki );
419 
420  if ( ki < ni )
421  {
422  #pragma oss task in( TILE_B[mi][ki] ) \
423  in( TILE_A[ni][ki] ) \
424  inout( TILE_C[mi][ni] ) \
425  shared( TILE_A, TILE_B, TILE_C ) \
426  firstprivate( mi, ni, ki, betat )
427  cblas_dgemm( CblasRowMajor,
428  ( CBLAS_TRANSPOSE ) NoTrans,
429  ( CBLAS_TRANSPOSE ) Trans,
430  tile_size_m,
431  tile_size_n,
432  tile_size_k,
433  ALPHA, TILE_B[mi][ki], tile_size_k,
434  TILE_A[ni][ki], tile_size_k,
435  betat, TILE_C[mi][ni], tile_size_n );
436  }
437  else
438  {
439  if ( ki == ni )
440  {
441  #pragma oss task in( TILE_A[ki][ki] ) \
442  in( TILE_B[mi][ki] ) \
443  inout( TILE_C[mi][ni] ) \
444  shared( TILE_A, TILE_B, TILE_C ) \
445  firstprivate( mi, ni, ki, betat )
446  cblas_dsymm( CblasRowMajor,
447  ( CBLAS_SIDE ) SIDE,
448  ( CBLAS_UPLO ) UPLO,
449  tile_size_m, tile_size_n,
450  ALPHA, TILE_A[ki][ki], tile_size_k,
451  TILE_B[mi][ki], tile_size_k,
452  betat, TILE_C[mi][ni],
453  tile_size_n );
454  }
455  else
456  {
457  #pragma oss task in( TILE_B[mi][ki] ) \
458  in( TILE_A[ki][ni] ) \
459  inout( TILE_C[mi][ni] ) \
460  shared( TILE_A, TILE_B, TILE_C ) \
461  firstprivate( mi, ni, ki, betat )
462  cblas_dgemm( CblasRowMajor,
463  ( CBLAS_TRANSPOSE ) NoTrans,
464  ( CBLAS_TRANSPOSE ) NoTrans,
465  tile_size_m,
466  tile_size_n,
467  tile_size_k,
468  ALPHA, TILE_B[mi][ki], tile_size_k,
469  TILE_A[ki][ni], tile_size_n,
470  betat, TILE_C[mi][ni],
471  tile_size_n );
472  }
473  }
474  }
475  }
476  // --SIDE = Right & UPLO = Upper--
477  else
478  {
479  for ( ki = 0; ki < kt; ki++ )
480  {
481  if (ki == 0)
482  {
483  betat = BETA;
484  }
485  else
486  {
487  betat = 1.0;
488  }
489  tile_size_k = ddss_tile_size( N, ki );
490 
491  if ( ki < ni )
492  {
493  #pragma oss task in( TILE_B[mi][ki] ) \
494  in( TILE_A[ki][ni] ) \
495  inout( TILE_C[mi][ni] ) \
496  shared( TILE_A, TILE_B, TILE_C ) \
497  firstprivate( mi, ni, ki, betat )
498  cblas_dgemm( CblasRowMajor,
499  ( CBLAS_TRANSPOSE ) NoTrans,
500  ( CBLAS_TRANSPOSE ) NoTrans,
501  tile_size_m,
502  tile_size_n,
503  tile_size_k,
504  ALPHA, TILE_B[mi][ki], tile_size_k,
505  TILE_A[ki][ni], tile_size_n,
506  betat, TILE_C[mi][ni], tile_size_n );
507  }
508  else
509  {
510  if ( ki == ni )
511  {
512  #pragma oss task in( TILE_A[ki][ki] ) \
513  in( TILE_B[ki][ni] ) \
514  inout( TILE_C[mi][ni] ) \
515  shared( TILE_A, TILE_B, TILE_C ) \
516  firstprivate( mi, ni, ki, betat )
517  cblas_dsymm( CblasRowMajor,
518  ( CBLAS_SIDE ) SIDE,
519  ( CBLAS_UPLO ) UPLO,
520  tile_size_m, tile_size_n,
521  ALPHA, TILE_A[ki][ki], tile_size_k,
522  TILE_B[mi][ki], tile_size_k,
523  betat, TILE_C[mi][ni],
524  tile_size_n );
525  }
526  else
527  {
528  #pragma oss task in( TILE_B[mi][ki] ) \
529  in( TILE_A[ni][ki] ) \
530  inout( TILE_C[mi][ni] ) \
531  shared( TILE_A, TILE_B, TILE_C ) \
532  firstprivate( mi, ni, ki, betat )
533  cblas_dgemm( CblasRowMajor,
534  ( CBLAS_TRANSPOSE ) NoTrans,
535  ( CBLAS_TRANSPOSE ) Trans,
536  tile_size_m,
537  tile_size_n,
538  tile_size_k,
539  ALPHA, TILE_B[mi][ki], tile_size_k,
540  TILE_A[ni][ki], tile_size_k,
541  betat, TILE_C[mi][ni],
542  tile_size_n );
543  }
544  }
545  }
546  }
547  }
548  }
549  }
550 
551  // From tile matrix TILE_C to flat matrix C
552  ddss_dtiled2flat( M, N, C, LDC, mt, nt, TILE_C );
553 
554  // --Tile matrices free--
555  free( TILE_A );
556  free( TILE_B );
557  free( TILE_C );
558 
559  return Success;
560 
561 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52