LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
Functions
kdsyr2k.c File Reference

LASs-DDSs kdsyr2k routine. More...

#include "../include/lass.h"
Include dependency graph for kdsyr2k.c:

Go to the source code of this file.

Functions

enum LASS_RETURN kdsyr2k (enum DDSS_UPLO UPLO, enum DDSS_TRANS TRANS, int N, int K, const double ALPHA, double *A, int LDA, double *B, int LDB, const double BETA, double *C, int LDC)
 

Detailed Description

LASs-DDSs kdsyr2k routine.

LASs-DDSs is a software package provided by: Barcelona Supercomputing Center - Centro Nacional de Supercomputacion

Author
Pedro Valero-Lara pedro.nosp@m..val.nosp@m.ero@b.nosp@m.sc.e.nosp@m.s
Boro Sofranac boro..nosp@m.sofr.nosp@m.anac@.nosp@m.bsc..nosp@m.es
Date
2018-02-21

Definition in file kdsyr2k.c.

Function Documentation

enum LASS_RETURN kdsyr2k ( enum DDSS_UPLO  UPLO,
enum DDSS_TRANS  TRANS,
int  N,
int  K,
const double  ALPHA,
double *  A,
int  LDA,
double *  B,
int  LDB,
const double  BETA,
double *  C,
int  LDC 
)

Performs one of the symmetric rank 2k operations:

C = ALPHA * A * B**T + ALPHA * B * A**T + BETA * C or
C = ALPHA * A**T * B + ALPHA * B**T * A + BETA * C

ALPHA and BETA are scalars, C is an N by N symmetric matrix and A and B are N by K matrices in the first case and K by N matrices in the second case.

Parameters
[in]UPLOenum DDSS_UPLO. UPLO specifies the form in which C is stored:
  • Lower: Lower triangular part of C is stored. The upper traingular part is not referenced.
  • Upper: Upper triangular part of C is stored. The lower triangular part is not referenced.
[in]TRANSenum DDSS_TRANS. TRANS specifies the operation to be performed as follows:
  • NoTrans: C = ALPHA * A * B**T + ALPHA * B * A**T + BETA * C
  • Trans: C = ALPHA * A**T * B + ALPHA * B**T * A + BETA * C
[in]Nint. N specifies the order of matrix C. N must be at least zero.
[in]Kint. With TRANS = NoTrans, K specifies the number of columns of the matrices A and B, and with TRANS = Trans, K specifies the number of rows of the matrices A and B. K must be at least zero.
[in]ALPHAdouble. ALPHA specifies the scalar alpha.
[in]Adouble *. A is a pointer to a matrix of dimension Na ( rows ) by Ka ( columns ), where Na is N and Ka is K when TRANS = NoTrans, and Na is K and Ka is N otherwise.
[in]LDAint. LDA specifies the number of columns of A ( row-major order ). When TRANS = NoTrans then LDA must be at least max( 1, K ), otherwise LDA must be at least max( 1, N ).
[in]Bdouble *. B is a pointer to a matrix of dimension Nb ( rows ) by Kb ( columns ), where Nb is N and Kb is K when TRANS = NoTrans, and Nb is K and Kb is N otherwise.
[in]LDBint. LDB specifies the number of columns of B ( row-major order ). When TRANS = NoTrans then LDB must be at least max( 1, K ), otherwise LDB must be at least max( 1, N ).
[in]BETAdouble. BETA specifies the scalar beta.
[in,out]Cdouble *. C is a pointer to a matrix of dimension N by N. When UPLO = Uppper the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of C is overwritten by the upper triangular part of the updated solution matrix C. When UPLO = Lower the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of C is overwritten by the lower triangular part of the updated solution matrix C.
[in]LDCint. LDC specifies the number of columns of C ( row-major order ). LDC must be at least max( 1, N ).
Return values
Successsuccessful exit
NoSuccessunsuccessful exit
See also
ddss_tile
ddss_flat2tiled
ddss_dsymflat2tiled
ddss_dsymtiled2flat

Definition at line 124 of file kdsyr2k.c.

References ddss_dflat2tiled(), ddss_dsymflat2tiled(), ddss_dsymtiled2flat(), and ddss_tile_size().

Referenced by ddss_dsyr2k().

129 {
130 
131  // Local variables
132  int nt, kt;
133  int mi, ni, ki;
134  int Am, An;
135  int Bm, Bn;
136  int Cm, Cn;
137  int tile_size_m;
138  int tile_size_n;
139  int tile_size_k;
140  int ka;
141  int kb;
142  double beta;
143 
144  // Number of tiles
145  if ( N % TILE_SIZE == 0 )
146  {
147  nt = N / TILE_SIZE;
148  }
149  else
150  {
151  nt = ( N / TILE_SIZE ) + 1;
152  }
153 
154  if ( K % TILE_SIZE == 0 )
155  {
156  kt = K / TILE_SIZE;
157  }
158  else
159  {
160  kt = ( K / TILE_SIZE ) + 1;
161  }
162 
163  /****************************
164  --Tile matrices declaration--
165  ****************************/
166 
167  if ( TRANS == NoTrans )
168  {
169  Am = Bm = nt;
170  An = Bn = kt;
171  ka = kb = N;
172  }
173  else
174  {
175  Am = Bm = kt;
176  An = Bn = nt;
177  ka = kb = K;
178  }
179 
180  Cm = Cn = nt;
181 
182  /***************************
183  --Tile matrices allocation--
184  ***************************/
185 
186  double ( *TILE_A )[An][TILE_SIZE * TILE_SIZE] = malloc(
187  Am * An * TILE_SIZE * TILE_SIZE * sizeof( double ) );
188 
189  if ( TILE_A == NULL )
190  {
191  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_A\n" );
192  return NoSuccess;
193  }
194 
195  double ( *TILE_B )[Bn][TILE_SIZE * TILE_SIZE] = malloc(
196  Bm * Bn * TILE_SIZE * TILE_SIZE * sizeof( double ) );
197 
198  if ( TILE_B == NULL )
199  {
200  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_B\n" );
201  return NoSuccess;
202  }
203 
204  double ( *TILE_C )[Cn][TILE_SIZE * TILE_SIZE] = malloc(
205  Cm * Cn * TILE_SIZE * TILE_SIZE * sizeof( double ) );
206 
207  if ( TILE_C == NULL )
208  {
209  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_C\n" );
210  return NoSuccess;
211  }
212 
213  /*********************************************
214  --From flat data layout to tiled data layout--
215  *********************************************/
216 
217  // From flat matrix A to tile matrix TILE_A
218  ddss_dflat2tiled( ka, LDA, A, LDA, Am, An, TILE_A );
219 
220  // From flat matrix B to tile matrix TILE_B
221  ddss_dflat2tiled( kb, LDB, B, LDB, Bm, Bn, TILE_B );
222 
223  // From flat matrix C to tile matrix TILE_C
224  ddss_dsymflat2tiled( N, N, C, LDC, Cm, Cn, TILE_C, UPLO );
225 
226  /*************
227  --DSYRK tile--
228  *************/
229 
230  // --TRANS = NoTrans & UPLO = Upper--
231  if ( TRANS == NoTrans )
232  {
233  if ( UPLO == Upper )
234  {
235  for ( mi = 0; mi < nt; mi++ )
236  {
237  tile_size_m = ddss_tile_size( N, mi );
238  for ( ni = 0; ni < kt; ni++ )
239  {
240  tile_size_n = ddss_tile_size( K, ni );
241 
242  if ( ni == 0 )
243  {
244  beta = BETA;
245  }
246  else
247  {
248  beta = 1.0;
249  }
250 
251  #pragma oss task in( TILE_A[mi][ni] ) \
252  in( TILE_B[mi][ni] ) \
253  inout( TILE_C[mi][mi] ) \
254  shared( TILE_A, TILE_B, TILE_C ) \
255  firstprivate( mi,ni ) \
256  label( dsyr2k )
257  cblas_dsyr2k( CblasRowMajor,
258  ( CBLAS_UPLO ) UPLO, ( CBLAS_TRANSPOSE ) TRANS,
259  tile_size_m,
260  tile_size_n,
261  ALPHA, TILE_A[mi][ni], tile_size_n,
262  TILE_B[mi][ni], tile_size_n,
263  beta, TILE_C[mi][mi], tile_size_m );
264 
265  for ( ki = mi + 1; ki < nt; ki++ )
266  {
267  tile_size_k = ddss_tile_size( N, ki );
268 
269  #pragma oss task in( TILE_A[mi][ni] ) \
270  in( TILE_A[ki][ni] ) \
271  in( TILE_B[mi][ni] ) \
272  in( TILE_B[ki][ni] ) \
273  inout( TILE_C[mi][ki] ) \
274  shared( TILE_A, TILE_B, TILE_C ) \
275  firstprivate( mi,ni,ki ) \
276  label( dgemm )
277  {
278  cblas_dgemm( CblasRowMajor,
279  CblasNoTrans, CblasTrans,
280  tile_size_m,
281  tile_size_k,
282  tile_size_n,
283  ALPHA, TILE_A[mi][ni], tile_size_n,
284  TILE_B[ki][ni], tile_size_n,
285  beta, TILE_C[mi][ki], tile_size_k );
286 
287  cblas_dgemm( CblasRowMajor,
288  CblasNoTrans, CblasTrans,
289  tile_size_m,
290  tile_size_k,
291  tile_size_n,
292  ALPHA, TILE_B[mi][ni], tile_size_n,
293  TILE_A[ki][ni], tile_size_n,
294  1.0, TILE_C[mi][ki], tile_size_k );
295  }
296  }
297 
298  }
299  }
300  }
301  // --TRANS = NoTrans & UPLO = Lower--
302  else
303  {
304  for ( mi = 0; mi < nt; mi++ )
305  {
306  tile_size_m = ddss_tile_size( N, mi );
307  for ( ni = 0; ni < kt; ni++ )
308  {
309  tile_size_n = ddss_tile_size( K, ni );
310 
311  if ( ni == 0 )
312  {
313  beta = BETA;
314  }
315  else
316  {
317  beta = 1.0;
318  }
319 
320  #pragma oss task in( TILE_A[mi][ni] ) \
321  in( TILE_B[mi][ni] ) \
322  inout( TILE_C[mi][mi] ) \
323  shared( TILE_A, TILE_B, TILE_C ) \
324  firstprivate( mi,ni ) \
325  label( dsyr2k )
326  cblas_dsyr2k( CblasRowMajor,
327  ( CBLAS_UPLO ) UPLO, ( CBLAS_TRANSPOSE ) TRANS,
328  tile_size_m,
329  tile_size_n,
330  ALPHA, TILE_A[mi][ni], tile_size_n,
331  TILE_B[mi][ni], tile_size_n,
332  beta, TILE_C[mi][mi], tile_size_m );
333 
334  for ( ki = mi + 1; ki < nt; ki++ )
335  {
336  tile_size_k = ddss_tile_size( N, ki );
337 
338  #pragma oss task in( TILE_A[ki][ni] ) \
339  in( TILE_A[mi][ni] ) \
340  in( TILE_B[ki][ni] ) \
341  in( TILE_B[mi][ni] ) \
342  inout( TILE_C[ki][mi] ) \
343  shared( TILE_A, TILE_B, TILE_C ) \
344  firstprivate( mi,ni,ki ) \
345  label( dgemm )
346  {
347  cblas_dgemm( CblasRowMajor,
348  CblasNoTrans, CblasTrans,
349  tile_size_k,
350  tile_size_m,
351  tile_size_n,
352  ALPHA, TILE_A[ki][ni], tile_size_n,
353  TILE_B[mi][ni], tile_size_n,
354  beta, TILE_C[ki][mi], tile_size_m );
355 
356  cblas_dgemm( CblasRowMajor,
357  CblasNoTrans, CblasTrans,
358  tile_size_k,
359  tile_size_m,
360  tile_size_n,
361  ALPHA, TILE_B[ki][ni], tile_size_n,
362  TILE_A[mi][ni], tile_size_n,
363  1.0, TILE_C[ki][mi], tile_size_m );
364  }
365  }
366 
367  }
368  }
369  }
370  }
371  // --TRANS = Trans & UPLO = Upper--
372  else
373  {
374  if ( UPLO == Upper )
375  {
376  for ( mi = 0; mi < kt; mi++ )
377  {
378  tile_size_m = ddss_tile_size( K, mi );
379  for ( ni = 0; ni < nt; ni++ )
380  {
381  tile_size_n = ddss_tile_size( N, ni );
382 
383  if ( mi == 0 )
384  {
385  beta = BETA;
386  }
387  else
388  {
389  beta = 1.0;
390  }
391 
392  #pragma oss task in( TILE_A[mi][ni] ) \
393  in( TILE_B[mi][ni] ) \
394  inout( TILE_C[ni][ni] ) \
395  shared( TILE_A, TILE_B, TILE_C ) \
396  firstprivate( mi,ni ) \
397  label( dsyr2k )
398  cblas_dsyr2k( CblasRowMajor,
399  ( CBLAS_UPLO ) UPLO, ( CBLAS_TRANSPOSE ) TRANS,
400  tile_size_n,
401  tile_size_m,
402  ALPHA, TILE_A[mi][ni], tile_size_n,
403  TILE_B[mi][ni], tile_size_n,
404  beta, TILE_C[ni][ni], tile_size_n );
405 
406  for ( ki = ni + 1; ki < nt; ki++ )
407  {
408  tile_size_k = ddss_tile_size( N, ki );
409 
410  #pragma oss task in( TILE_A[mi][ni] ) \
411  in( TILE_A[mi][ki] ) \
412  in( TILE_B[mi][ni] ) \
413  in( TILE_B[mi][ki] ) \
414  inout( TILE_C[ni][ki] ) \
415  shared( TILE_A, TILE_B, TILE_C ) \
416  firstprivate( mi,ni,ki ) \
417  label( dgemm )
418  {
419  cblas_dgemm( CblasRowMajor,
420  CblasTrans, CblasNoTrans,
421  tile_size_n,
422  tile_size_k,
423  tile_size_m,
424  ALPHA, TILE_A[mi][ni], tile_size_n,
425  TILE_B[mi][ki], tile_size_k,
426  beta, TILE_C[ni][ki], tile_size_k );
427 
428  cblas_dgemm( CblasRowMajor,
429  CblasTrans, CblasNoTrans,
430  tile_size_n,
431  tile_size_k,
432  tile_size_m,
433  ALPHA, TILE_B[mi][ni], tile_size_n,
434  TILE_A[mi][ki], tile_size_k,
435  1.0, TILE_C[ni][ki], tile_size_k );
436  }
437  }
438  }
439  }
440  }
441  // --TRANS = Trans & UPLO = Lower--
442  else
443  {
444  for ( mi = 0; mi < kt; mi++ )
445  {
446  tile_size_m = ddss_tile_size( K, mi );
447  for ( ni = 0; ni < nt; ni++ )
448  {
449  tile_size_n = ddss_tile_size( N, ni );
450 
451  if ( mi == 0 )
452  {
453  beta = BETA;
454  }
455  else
456  {
457  beta = 1.0;
458  }
459 
460  #pragma oss task in( TILE_A[mi][ni] ) \
461  in( TILE_B[mi][ni] ) \
462  inout( TILE_C[ni][ni] ) \
463  shared( TILE_A, TILE_B, TILE_C ) \
464  firstprivate( mi,ni ) \
465  label( dsyr2k )
466  cblas_dsyr2k( CblasRowMajor,
467  ( CBLAS_UPLO ) UPLO, ( CBLAS_TRANSPOSE ) TRANS,
468  tile_size_n,
469  tile_size_m,
470  ALPHA, TILE_A[mi][ni], tile_size_n,
471  TILE_B[mi][ni], tile_size_n,
472  beta, TILE_C[ni][ni], tile_size_n );
473 
474  for ( ki = ni + 1; ki < nt; ki++ )
475  {
476  tile_size_k = ddss_tile_size( N, ki );
477 
478  #pragma oss task in( TILE_A[mi][ki] ) \
479  in( TILE_A[mi][ni] ) \
480  in( TILE_B[mi][ki] ) \
481  in( TILE_B[mi][ni] ) \
482  inout( TILE_C[ki][ni] ) \
483  shared( TILE_A, TILE_B, TILE_C ) \
484  firstprivate( mi,ni,ki ) \
485  label( dgemm )
486  {
487  cblas_dgemm( CblasRowMajor,
488  CblasTrans, CblasNoTrans,
489  tile_size_k,
490  tile_size_n,
491  tile_size_m,
492  ALPHA, TILE_A[mi][ki], tile_size_k,
493  TILE_B[mi][ni], tile_size_n,
494  beta, TILE_C[ki][ni], tile_size_n );
495 
496  cblas_dgemm( CblasRowMajor,
497  CblasTrans, CblasNoTrans,
498  tile_size_k,
499  tile_size_n,
500  tile_size_m,
501  ALPHA, TILE_B[mi][ki], tile_size_k,
502  TILE_A[mi][ni], tile_size_n,
503  1.0, TILE_C[ki][ni], tile_size_n );
504  }
505  }
506  }
507  }
508  }
509  }
510 
511  /*********************************************
512  --From tiled data layout to flat data layout--
513  *********************************************/
514 
515  ddss_dsymtiled2flat( N, N, C, LDC, Cm, Cn, TILE_C, UPLO );
516 
517  // --Tile matrices free--
518  free( TILE_A );
519  free( TILE_B );
520  free( TILE_C );
521 
522  return Success;
523 
524 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dsymtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52