LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
Functions
kdsyrk.c File Reference

LASs-DDSs kdsyrk routine. More...

#include "../include/lass.h"
Include dependency graph for kdsyrk.c:

Go to the source code of this file.

Functions

enum LASS_RETURN kdsyrk (enum DDSS_UPLO UPLO, enum DDSS_TRANS TRANS_A, int N, int K, const double ALPHA, double *A, int LDA, const double BETA, double *C, int LDC)
 

Detailed Description

LASs-DDSs kdsyrk routine.

LASs-DDSs is a software package provided by: Barcelona Supercomputing Center - Centro Nacional de Supercomputacion

Author
Pedro Valero-Lara pedro.nosp@m..val.nosp@m.ero@b.nosp@m.sc.e.nosp@m.s
Boro Sofranac boro..nosp@m.sofr.nosp@m.anac@.nosp@m.bsc..nosp@m.es
Date
2018-02-13

Definition in file kdsyrk.c.

Function Documentation

enum LASS_RETURN kdsyrk ( enum DDSS_UPLO  UPLO,
enum DDSS_TRANS  TRANS_A,
int  N,
int  K,
const double  ALPHA,
double *  A,
int  LDA,
const double  BETA,
double *  C,
int  LDC 
)

Performs one of the symmetric rank k operations:

C = ALPHA * A * op( A ) + BETA * C  or
C = ALPHA * op( A ) * A + BETA * C

where op( X ) is:

op( X ) = X**T

ALPHA and BETA are scalars, C is an N by N symmetric matrix and A is an N by K matrix in the first case and a K by N matrix in the second case.

Parameters
[in] UPLO: enum DDSS_UPLO. UPLO specifies the form in which C is stored:
  • Lower: Lower triangular part of C is stored. The upper triangular part is not referenced.
  • Upper: Upper triangular part of C is stored. The lower triangular part is not referenced.
[in] TRANS_A: enum DDSS_TRANS. TRANS_A specifies the operation to be performed as follows:
  • NoTrans: C = ALPHA * A * A**T + BETA * C
  • Trans: C = ALPHA * A**T * A + BETA * C
[in] N: int. N specifies the order of matrix C. N must be at least zero.
[in] K: int. With TRANS_A = NoTrans, K specifies the number of columns of the matrix A, and with TRANS_A = Trans, K specifies the number of rows of the matrix A. K must be at least zero.
[in] ALPHA: double. ALPHA specifies the scalar alpha.
[in] A: double *. A is a pointer to a matrix of dimension Na ( rows ) by Ka ( columns ), where Na is N and Ka is K when TRANS_A = NoTrans, and Na is K and Ka is N otherwise.
[in] LDA: int. LDA specifies the number of columns of A ( row-major order ). When TRANS_A = NoTrans, LDA must be at least max( 1, K ); otherwise LDA must be at least max( 1, N ).
[in] BETA: double. BETA specifies the scalar beta.
[in,out] C: double *. C is a pointer to a matrix of dimension N by N. When UPLO = Upper the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of C is overwritten by the upper triangular part of the updated solution matrix C. When UPLO = Lower the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of C is overwritten by the lower triangular part of the updated solution matrix C.
[in] LDC: int. LDC specifies the number of columns of C ( row-major order ). LDC must be at least max( 1, N ).
Return values
Success: successful exit
NoSuccess: unsuccessful exit
See also
ddss_tile
ddss_dflat2tiled
ddss_dsymflat2tiled
ddss_dsymtiled2flat

Definition at line 117 of file kdsyrk.c.

References ddss_dflat2tiled(), ddss_dsymflat2tiled(), ddss_dsymtiled2flat(), and ddss_tile_size().

Referenced by ddss_dsyrk().

121 {
122 
123  // Local variables
124  int nt, kt;
125  int mi, ni, ki;
126  int Am, An;
127  int Cm, Cn;
128  int tile_size_m;
129  int tile_size_n;
130  int tile_size_k;
131  int ka;
132  double beta;
133 
134  // Number of tiles
135  if ( N % TILE_SIZE == 0 )
136  {
137  nt = N / TILE_SIZE;
138  }
139  else
140  {
141  nt = ( N / TILE_SIZE ) + 1;
142  }
143 
144  if ( K % TILE_SIZE == 0 )
145  {
146  kt = K / TILE_SIZE;
147  }
148  else
149  {
150  kt = ( K / TILE_SIZE ) + 1;
151  }
152 
153  /****************************
154  --Tile matrices declaration--
155  ****************************/
156 
157  if ( TRANS_A == NoTrans )
158  {
159  Am = nt;
160  An = kt;
161  ka = N;
162  }
163  else
164  {
165  Am = kt;
166  An = nt;
167  ka = K;
168  }
169 
170  Cm = Cn = nt;
171 
172  /***************************
173  --Tile matrices allocation--
174  ***************************/
175 
176  double ( *TILE_A )[An][TILE_SIZE * TILE_SIZE] = malloc(
177  Am * An * TILE_SIZE * TILE_SIZE * sizeof( double ) );
178 
179  if ( TILE_A == NULL )
180  {
181  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_A\n" );
182  return NoSuccess;
183  }
184 
185  double ( *TILE_C )[Cn][TILE_SIZE * TILE_SIZE] = malloc(
186  Cm * Cn * TILE_SIZE * TILE_SIZE * sizeof( double ) );
187 
188  if ( TILE_C == NULL )
189  {
190  fprintf( stderr, "Failure in ddss_dtile_alloc for matrix TILE_C\n" );
191  return NoSuccess;
192  }
193 
194  /*********************************************
195  --From flat data layout to tiled data layout--
196  *********************************************/
197 
198  // From flat matrix A to tile matrix TILE_A
199  ddss_dflat2tiled( ka, LDA, A, LDA, Am, An, TILE_A );
200 
201  // From flat matrix C to tile matrix TILE_C
202  ddss_dsymflat2tiled( N, N, C, LDC, Cm, Cn, TILE_C, UPLO );
203 
204  /*************
205  --DSYRK tile--
206  *************/
207 
208  // --TRANS_A = NoTrans & UPLO = Upper--
209  if ( TRANS_A == NoTrans )
210  {
211  if ( UPLO == Upper )
212  {
213  for ( mi = 0; mi < nt; mi++ )
214  {
215  tile_size_m = ddss_tile_size( N, mi );
216  for ( ni = 0; ni < kt; ni++ )
217  {
218  tile_size_n = ddss_tile_size( K, ni );
219 
220  if ( ni == 0 )
221  {
222  beta = BETA;
223  }
224  else
225  {
226  beta = 1.0;
227  }
228 
229  #pragma oss task in( TILE_A[mi][ni] ) \
230  inout( TILE_C[mi][mi] ) \
231  shared( TILE_A, TILE_C ) \
232  firstprivate( mi, ni ) \
233  label( dsyrk )
234  cblas_dsyrk( CblasRowMajor,
235  ( CBLAS_UPLO ) UPLO,
236  ( CBLAS_TRANSPOSE ) TRANS_A,
237  tile_size_m,
238  tile_size_n,
239  ALPHA, TILE_A[mi][ni], tile_size_n,
240  beta, TILE_C[mi][mi], tile_size_m );
241 
242  for ( ki = mi + 1; ki < nt; ki++ )
243  {
244  tile_size_k = ddss_tile_size( N, ki );
245 
246  #pragma oss task in( TILE_A[mi][ni] ) \
247  in( TILE_A[ki][ni] ) \
248  inout( TILE_C[mi][ki] ) \
249  shared( TILE_A, TILE_C ) \
250  firstprivate( mi, ni, ki ) \
251  label( dgemm )
252  cblas_dgemm( CblasRowMajor,
253  CblasNoTrans, CblasTrans,
254  tile_size_m,
255  tile_size_k,
256  tile_size_n,
257  ALPHA, TILE_A[mi][ni], tile_size_n,
258  TILE_A[ki][ni], tile_size_n,
259  beta, TILE_C[mi][ki], tile_size_k );
260  }
261 
262  }
263  }
264  }
265  // --TRANS_A = NoTrans & UPLO = Lower--
266  else
267  {
268  for ( mi = 0; mi < nt; mi++ )
269  {
270  tile_size_m = ddss_tile_size( N, mi );
271  for ( ni = 0; ni < kt; ni++ )
272  {
273  tile_size_n = ddss_tile_size( K, ni );
274 
275  if ( ni == 0 )
276  {
277  beta = BETA;
278  }
279  else
280  {
281  beta = 1.0;
282  }
283 
284  #pragma oss task in( TILE_A[mi][ni] ) \
285  inout( TILE_C[mi][mi] ) \
286  shared( TILE_A, TILE_C ) \
287  firstprivate( mi, ni ) \
288  label( dsyrk )
289  cblas_dsyrk( CblasRowMajor,
290  ( CBLAS_UPLO ) UPLO,
291  ( CBLAS_TRANSPOSE ) TRANS_A,
292  tile_size_m,
293  tile_size_n,
294  ALPHA, TILE_A[mi][ni], tile_size_n,
295  beta, TILE_C[mi][mi], tile_size_m );
296 
297  for ( ki = mi + 1; ki < nt; ki++ )
298  {
299  tile_size_k = ddss_tile_size( N, ki );
300 
301  #pragma oss task in( TILE_A[ki][ni] ) \
302  in( TILE_A[mi][ni] ) \
303  inout( TILE_C[ki][mi] ) \
304  shared( TILE_A, TILE_C ) \
305  firstprivate( mi, ni, ki ) \
306  label( dgemm )
307  cblas_dgemm( CblasRowMajor,
308  CblasNoTrans, CblasTrans,
309  tile_size_k,
310  tile_size_m,
311  tile_size_n,
312  ALPHA, TILE_A[ki][ni], tile_size_n,
313  TILE_A[mi][ni], tile_size_n,
314  beta, TILE_C[ki][mi], tile_size_m );
315  }
316 
317  }
318  }
319  }
320  }
321  // --TRANS_A = Trans & UPLO = Upper--
322  else
323  {
324  if ( UPLO == Upper )
325  {
326  for ( mi = 0; mi < kt; mi++ )
327  {
328  tile_size_m = ddss_tile_size( K, mi );
329  for ( ni = 0; ni < nt; ni++ )
330  {
331  tile_size_n = ddss_tile_size( N, ni );
332 
333  if ( mi == 0 )
334  {
335  beta = BETA;
336  }
337  else
338  {
339  beta = 1.0;
340  }
341 
342  #pragma oss task in( TILE_A[mi][ni] ) \
343  inout( TILE_C[ni][ni] ) \
344  shared( TILE_A, TILE_C ) \
345  firstprivate( mi, ni ) \
346  label( dsyrk )
347  cblas_dsyrk( CblasRowMajor,
348  ( CBLAS_UPLO ) UPLO,
349  ( CBLAS_TRANSPOSE ) TRANS_A,
350  tile_size_n,
351  tile_size_m,
352  ALPHA, TILE_A[mi][ni], tile_size_n,
353  beta, TILE_C[ni][ni], tile_size_n );
354 
355  for ( ki = ni + 1; ki < nt; ki++ )
356  {
357  tile_size_k = ddss_tile_size( N, ki );
358 
359  #pragma oss task in( TILE_A[mi][ni] ) \
360  in( TILE_A[mi][ki] ) \
361  inout( TILE_C[ni][ki] ) \
362  shared( TILE_A, TILE_C ) \
363  firstprivate( mi, ni, ki ) \
364  label( dgemm )
365  cblas_dgemm( CblasRowMajor,
366  CblasTrans, CblasNoTrans,
367  tile_size_n,
368  tile_size_k,
369  tile_size_m,
370  ALPHA, TILE_A[mi][ni], tile_size_n,
371  TILE_A[mi][ki], tile_size_k,
372  beta, TILE_C[ni][ki], tile_size_k );
373  }
374  }
375  }
376  }
377  // --TRANS_A = Trans & UPLO = Lower--
378  else
379  {
380  for ( mi = 0; mi < kt; mi++ )
381  {
382  tile_size_m = ddss_tile_size( K, mi );
383  for ( ni = 0; ni < nt; ni++ )
384  {
385  tile_size_n = ddss_tile_size( N, ni );
386 
387  if ( mi == 0 )
388  {
389  beta = BETA;
390  }
391  else
392  {
393  beta = 1.0;
394  }
395 
396  #pragma oss task in( TILE_A[mi][ni] ) \
397  inout( TILE_C[ni][ni] ) \
398  shared( TILE_A, TILE_C ) \
399  firstprivate( mi, ni ) \
400  label( dsyrk )
401  cblas_dsyrk( CblasRowMajor,
402  ( CBLAS_UPLO ) UPLO,
403  ( CBLAS_TRANSPOSE ) TRANS_A,
404  tile_size_n,
405  tile_size_m,
406  ALPHA, TILE_A[mi][ni], tile_size_n,
407  beta, TILE_C[ni][ni], tile_size_n );
408 
409  for ( ki = ni + 1; ki < nt; ki++ )
410  {
411  tile_size_k = ddss_tile_size( N, ki );
412 
413  #pragma oss task in( TILE_A[mi][ki] ) \
414  in( TILE_A[mi][ni] ) \
415  inout( TILE_C[ki][ni] ) \
416  shared( TILE_A, TILE_C ) \
417  firstprivate( mi, ni, ki ) \
418  label( dgemm )
419  cblas_dgemm( CblasRowMajor,
420  CblasTrans, CblasNoTrans,
421  tile_size_k,
422  tile_size_n,
423  tile_size_m,
424  ALPHA, TILE_A[mi][ki], tile_size_k,
425  TILE_A[mi][ni], tile_size_n,
426  beta, TILE_C[ki][ni], tile_size_n );
427  }
428  }
429  }
430  }
431  }
432 
433  /*********************************************
434  --From tiled data layout to flat data layout--
435  *********************************************/
436 
437  ddss_dsymtiled2flat( N, N, C, LDC, Cm, Cn, TILE_C, UPLO );
438 
439  // --Tile matrices free--
440  free( TILE_A );
441  free( TILE_C );
442 
443  return Success;
444 
445 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dsymflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
void ddss_dsymtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE], enum DDSS_UPLO UPLO)
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52