LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
Functions
kdgemm.c File Reference

LASs-DDSs kdgemm routine. More...

#include "../include/lass.h"
Include dependency graph for kdgemm.c:

Go to the source code of this file.

Functions

enum LASS_RETURN kdgemm (enum DDSS_TRANS TRANS_A, enum DDSS_TRANS TRANS_B, int M, int N, int K, const double ALPHA, double *A, int LDA, double *B, int LDB, const double BETA, double *C, int LDC)
 

Detailed Description

LASs-DDSs kdgemm routine.

LASs-DDSs is a software package provided by: Barcelona Supercomputing Center - Centro Nacional de Supercomputacion

Author
Pedro Valero-Lara pedro.nosp@m..val.nosp@m.ero@b.nosp@m.sc.e.nosp@m.s
Date
2017-01-05

Definition in file kdgemm.c.

Function Documentation

enum LASS_RETURN kdgemm ( enum DDSS_TRANS  TRANS_A,
enum DDSS_TRANS  TRANS_B,
int  M,
int  N,
int  K,
const double  ALPHA,
double *  A,
int  LDA,
double *  B,
int  LDB,
const double  BETA,
double *  C,
int  LDC 
)

Performs the matrix-matrix operation:

C = ALPHA * op( A ) * op( B ) + BETA * C

where op( X ) is one of:

op( X ) = X      or
op( X ) = X**T 

ALPHA and BETA are scalars, and A, B and C are matrices, with op( A ) an M by K matrix, op( B ) a K by N matrix and C an M by N matrix.

Parameters
[in]TRANS_Aenum DDSS_TRANS. TRANS_A specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • NoTrans: op( A ) = A.
  • Trans: op( A ) = A**T.
[in]TRANS_Benum DDSS_TRANS. TRANS_B specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • NoTrans: op( B ) = B.
  • Trans: op( B ) = B**T.
[in]Mint. M specifies the number of rows of the matrix A and of the matrix C. M must be greater than zero.
[in]Nint. N specifies the number of columns of the matrix B and the number of columns of the matrix C. N must be greater than zero.
[in]Kint. K specifies the number of columns of the matrix A and the number of rows of the matrix B. K must be greater than zero.
[in]ALPHAdouble.
[in]Adouble *. A is a pointer to a matrix of dimension Ma ( rows ) by Ka ( columns ), where Ma is M and Ka is K when TRANS_A = NoTrans, and Ma is K and Ka is M otherwise.
[in]LDAint. LDA specifies the number of columns of A ( row-major order ). When TRANS_A = NoTrans then LDA must be at least max( 1, K ), otherwise LDA must be at least max( 1, M ).
[in]Bdouble *. B is a pointer to a matrix of dimension Kb ( rows ) by Nb ( columns ), where Kb is K and Nb is N when TRANS_B = NoTrans, and Kb is N and Nb is K otherwise.
[in]LDBint. LDB specifies the number of columns of B ( row-major order ). When TRANS_B = NoTrans then LDB must be at least max( 1, N ), otherwise LDB must be at least max( 1, K ).
[in]BETAdouble.
[in,out]Cdouble *. C is a pointer to a matrix of dimension LDC by N. On exit, C is overwritten by the M by N matrix ( ALPHA*op( A )*op( B ) + BETA*C ).
[in]LDCint. LDC specifies the number of columns of C ( row-major order). LDC must be at least max( 1, M )
Return values
Successsucessful exit
NoSuccessunsucessful exit
See also
ddss_dgemm
ddss_tile
ddss_flat2tiled
ddss_tiled2flat

Definition at line 131 of file kdgemm.c.

References ddss_dflat2tiled(), ddss_dtiled2flat(), and ddss_tile_size().

Referenced by ddss_dgemm().

136 {
137 
138  // Local variables
139  int mt, kt, nt;
140  int mi, ki, ni;
141  int Am, An;
142  int Bm, Bn;
143  int tile_size_m;
144  int tile_size_n;
145  int tile_size_k;
146  int k_check;
147  double betat;
148 
149  // Number of tiles
150  if ( M % TILE_SIZE == 0 )
151  {
152  mt = M / TILE_SIZE;
153  }
154  else
155  {
156  mt = ( M / TILE_SIZE ) + 1;
157  }
158 
159  if ( K % TILE_SIZE == 0 )
160  {
161  kt = K / TILE_SIZE;
162  }
163  else
164  {
165  kt = ( K / TILE_SIZE ) + 1;
166  }
167 
168  if ( N % TILE_SIZE == 0 )
169  {
170  nt = N / TILE_SIZE;
171  }
172  else
173  {
174  nt = ( N / TILE_SIZE ) + 1;
175  }
176 
177  /****************************
178  --Tile matrices declaration--
179  ****************************/
180 
181  if ( TRANS_A == NoTrans )
182  {
183  Am = mt;
184  An = kt;
185  }
186  else
187  {
188  Am = kt;
189  An = mt;
190  }
191 
192  if ( TRANS_B == NoTrans )
193  {
194  Bm = kt;
195  Bn = nt;
196  }
197  else
198  {
199  Bm = nt;
200  Bn = kt;
201  }
202 
203  /***************************
204  --Tile matrices allocation--
205  ***************************/
206 
207  double (*TILE_A)[An][TILE_SIZE * TILE_SIZE] = malloc ( Am * An *
208  TILE_SIZE * TILE_SIZE * sizeof( double ) );
209 
210  if ( TILE_A == NULL)
211  {
212  fprintf( stderr, "Failure in kdgemm for matrix TILE_A\n" );
213  return NoSuccess;
214  }
215 
216  double (*TILE_B)[Bn][TILE_SIZE * TILE_SIZE] = malloc ( Bm * Bn *
217  TILE_SIZE * TILE_SIZE * sizeof( double ) );
218 
219  if ( TILE_B == NULL)
220  {
221  fprintf( stderr, "Failure in kdgemm for matrix TILE_B\n" );
222  return NoSuccess;
223  }
224 
225  double (*TILE_C)[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
226  TILE_SIZE * TILE_SIZE * sizeof( double ) );
227 
228  if ( TILE_C == NULL)
229  {
230  fprintf( stderr, "Failure in kdgemm for matrix TILE_C\n" );
231  return NoSuccess;
232  }
233 
234  /*********************************************
235  --From flat data layout to tiled data layout--
236  *********************************************/
237 
238  // From flat matrix A to tile matrix TILE_A
239  if ( TRANS_A == NoTrans )
240  {
241  ddss_dflat2tiled( M, K, A, LDA, mt, kt, TILE_A );
242  }
243  else
244  {
245  ddss_dflat2tiled( K, M, A, LDA, kt, mt, TILE_A );
246  }
247 
248  // From flat matrix B to tile matrix TILE_B
249  if ( TRANS_B == NoTrans )
250  {
251  ddss_dflat2tiled( K, N, B, LDB, kt, nt, TILE_B );
252  }
253  else
254  {
255  ddss_dflat2tiled( N, K, B, LDB, nt, kt, TILE_B );
256  }
257 
258  // From flat matrix C to tile matrix TILE_C
259  ddss_dflat2tiled( M, N, C, LDC, mt, nt, TILE_C );
260 
261  /*************
262  --DGEMM tile--
263  *************/
264 
265  for ( mi = 0; mi < mt; mi++ )
266  {
267  tile_size_m = ddss_tile_size( M, mi );
268  for ( ni = 0; ni < nt; ni++ )
269  {
270  tile_size_n = ddss_tile_size( N, ni );
271  if ( TRANS_A == NoTrans )
272  {
273  k_check = K;
274  }
275  else
276  {
277  k_check = M;
278  }
279  // --Scale on C ( C = BETA * C )--
280  if ( ( ALPHA == 0.0 ) || ( k_check == 0 ) )
281  {
282  #pragma oss task inout( TILE_C[mi][ni] ) \
283  shared( TILE_A, TILE_B, TILE_C ) \
284  firstprivate( mi, ni ) \
285  no_copy_deps
286  cblas_dgemm( CblasRowMajor,
287  ( CBLAS_TRANSPOSE ) TRANS_A,
288  ( CBLAS_TRANSPOSE ) TRANS_B,
289  tile_size_m,
290  tile_size_n,
291  0,
292  ALPHA, TILE_A[0][0], 1,
293  TILE_B[0][0], 1,
294  BETA, TILE_C[mi][ni], tile_size_n );
295  }
296  else if ( TRANS_A == NoTrans )
297  {
298  // --TRANS_A = NoTrans & TRANS_B = NoTrans--
299  if ( TRANS_B == NoTrans )
300  {
301  for ( ki = 0; ki < kt; ki++ )
302  {
303  tile_size_k = ddss_tile_size( K, ki );
304  if (ki == 0)
305  {
306  betat = BETA;
307  }
308  else
309  {
310  betat = 1.0;
311  }
312 
313  #pragma oss task in( TILE_A[mi][ki] ) \
314  in( TILE_B[ki][ni] ) \
315  inout( TILE_C[mi][ni] ) \
316  shared( TILE_A, TILE_B, TILE_C ) \
317  firstprivate( mi, ni, ki, betat ) \
318  no_copy_deps
319  cblas_dgemm( CblasRowMajor,
320  ( CBLAS_TRANSPOSE ) TRANS_A,
321  ( CBLAS_TRANSPOSE ) TRANS_B,
322  tile_size_m,
323  tile_size_n,
324  tile_size_k,
325  ALPHA, TILE_A[mi][ki], tile_size_k,
326  TILE_B[ki][ni], tile_size_n,
327  betat, TILE_C[mi][ni], tile_size_n );
328  }
329  }
330  // --TRANS_A = NoTrans & TRANS_B = Trans--
331  else
332  {
333  for ( ki = 0; ki < kt; ki++ )
334  {
335  tile_size_k = ddss_tile_size( K, ki );
336  if (ki == 0)
337  {
338  betat = BETA;
339  }
340  else
341  {
342  betat = 1.0;
343  }
344 
345  #pragma oss task in( TILE_A[mi][ki] ) \
346  in( TILE_B[ni][ki] ) \
347  inout( TILE_C[mi][ni] ) \
348  shared( TILE_A, TILE_B, TILE_C ) \
349  firstprivate( mi, ni, ki, betat )
350  cblas_dgemm( CblasRowMajor,
351  ( CBLAS_TRANSPOSE ) TRANS_A,
352  ( CBLAS_TRANSPOSE ) TRANS_B,
353  tile_size_m,
354  tile_size_n,
355  tile_size_k,
356  ALPHA, TILE_A[mi][ki], tile_size_k,
357  TILE_B[ni][ki], tile_size_k,
358  betat, TILE_C[mi][ni], tile_size_n );
359  }
360  }
361  }
362  else
363  {
364  // --TRANS_A = Trans & TRANS_B = NoTrans--
365  if ( TRANS_B == NoTrans )
366  {
367  for ( ki = 0; ki < kt; ki++ )
368  {
369  tile_size_k = ddss_tile_size( K, ki );
370  if (ki == 0)
371  {
372  betat = BETA;
373  }
374  else
375  {
376  betat = 1.0;
377  }
378 
379  #pragma oss task in( TILE_A[ki][mi] ) \
380  in( TILE_B[ki][ni] ) \
381  inout( TILE_C[mi][ni] ) \
382  shared( TILE_A, TILE_B, TILE_C ) \
383  firstprivate( mi, ni, ki, betat )
384  cblas_dgemm( CblasRowMajor,
385  ( CBLAS_TRANSPOSE ) TRANS_A,
386  ( CBLAS_TRANSPOSE ) TRANS_B,
387  tile_size_m,
388  tile_size_n,
389  tile_size_k,
390  ALPHA, TILE_A[ki][mi], tile_size_m,
391  TILE_B[ki][ni], tile_size_n,
392  betat, TILE_C[mi][ni], tile_size_n );
393  }
394  }
395  // --TRANS_A = Trans & TRANS_B = Trans--
396  else
397  {
398  for ( ki = 0; ki < kt; ki++ )
399  {
400  tile_size_k = ddss_tile_size( K, ki );
401  if (ki == 0)
402  {
403  betat = BETA;
404  }
405  else
406  {
407  betat = 1.0;
408  }
409 
410  #pragma oss task in( TILE_A[ki][mi] ) \
411  in( TILE_B[ni][ki] ) \
412  inout( TILE_C[mi][ni] ) \
413  shared( TILE_A, TILE_B, TILE_C ) \
414  firstprivate( mi, ni, ki, betat )
415  cblas_dgemm( CblasRowMajor,
416  ( CBLAS_TRANSPOSE ) TRANS_A,
417  ( CBLAS_TRANSPOSE ) TRANS_B,
418  tile_size_m,
419  tile_size_n,
420  tile_size_k,
421  ALPHA, TILE_A[ki][mi], tile_size_m,
422  TILE_B[ni][ki], tile_size_k,
423  betat, TILE_C[mi][ni], tile_size_n );
424  }
425  }
426  }
427  }
428  }
429 
430  /************************************************
431  // --From tiled data layout to flat data layout--
432  ************************************************/
433 
434  // From tile matrix TILE_C to flat matrix C
435  ddss_dtiled2flat( M, N, C, LDC, mt, nt, TILE_C );
436 
437  // --Tile matrices free--
438  free( TILE_A );
439  free( TILE_B );
440  free( TILE_C );
441 
442  return Success;
443 
444 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52