LASs - Linear Algebra Routines on OmpSs  1.0.0
LASs
Functions
kdtpgetrf.c File Reference

LASs-DDSs kdtpgetrf routine. More...

#include "../include/lass.h"
Include dependency graph for kdtpgetrf.c:

Go to the source code of this file.

Functions

enum LASS_RETURN kdtpgetrf (int M, int N, double *A, int LDA, int *IPIV)
 

Detailed Description

LASs-DDSs kdtpgetrf routine.

LASs-DDSs is a software package provided by: Barcelona Supercomputing Center - Centro Nacional de Supercomputacion

Author
Pedro Valero-Lara pedro.nosp@m..val.nosp@m.ero@b.nosp@m.sc.e.nosp@m.s
Boro Sofranac boro..nosp@m.sofr.nosp@m.anac@.nosp@m.bsc..nosp@m.es
Date
2018-04-08 2018-05-08

Definition in file kdtpgetrf.c.

Function Documentation

enum LASS_RETURN kdtpgetrf ( int  M,
int  N,
double *  A,
int  LDA,
int *  IPIV 
)

Performs the LU factorization with tiled pivoting ( row interchanges ) of a general M-by-N matrix A:

A = P * L * U

where P is a permutation matrix, L is a lower triangular ( lower trapezoidal if M > N ) matrix with unit diagonal elements and U is an upper triangular ( upper trapezoidal if M < N ) matrix.

Parameters
[in]Mint. M specifies the number of rows of the matrix A. M >= 0.
[in]Nint. N specifies the number of columns of the matrix A. N >= 0.
[in,out]Adouble *. A is a pointer to a regular matrix of dimension M-by-N. On exit, if return value is Success, the matrix A is overwriten by the factors L and U. The unit diagonal elements of L are not stored.
[in]LDAint. LDA specifies the number of columns of A ( row-major order ). LDA must be at least max( 1, N ).
[out]IPIVint *. ipiv is a pointer to an array of dimesion at least max( 1, min ( M, N ) ). ipiv( i ) = j, 1 <= i <= min( M, N ) implies that rows i and j have been interchanged.
Return values
Successsuccessful exit
NoSuccessunsuccessful exit
See also
ddss_tile
ddss_flat2tiled
ddss_tiled2flat

Definition at line 80 of file kdtpgetrf.c.

References ddss_dflat2tiled(), ddss_dtiled2flat(), and ddss_tile_size().

Referenced by ddss_dtpgetrf().

81 {
82 
83  // Local variables
84  int i;
85  int mt, nt;
86  int mi, mmi, ki, ni;
87  int tile_size_m;
88  int tile_size_mm;
89  int tile_size_n;
90  int tile_size_km;
91  int tile_size_kn;
92  int local_ipiv_size;
93  int current_position;
94  enum LASS_RETURN ret;
95  int *ipiv_local;
96 
97  // Number of tiles
98  if ( M % TILE_SIZE == 0 )
99  {
100  mt = M / TILE_SIZE;
101  }
102  else
103  {
104  mt = ( M / TILE_SIZE ) + 1;
105  }
106 
107  if ( N % TILE_SIZE == 0 )
108  {
109  nt = N / TILE_SIZE;
110  }
111  else
112  {
113  nt = ( N / TILE_SIZE ) + 1;
114  }
115 
116  /***************************
117  --Tile A matrix allocation--
118  ***************************/
119 
120  double ( *TILE_A )[nt][TILE_SIZE * TILE_SIZE] = malloc ( mt * nt *
121  TILE_SIZE * TILE_SIZE * sizeof( double ) );
122 
123  if ( TILE_A == NULL )
124  {
125  fprintf( stderr, "Failure in kdnpgetrf for matrix TILE_A\n" );
126  return NoSuccess;
127  }
128 
129  ipiv_local = ( int* ) malloc( MIN( M, N ) * sizeof( int ) );
130 
131  if ( ipiv_local == NULL )
132  {
133  fprintf( stderr, "Failure in kdnpgetrf for ipiv_local\n" );
134  return NoSuccess;
135  }
136 
137  /************************************************
138  // --From flat data layout to tiled data layout--
139  ************************************************/
140 
141  ddss_dflat2tiled( M, N, A, LDA, mt, nt, TILE_A );
142 
143  /****************
144  --DGETRF tile--
145  ****************/
146 
147  current_position = 0;
148  i = 0;
149  for ( ki = 0; ki < MIN( mt, nt ); ki++ )
150  {
151  tile_size_km = ddss_tile_size( M, ki );
152  tile_size_kn = ddss_tile_size( N, ki );
153 
154  #pragma oss task inout( TILE_A[ki][ki] ) \
155  inout( ipiv_local ) \
156  inout( current_position ) \
157  out( local_ipiv_size ) \
158  shared( TILE_A ) \
159  firstprivate( ki, i ) \
160  label( dgetrf )
161  {
162  LAPACKE_dgetrf( CblasRowMajor,
163  tile_size_km, tile_size_kn,
164  TILE_A[ki][ki], tile_size_kn,
165  ipiv_local + current_position );
166 
167  // Update the global ipiv array
168  local_ipiv_size = MIN( tile_size_km, tile_size_kn );
169  for ( i = 0; i < local_ipiv_size; i++ )
170  {
171  ( IPIV + current_position )[i] =
172  ( ipiv_local + current_position )[i] + current_position;
173  }
174  current_position += local_ipiv_size;
175  }
176  for ( mi = ki + 1; mi < mt; mi++ )
177  {
178  tile_size_m = ddss_tile_size( M, mi );
179 
180  #pragma oss task in( TILE_A[ki][ki] ) \
181  inout( TILE_A[mi][ki] ) \
182  shared( TILE_A ) \
183  firstprivate( mi, ki ) \
184  label( dtrsm_below )
185  cblas_dtrsm( CblasRowMajor,
186  ( CBLAS_SIDE ) Right, ( CBLAS_UPLO ) Upper,
187  ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) NonUnit,
188  tile_size_m, tile_size_kn,
189  1.0, TILE_A[ki][ki], tile_size_kn,
190  TILE_A[mi][ki], tile_size_kn );
191  }
192  for ( ni = 0; ni < ki; ni++ )
193  {
194  tile_size_n = ddss_tile_size( N, ni );
195  // Swap the tiles to the left of A[ki][ki]
196  #pragma oss task in( ipiv_local ) \
197  in( current_position ) \
198  in( local_ipiv_size ) \
199  inout( TILE_A[ki][ni] ) \
200  shared( TILE_A ) \
201  firstprivate( ki, ni ) \
202  label( dlaswp_left )
203  LAPACKE_dlaswp( CblasRowMajor,
204  tile_size_n, TILE_A[ki][ni], tile_size_n,
205  1, local_ipiv_size,
206  ipiv_local + current_position - local_ipiv_size,
207  1 );
208 
209  }
210  for ( ni = ki + 1; ni < nt; ni++ )
211  {
212  tile_size_n = ddss_tile_size( N, ni );
213  // Swap the tiles to the right of A[ki][ki]
214  #pragma oss task in( ipiv_local ) \
215  in( current_position ) \
216  in( local_ipiv_size ) \
217  inout( TILE_A[ki][ni] ) \
218  shared( TILE_A ) \
219  firstprivate( ki, ni ) \
220  label( dlaswp_right )
221  LAPACKE_dlaswp( CblasRowMajor,
222  tile_size_n, TILE_A[ki][ni], tile_size_n,
223  1, local_ipiv_size,
224  ipiv_local + current_position - local_ipiv_size,
225  1 );
226 
227  #pragma oss task in( TILE_A[ki][ki] ) \
228  inout( TILE_A[ki][ni] ) \
229  shared( TILE_A ) \
230  firstprivate( ni, ki ) \
231  label( dtrsm_right )
232  cblas_dtrsm( CblasRowMajor,
233  ( CBLAS_SIDE ) Left, ( CBLAS_UPLO ) Lower,
234  ( CBLAS_TRANSPOSE ) NoTrans, ( CBLAS_DIAG ) Unit,
235  tile_size_km, tile_size_n,
236  1.0, TILE_A[ki][ki], tile_size_kn,
237  TILE_A[ki][ni], tile_size_n );
238 
239  for ( mmi = ki + 1; mmi < mt; mmi++ )
240  {
241  tile_size_mm = ddss_tile_size( M, mmi );
242 
243  #pragma oss task in( TILE_A[mmi][ki] ) \
244  in( TILE_A[ki][ni] ) \
245  inout( TILE_A[mmi][ni] ) \
246  shared( TILE_A ) \
247  firstprivate( ni, mmi, ki ) \
248  label( dgemm )
249  cblas_dgemm( CblasRowMajor,
250  ( CBLAS_TRANSPOSE ) NoTrans,
251  ( CBLAS_TRANSPOSE ) NoTrans,
252  tile_size_mm,
253  tile_size_n,
254  TILE_SIZE,
255  -1.0, TILE_A[mmi][ki], tile_size_kn,
256  TILE_A[ki][ni], tile_size_n,
257  1.0, TILE_A[mmi][ni], tile_size_n );
258  }
259  }
260  }
261 
262  // --From tile data layout to flat data layout--
263  ddss_dtiled2flat( M, N, A, LDA, mt, nt, TILE_A );
264 
265  // --Free--
266  free( TILE_A );
267  free( ipiv_local );
268 
269  return Success;
270 
271 }
void ddss_dflat2tiled(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
void ddss_dtiled2flat(int M, int N, double *A, int LDA, int MT, int NT, double(*TILE_A)[NT][TILE_SIZE *TILE_SIZE])
int ddss_tile_size(int M, int MT)
Definition: ddss_tile.c:52