LASs - Linear Algebra Routines on OmpSs
1.0.0
LASs
Main Page
Files
File List
File Members
include
lass_macros.h
Go to the documentation of this file.
1
/**
2
*
3
* @file lass_macros.h
4
*
5
* @brief Macro definitions.
6
*
7
* LASs is a software package provided by:
8
* Barcelona Supercomputing Center - Centro Nacional de Supercomputacion
9
*
10
* @author Pedro Valero-Lara pedro.valero@bsc.es
11
* @date 2017-01-02
12
* @reviewer
13
* @modified
14
*
15
**/
16
17
#ifndef DDSS_MACROS_H
18
#define DDSS_MACROS_H
19
20
// VERBOSE (enables diagnostic prints in the tests)
21
//#define VERBOSE
22
23
// Tile size
24
// TILE_SIZE: tile size constant, fixed at 512 in this header.
// NOTE(review): presumably the edge length, in elements, of the tiles used by
// the tiled routines - confirm against the callers.
#define TILE_SIZE 512
25
26
// Number of cores
27
// NUM_CORES: number of cores, fixed at 48 in this header.
// NOTE(review): this is a compile-time constant, not a runtime query -
// presumably it must be edited to match the target machine; confirm.
#define NUM_CORES 48
28
29
// Return the max/min of two numbers
30
// MAX(a, b): yields a when a compares greater than b, otherwise yields b.
// The selected argument is evaluated a second time, so do not pass
// expressions with side effects (e.g. i++).
#define MAX(a, b) ((a) > (b) ? (a) : (b))
31
// MIN(a, b): yields a when a compares less than b, otherwise yields b.
// The selected argument is evaluated a second time, so do not pass
// expressions with side effects (e.g. i++).
#define MIN(a, b) ((a) < (b) ? (a) : (b))
32
33
// Number of operations
34
// GEMM
35
// Number of multiplications in GEMM
36
// FMULS_GEMM(m_, n_, k_): multiplication count for GEMM, the plain product
// m_ * n_ * k_. The arithmetic is carried out in the type of the arguments,
// so integer arguments give an integer product.
#define FMULS_GEMM(m_, n_, k_) \
    ((m_) * (n_) * (k_))
37
// Number of additions in GEMM
38
// FADDS_GEMM(m_, n_, k_): addition count for GEMM, the plain product
// m_ * n_ * k_ (same expression as the multiplication count). The arithmetic
// is carried out in the type of the arguments.
#define FADDS_GEMM(m_, n_, k_) \
    ((m_) * (n_) * (k_))
39
// Flops in DGEMM
40
// FLOPS_DGEMM(m_, n_, k_): total operation count for a double-precision GEMM,
// i.e. FMULS_GEMM + FADDS_GEMM. Every dimension is converted to double before
// it is handed to the counting macros, so the products are formed in
// floating point rather than in integer arithmetic.
// The definition is kept as one clean continued definition: a stray token
// between the continuation lines would become part of the macro body and
// orphan the rest of the expression.
#define FLOPS_DGEMM(m_, n_, k_) \
    ( FMULS_GEMM((double)(m_), (double)(n_), (double)(k_)) \
    + FADDS_GEMM((double)(m_), (double)(n_), (double)(k_)) )
42
43
// SYMM
44
// Number of multiplications in SYMM
45
// FMULS_SYMM(side_, m_, n_): multiplication count for SYMM.
// When side_ equals Left the count is FMULS_GEMM(m_, m_, n_); for any other
// value it is FMULS_GEMM(m_, n_, n_).
// Left is not defined in the visible part of this header; it must be in scope
// wherever the macro is expanded.
#define FMULS_SYMM(side_, m_, n_) \
    ( ((side_) != Left) ? FMULS_GEMM((m_), (n_), (n_)) \
                        : FMULS_GEMM((m_), (m_), (n_)) )
46
// Number of additions in SYMM
47
// FADDS_SYMM(side_, m_, n_): addition count for SYMM.
// When side_ equals Left the count is FADDS_GEMM(m_, m_, n_); for any other
// value it is FADDS_GEMM(m_, n_, n_).
// Left is not defined in the visible part of this header; it must be in scope
// wherever the macro is expanded.
#define FADDS_SYMM(side_, m_, n_) \
    ( ((side_) != Left) ? FADDS_GEMM((m_), (n_), (n_)) \
                        : FADDS_GEMM((m_), (m_), (n_)) )
48
// Flops in DSYMM
49
// FLOPS_DSYMM(side_, m_, n_): total operation count for a double-precision
// SYMM, i.e. FMULS_SYMM + FADDS_SYMM. side_ is forwarded unchanged; m_ and n_
// are converted to double before being handed to the counting macros.
#define FLOPS_DSYMM(side_, m_, n_) \
    ( FMULS_SYMM(side_, (double)(m_), (double)(n_)) \
    + FADDS_SYMM(side_, (double)(m_), (double)(n_)) )
50
51
// TRSM
52
// Subfunction: Number of multiplications in TRSM
53
// FMULS_TRSM_2(m_, n_): side-independent helper for the TRSM multiplication
// count: 0.5 * n_ * m_ * (m_ + 1), evaluated left to right. The leading 0.5
// makes the result a double even for integer arguments.
#define FMULS_TRSM_2(m_, n_) \
    (0.5 * (n_) * (m_) * ((m_) + 1))
54
// Subfunction: Number of additions in TRSM
55
// FADDS_TRSM_2(m_, n_): side-independent helper for the TRSM addition count:
// 0.5 * n_ * m_ * (m_ - 1), evaluated left to right. The leading 0.5 makes
// the result a double even for integer arguments.
#define FADDS_TRSM_2(m_, n_) \
    (0.5 * (n_) * (m_) * ((m_) - 1))
56
// Number of multiplies in TRSM
57
// FMULS_TRSM(side_, m_, n_): multiplication count for TRSM.
// When side_ equals Left the count is FMULS_TRSM_2(m_, n_); for any other
// value the two dimensions are swapped: FMULS_TRSM_2(n_, m_).
// Left is not defined in the visible part of this header; it must be in scope
// wherever the macro is expanded.
#define FMULS_TRSM(side_, m_, n_) \
    ( ((side_) != Left) ? FMULS_TRSM_2((n_), (m_)) \
                        : FMULS_TRSM_2((m_), (n_)) )
58
// Number of additions in TRSM
59
// FADDS_TRSM(side_, m_, n_): addition count for TRSM.
// When side_ equals Left the count is FADDS_TRSM_2(m_, n_); for any other
// value the two dimensions are swapped: FADDS_TRSM_2(n_, m_).
// Left is not defined in the visible part of this header; it must be in scope
// wherever the macro is expanded.
#define FADDS_TRSM(side_, m_, n_) \
    ( ((side_) != Left) ? FADDS_TRSM_2((n_), (m_)) \
                        : FADDS_TRSM_2((m_), (n_)) )
60
// Flops in DTRSM
61
// FLOPS_DTRSM(side_, m_, n_): total operation count for a double-precision
// TRSM, i.e. FMULS_TRSM + FADDS_TRSM. side_ is forwarded unchanged; m_ and n_
// are converted to double before being handed to the counting macros.
#define FLOPS_DTRSM(side_, m_, n_) \
    ( FMULS_TRSM(side_, (double)(m_), (double)(n_)) \
    + FADDS_TRSM(side_, (double)(m_), (double)(n_)) )
62
63
// TRMM
64
// Subfunction: Number of multiplications in TRMM
65
// FMULS_TRMM_2(m_, n_): side-independent helper for the TRMM multiplication
// count: 0.5 * n_ * m_ * (m_ + 1), evaluated left to right. The leading 0.5
// makes the result a double even for integer arguments.
#define FMULS_TRMM_2(m_, n_) \
    (0.5 * (n_) * (m_) * ((m_) + 1))
66
// Subfunction: Number of additions in TRMM
67
// FADDS_TRMM_2(m_, n_): side-independent helper for the TRMM addition count:
// 0.5 * n_ * m_ * (m_ - 1), evaluated left to right. The leading 0.5 makes
// the result a double even for integer arguments.
#define FADDS_TRMM_2(m_, n_) \
    (0.5 * (n_) * (m_) * ((m_) - 1))
68
// Number of multiplies in TRMM
69
// FMULS_TRMM(side_, m_, n_): multiplication count for TRMM.
// When side_ equals Left the count is FMULS_TRMM_2(m_, n_); for any other
// value the two dimensions are swapped: FMULS_TRMM_2(n_, m_).
// Left is not defined in the visible part of this header; it must be in scope
// wherever the macro is expanded.
#define FMULS_TRMM(side_, m_, n_) \
    ( ((side_) != Left) ? FMULS_TRMM_2((n_), (m_)) \
                        : FMULS_TRMM_2((m_), (n_)) )
70
// Number of additions in TRMM
71
// FADDS_TRMM(side_, m_, n_): addition count for TRMM.
// When side_ equals Left the count is FADDS_TRMM_2(m_, n_); for any other
// value the two dimensions are swapped: FADDS_TRMM_2(n_, m_).
// Left is not defined in the visible part of this header; it must be in scope
// wherever the macro is expanded.
#define FADDS_TRMM(side_, m_, n_) \
    ( ((side_) != Left) ? FADDS_TRMM_2((n_), (m_)) \
                        : FADDS_TRMM_2((m_), (n_)) )
72
// Flops in DTRMM
73
// FLOPS_DTRMM(side_, m_, n_): total operation count for a double-precision
// TRMM, i.e. FMULS_TRMM + FADDS_TRMM. side_ is forwarded unchanged; m_ and n_
// are converted to double before being handed to the counting macros.
#define FLOPS_DTRMM(side_, m_, n_) \
    ( FMULS_TRMM(side_, (double)(m_), (double)(n_)) \
    + FADDS_TRMM(side_, (double)(m_), (double)(n_)) )
74
75
// SYRK
76
// Number of multiplications in SYRK
77
// FMULS_SYRK(k_, n_): multiplication count for SYRK:
// 0.5 * k_ * n_ * (n_ + 1), evaluated left to right. The leading 0.5 makes
// the result a double even for integer arguments.
#define FMULS_SYRK(k_, n_) \
    (0.5 * (k_) * (n_) * ((n_) + 1))
78
// Number of additions in SYRK
79
// FADDS_SYRK(k_, n_): addition count for SYRK:
// 0.5 * k_ * n_ * (n_ + 1), evaluated left to right - the same expression as
// the multiplication count. The leading 0.5 makes the result a double even
// for integer arguments.
#define FADDS_SYRK(k_, n_) \
    (0.5 * (k_) * (n_) * ((n_) + 1))
80
// Flops in DSYRK
81
// FLOPS_DSYRK(k_, n_): total operation count for a double-precision SYRK,
// i.e. FMULS_SYRK + FADDS_SYRK, with both dimensions converted to double
// before being handed to the counting macros.
#define FLOPS_DSYRK(k_, n_) \
    ( FMULS_SYRK((double)(k_), (double)(n_)) \
    + FADDS_SYRK((double)(k_), (double)(n_)) )
82
83
// SYR2K
84
// Number of multiplications in SYR2K
85
// FMULS_SYR2K(k_, n_): multiplication count for SYR2K: k_ * n_ * n_.
// The arithmetic is carried out in the type of the arguments, so integer
// arguments give an integer product.
#define FMULS_SYR2K(k_, n_) \
    ((k_) * (n_) * (n_))
86
// Number of additions in SYR2K
87
// FADDS_SYR2K(k_, n_): addition count for SYR2K: k_ * n_ * n_ + n_.
// The arithmetic is carried out in the type of the arguments, so integer
// arguments give an integer result.
#define FADDS_SYR2K(k_, n_) \
    ((k_) * (n_) * (n_) + (n_))
88
// Flops in DSYR2K
89
// FLOPS_DSYR2K(k_, n_): total operation count for a double-precision SYR2K,
// i.e. FMULS_SYR2K + FADDS_SYR2K, with both dimensions converted to double
// before being handed to the counting macros.
#define FLOPS_DSYR2K(k_, n_) \
    ( FMULS_SYR2K((double)(k_), (double)(n_)) \
    + FADDS_SYR2K((double)(k_), (double)(n_)) )
90
91
// POTRF
92
// Number of multiplications in POTRF
93
// FMULS_POTRF(n_): multiplication count for POTRF:
//   (1/6) * n_^3 + (1/2) * n_^2 + (1/3) * n_
// Every use of n_ is parenthesized so that an expression argument such as
// FMULS_POTRF(a + b) is treated as a single value in all three terms.
// The fractional coefficients make the result a double even for an integer
// argument.
#define FMULS_POTRF(n_) \
    ( (1./6.) * (n_) * (n_) * (n_) \
    + (0.5)   * (n_) * (n_)        \
    + (1./3.) * (n_) )
95
// Number of additions in POTRF
96
// FADDS_POTRF(n_): addition count for POTRF:
//   (1/6) * n_^3 - (1/6) * n_
// The fractional coefficients make the result a double even for an integer
// argument.
#define FADDS_POTRF(n_) \
    ( (1./6.) * (n_) * (n_) * (n_) \
    - (1./6.) * (n_) )
97
// Flops in DPOTRF
98
// FLOPS_DPOTRF(n_): total operation count for a double-precision POTRF,
// i.e. FMULS_POTRF + FADDS_POTRF, with n_ converted to double before being
// handed to the counting macros.
#define FLOPS_DPOTRF(n_) \
    ( FMULS_POTRF((double)(n_)) \
    + FADDS_POTRF((double)(n_)) )
99
100
// GETRF
101
// Number of multiplications in GETRF
102
// FMULS_GETRF(m_, n_): multiplication count for GETRF.
// For m_ >= n_:
//   0.5*m_*n_^2 - (1/6)*n_^3 + 0.5*m_*n_ - 0.5*n_^2 + (2/3)*n_
// otherwise the same formula with m_ and n_ exchanged.
// Terms are summed strictly left to right; do not regroup them, as that
// would change floating-point rounding.
#define FMULS_GETRF( m_, n_) \
    ( ( (m_) >= (n_) ) \
      ? ( 0.5 * (m_) * (n_) * (n_) - 1./6. * (n_) * (n_) * (n_) \
          + 0.5 * (m_) * (n_) - 0.5 * (n_) * (n_) + 2./3. * (n_) ) \
      : ( 0.5 * (n_) * (m_) * (m_) - 1./6. * (m_) * (m_) * (m_) \
          + 0.5 * (n_) * (m_) - 0.5 * (m_) * (m_) + 2./3. * (m_) ) )
103
// Number of additions in GETRF
104
// FADDS_GETRF(m_, n_): addition count for GETRF.
// For m_ >= n_:
//   0.5*m_*n_^2 - (1/6)*n_^3 - 0.5*m_*n_ + (1/6)*n_
// otherwise the same formula with m_ and n_ exchanged.
// Terms are summed strictly left to right; do not regroup them, as that
// would change floating-point rounding.
#define FADDS_GETRF( m_, n_) \
    ( ( (m_) >= (n_) ) \
      ? ( 0.5 * (m_) * (n_) * (n_) - 1./6. * (n_) * (n_) * (n_) \
          - 0.5 * (m_) * (n_) + 1./6. * (n_) ) \
      : ( 0.5 * (n_) * (m_) * (m_) - 1./6. * (m_) * (m_) * (m_) \
          - 0.5 * (n_) * (m_) + 1./6. * (m_) ) )
105
// Flops in DGETRF
106
// FLOPS_DGETRF(m_, n_): total operation count for a double-precision GETRF,
// i.e. FMULS_GETRF + FADDS_GETRF, with both dimensions converted to double
// before being handed to the counting macros.
#define FLOPS_DGETRF(m_, n_) \
    ( FMULS_GETRF((double)(m_), (double)(n_)) \
    + FADDS_GETRF((double)(m_), (double)(n_)) )
107
108
#endif
109
110
#ifndef DSSS_MACROS_H
111
#define DSSS_MACROS_H
112
113
// Number of operations
114
// GTSV
115
// Flops in DGTSV
116
// FLOPS_DGTSV(n_): operation count charged to DGTSV: 8 * n_.
// Unlike the dense FLOPS_D* macros in this header, no conversion to double
// is applied, so an integer argument gives an integer result.
#define FLOPS_DGTSV( n_ ) \
    (8 * (n_))
117
118
// SPMV
119
// Flops in DSPMV
120
// FLOPS_DSPMV(nnz_): operation count charged to DSPMV: 2 * nnz_.
// Unlike the dense FLOPS_D* macros in this header, no conversion to double
// is applied, so an integer argument gives an integer result.
#define FLOPS_DSPMV( nnz_ ) \
    (2 * (nnz_))
121
122
#endif
123
Generated by Doxygen
1.8.11