// LASs - Linear Algebra Routines on OmpSs 1.0.0
// lass_macros.h
/**
 *
 * @file lass_macros.h
 *
 * @brief Macro definitions.
 *
 * LASs is a software package provided by:
 * Barcelona Supercomputing Center - Centro Nacional de Supercomputacion
 *
 * @author Pedro Valero-Lara pedro.valero@bsc.es
 * @date 2017-01-02
 * @reviewer
 * @modified
 *
 **/

#ifndef DDSS_MACROS_H
#define DDSS_MACROS_H

// VERBOSE (activate prints in testings)
//#define VERBOSE

// Tile size
#define TILE_SIZE 512

// Number of cores
#define NUM_CORES 48

// Return the max/min of two numbers.
// NOTE: these are function-like macros, so the selected argument is
// evaluated twice. Do not pass expressions with side effects
// (e.g. MAX(i++, j)).
#define MAX( a, b ) ( ( ( a ) > ( b ) ) ? ( a ) : ( b ) )
#define MIN( a, b ) ( ( ( a ) < ( b ) ) ? ( a ) : ( b ) )

// ---------------------------------------------------------------------------
// Number of operations
//
// Convention used below: FMULS_x / FADDS_x give the multiplication / addition
// counts of routine x and operate on whatever type the caller passes in;
// FLOPS_Dx casts every size argument to double first and returns
// FMULS_x + FADDS_x.
//
// The SYMM, TRSM and TRMM macros compare side_ against the identifier `Left`,
// which is not defined in this header and must be visible wherever those
// macros are expanded.
// ---------------------------------------------------------------------------

// GEMM
// Number of multiplications in GEMM: m * n * k
#define FMULS_GEMM(m_, n_, k_) ( (m_) * (n_) * (k_) )
// Number of additions in GEMM: m * n * k
#define FADDS_GEMM(m_, n_, k_) ( (m_) * (n_) * (k_) )
// Flops in DGEMM
#define FLOPS_DGEMM(m_, n_, k_) ( FMULS_GEMM((double)(m_), (double)(n_), \
    (double)(k_)) + FADDS_GEMM((double)(m_), (double)(n_), (double)(k_)) )

// SYMM
// Number of multiplications in SYMM (GEMM count with m*m*n for Left,
// m*n*n otherwise)
#define FMULS_SYMM(side_, m_, n_) ( ( (side_) == Left ) \
    ? FMULS_GEMM((m_), (m_), (n_)) : FMULS_GEMM((m_), (n_), (n_)) )
// Number of additions in SYMM
#define FADDS_SYMM(side_, m_, n_) ( ( (side_) == Left ) \
    ? FADDS_GEMM((m_), (m_), (n_)) : FADDS_GEMM((m_), (n_), (n_)) )
// Flops in DSYMM
#define FLOPS_DSYMM(side_, m_, n_) ( \
    FMULS_SYMM(side_, (double)(m_), (double)(n_)) \
    + FADDS_SYMM(side_, (double)(m_), (double)(n_)) )

// TRSM
// Subfunction: number of multiplications in TRSM, 0.5 * n * m * (m + 1)
#define FMULS_TRSM_2(m_, n_) ( 0.5 * (n_) * (m_) * ( (m_) + 1 ) )
// Subfunction: number of additions in TRSM, 0.5 * n * m * (m - 1)
#define FADDS_TRSM_2(m_, n_) ( 0.5 * (n_) * (m_) * ( (m_) - 1 ) )
// Number of multiplications in TRSM (m and n swap roles when side_ != Left)
#define FMULS_TRSM(side_, m_, n_) ( ( (side_) == Left ) \
    ? FMULS_TRSM_2((m_), (n_)) : FMULS_TRSM_2((n_), (m_)) )
// Number of additions in TRSM
#define FADDS_TRSM(side_, m_, n_) ( ( (side_) == Left ) \
    ? FADDS_TRSM_2((m_), (n_)) : FADDS_TRSM_2((n_), (m_)) )
// Flops in DTRSM
#define FLOPS_DTRSM(side_, m_, n_) ( \
    FMULS_TRSM(side_, (double)(m_), (double)(n_)) \
    + FADDS_TRSM(side_, (double)(m_), (double)(n_)) )

// TRMM
// Subfunction: number of multiplications in TRMM, 0.5 * n * m * (m + 1)
#define FMULS_TRMM_2(m_, n_) ( 0.5 * (n_) * (m_) * ( (m_) + 1 ) )
// Subfunction: number of additions in TRMM, 0.5 * n * m * (m - 1)
#define FADDS_TRMM_2(m_, n_) ( 0.5 * (n_) * (m_) * ( (m_) - 1 ) )
// Number of multiplications in TRMM (m and n swap roles when side_ != Left)
#define FMULS_TRMM(side_, m_, n_) ( ( (side_) == Left ) \
    ? FMULS_TRMM_2((m_), (n_)) : FMULS_TRMM_2((n_), (m_)) )
// Number of additions in TRMM
#define FADDS_TRMM(side_, m_, n_) ( ( (side_) == Left ) \
    ? FADDS_TRMM_2((m_), (n_)) : FADDS_TRMM_2((n_), (m_)) )
// Flops in DTRMM
#define FLOPS_DTRMM(side_, m_, n_) ( \
    FMULS_TRMM(side_, (double)(m_), (double)(n_)) \
    + FADDS_TRMM(side_, (double)(m_), (double)(n_)) )

// SYRK
// Number of multiplications in SYRK: 0.5 * k * n * (n + 1)
#define FMULS_SYRK(k_, n_) ( 0.5 * (k_) * (n_) * ( (n_) + 1 ) )
// Number of additions in SYRK (same expression as FMULS_SYRK)
#define FADDS_SYRK(k_, n_) ( 0.5 * (k_) * (n_) * ( (n_) + 1 ) )
// Flops in DSYRK
#define FLOPS_DSYRK(k_, n_) ( FMULS_SYRK( (double)(k_), (double)(n_) ) \
    + FADDS_SYRK( (double)(k_), (double)(n_) ) )

// SYR2K
// Number of multiplications in SYR2K: k * n * n
#define FMULS_SYR2K(k_, n_) ( (k_) * (n_) * (n_) )
// Number of additions in SYR2K: k * n * n + n
#define FADDS_SYR2K(k_, n_) ( (k_) * (n_) * (n_) + (n_) )
// Flops in DSYR2K
#define FLOPS_DSYR2K(k_, n_) ( FMULS_SYR2K((double)(k_), (double)(n_)) \
    + FADDS_SYR2K((double)(k_), (double)(n_)) )

// POTRF
// Number of multiplications in POTRF: n^3/6 + n^2/2 + n/3
// Every use of n_ is parenthesized so that an expression argument such as
// FMULS_POTRF(a + b) expands with the intended precedence.
#define FMULS_POTRF(n_) ( (1./6.) * (n_) * (n_) * (n_) + (0.5) * (n_) * (n_) \
    + (1./3.) * (n_) )
// Number of additions in POTRF: n^3/6 - n/6
#define FADDS_POTRF(n_) ( (1./6.) * (n_) * (n_) * (n_) - (1./6.) * (n_) )
// Flops in DPOTRF
#define FLOPS_DPOTRF(n_) ( FMULS_POTRF((double)(n_)) \
    + FADDS_POTRF((double)(n_)) )

// GETRF
// Number of multiplications in GETRF. The first branch handles m >= n; the
// second is the same polynomial with m and n exchanged.
#define FMULS_GETRF( m_, n_) ( ( (m_) >= (n_) ) \
    ? 0.5 * (m_) * (n_) * (n_) - 1./6. * (n_) * (n_) * (n_) \
      + 0.5 * (m_) * (n_) - 0.5 * (n_) * (n_) + 2./3. * (n_) \
    : 0.5 * (n_) * (m_) * (m_) - 1./6. * (m_) * (m_) * (m_) \
      + 0.5 * (n_) * (m_) - 0.5 * (m_) * (m_) + 2./3. * (m_) )
// Number of additions in GETRF (same branch structure as FMULS_GETRF)
#define FADDS_GETRF( m_, n_) ( ( (m_) >= (n_) ) \
    ? 0.5 * (m_) * (n_) * (n_) - 1./6. * (n_) * (n_) * (n_) \
      - 0.5 * (m_) * (n_) + 1./6. * (n_) \
    : 0.5 * (n_) * (m_) * (m_) - 1./6. * (m_) * (m_) * (m_) \
      - 0.5 * (n_) * (m_) + 1./6. * (m_) )
// Flops in DGETRF
#define FLOPS_DGETRF(m_, n_) ( FMULS_GETRF((double)(m_), (double)(n_)) \
    + FADDS_GETRF((double)(m_), (double)(n_)) )

#endif /* DDSS_MACROS_H */

#ifndef DSSS_MACROS_H
#define DSSS_MACROS_H

// Number of operations
//
// NOTE: the two macros below apply no cast to double; the result has the
// (promoted) type of the argument. With a plain int argument the product can
// overflow for very large sizes, so pass a wider or floating-point value when
// that is a concern.

// GTSV
// Flops in DGTSV: 8 * n
#define FLOPS_DGTSV( n_ ) ( 8 * (n_) )

// SPMV
// Flops in DSPMV: 2 * nnz
#define FLOPS_DSPMV( nnz_ ) ( 2 * (nnz_) )

#endif /* DSSS_MACROS_H */