| Prototype | C Source | SSE Source (NASM, Inline GCC) |
| void mult_su3_nn ( su3_matrix *a, su3_matrix *b, su3_matrix *c ); |
m_mat_nn.c |
sse_mult_nn.nas, sse_mult_nn.h |
| void mult_su3_na ( su3_matrix *a, su3_matrix *b, su3_matrix *c ); |
m_mat_na.c |
sse_mult_na.nas, sse_mult_na.h |
| void mult_su3_an ( su3_matrix *a, su3_matrix *b, su3_matrix *c ); |
m_mat_an.c |
sse_mult_an.nas, sse_mult_an.h |
| float realtrace_su3( su3_matrix *a, su3_matrix *b ); |
realtr.c |
none |
| complex trace_su3( su3_matrix *a ); |
trace_su3.c |
none |
| complex complextrace_su3( su3_matrix *a, su3_matrix *b ); |
complextr.c |
none |
| complex det_su3( su3_matrix *a ); |
det_su3.c |
none |
| void add_su3_matrix( su3_matrix *a, su3_matrix *b, su3_matrix *c ); |
addmat.c |
none |
| void sub_su3_matrix( su3_matrix *a, su3_matrix *b, su3_matrix *c ); |
submat.c |
none |
| void scalar_mult_su3_matrix( su3_matrix *src, float scalar, su3_matrix *dest); |
s_m_mat.c |
none |
| void scalar_mult_add_su3_matrix( su3_matrix *src1, su3_matrix *src2,
float scalar, su3_matrix *dest); |
s_m_a_mat.c |
sse_s_m_a_mat.nas, sse_s_m_a_mat.h |
| void scalar_mult_sub_su3_matrix( su3_matrix *src1, su3_matrix *src2,
float scalar, su3_matrix *dest); |
s_m_s_mat.c |
none |
| void c_scalar_mult_su3mat( su3_matrix *src, complex *scalar,
su3_matrix *dest); |
cs_m_mat.c |
none |
| void c_scalar_mult_add_su3mat( su3_matrix *src1, su3_matrix *src2,
complex *scalar, su3_matrix *dest); |
cs_m_a_mat.c |
none |
| void c_scalar_mult_sub_su3mat( su3_matrix *src1, su3_matrix *src2,
complex *scalar, su3_matrix *dest); |
cs_m_s_mat.c |
none |
| void su3_adjoint( su3_matrix *a, su3_matrix *b ); |
su3_adjoint.c |
none |
| void make_anti_hermitian( su3_matrix *m3, anti_hermitmat *ah3 ); |
make_ahmat.c |
none |
| void random_anti_hermitian( anti_hermitmat *mat_antihermit, void *prn_pt ); |
rand_ahmat.c |
none |
| void uncompress_anti_hermitian( anti_hermitmat *mat_anti, su3_matrix *mat ); |
uncmp_ahmat.c |
none |
| void compress_anti_hermitian( su3_matrix *mat, anti_hermitmat *mat_anti); |
cmp_ahmat.c |
none |
| void clear_su3mat( su3_matrix *dest ); |
clear_mat.c |
none |
| void su3mat_copy( su3_matrix *a, su3_matrix *b ); |
su3mat_copy.c |
none |
| void dumpmat( su3_matrix *m ); |
dumpmat.c |
none |
| void su3_projector( su3_vector *a, su3_vector *b, su3_matrix *c ); |
su3_proj.c |
sse_su3_proj.nas, sse_su3_proj.h |
| complex su3_dot( su3_vector *a, su3_vector *b ); |
su3_dot.c |
none |
| float su3_rdot( su3_vector *a, su3_vector *b ); |
su3_rdot.c |
none |
| float magsq_su3vec( su3_vector *a ); |
msq_su3vec.c |
none |
| void su3vec_copy( su3_vector *a, su3_vector *b ); |
su3vec_copy.c |
none |
| void dumpvec( su3_vector *v ); |
dumpvec.c |
none |
| void clearvec( su3_vector *v ); |
clearvec.c |
none |
| void mult_su3_mat_vec( su3_matrix *a, su3_vector *b, su3_vector *c ); |
m_matvec.c |
sse_mat_vec.nas, sse_mat_vec.h |
| void mult_su3_mat_vec_sum( su3_matrix *a, su3_vector *b, su3_vector *c ); |
m_matvec_s.c |
none |
| void mult_su3_mat_vec_sum_4dir( su3_matrix *a, su3_vector *b0,
su3_vector *b1, su3_vector *b2, su3_vector *b3, su3_vector *c ); |
m_mv_s_4dir.c |
sse_mat_vec_sum_4dir.nas, sse_mat_vec_sum_4dir.h |
| void mult_su3_mat_vec_nsum( su3_matrix *a, su3_vector *b, su3_vector *c ); |
m_matvec_ns.c |
none |
| void mult_adj_su3_mat_vec( su3_matrix *a, su3_vector *b, su3_vector *c ); |
m_amatvec.c |
sse_adj_mat_vec.nas, sse_adj_mat_vec.h |
| void mult_adj_su3_mat_vec_4dir( su3_matrix *a, su3_vector *b, su3_vector *c ); |
m_amv_4dir.c |
sse_adj_mat_vec_4dir.nas, sse_adj_mat_vec_4dir.h |
| void mult_adj_su3_mat_vec_sum( su3_matrix *a, su3_vector *b, su3_vector *c ); |
m_amatvec_s.c |
none |
| void mult_adj_su3_mat_vec_nsum( su3_matrix *a, su3_vector *b, su3_vector *c ); |
m_amatvec_ns.c |
none |
| void add_su3_vector( su3_vector *a, su3_vector *b, su3_vector *c ); |
addvec.c |
sse_addvec.nas, sse_addvec.h |
| void sub_su3_vector( su3_vector *a, su3_vector *b, su3_vector *c ); |
subvec.c |
none |
| void sub_four_su3_vecs( su3_vector *a, su3_vector *b1, su3_vector *b2,
su3_vector *b3, su3_vector *b4 ); |
sub4vecs.c |
sse_sub4vecs.nas, sse_sub4vecs.h |
| void scalar_mult_su3_vector( su3_vector *src, float scalar,
su3_vector *dest); |
s_m_vec.c |
none |
| void scalar_mult_add_su3_vector( su3_vector *src1, su3_vector *src2,
float scalar, su3_vector *dest); |
s_m_a_vec.c |
sse_s_m_a_vec.nas, sse_s_m_a_vec.h |
| void scalar_mult_sum_su3_vector( su3_vector *src1, su3_vector *src2,
float scalar); |
s_m_sum_vec.c |
none |
| void scalar_mult_sub_su3_vector( su3_vector *src1, su3_vector *src2,
float scalar, su3_vector *dest); |
s_m_s_vec.c |
none |
| void scalar_mult_wvec( wilson_vector *src, float s, wilson_vector *dest ); |
s_m_wvec.c |
none |
| void scalar_mult_hwvec( half_wilson_vector *src, float s,
half_wilson_vector *dest ); |
s_m_hwvec.c |
none |
| void scalar_mult_add_wvec( wilson_vector *src1, wilson_vector *src2,
float scalar, wilson_vector *dest ); |
s_m_a_wvec.c |
none |
| void scalar_mult_addtm_wvec( wilson_vector *src1, wilson_vector *src2,
float scalar, wilson_vector *dest ); |
s_m_atm_wvec.c |
none |
| void c_scalar_mult_add_wvec(wilson_vector *src1, wilson_vector *src2,
complex *phase, wilson_vector *dest ); |
cs_m_a_wvec.c |
none |
| void c_scalar_mult_add_wvec2(wilson_vector *src1, wilson_vector *src2,
complex s, wilson_vector *dest ); |
cs_m_a_wvec2.c |
none |
| void c_scalar_mult_su3vec( su3_vector *src, complex *phase, su3_vector *dest ); |
cs_m_vec.c |
none |
| void c_scalar_mult_add_su3vec(su3_vector *v1, complex *phase, su3_vector *v2); |
cs_m_a_vec.c |
none |
| void c_scalar_mult_sub_su3vec(su3_vector *v1, complex *phase, su3_vector *v2); |
cs_m_s_vec.c |
none |
| void mult_mat_wilson_vec( su3_matrix *mat, wilson_vector *src,
wilson_vector *dest ); |
m_mat_wvec.c |
none |
| void mult_su3_mat_hwvec( su3_matrix *mat, half_wilson_vector *src,
half_wilson_vector *dest ); |
m_mat_hwvec.c |
sse_mat_hwvec.nas, sse_mat_hwvec.h |
| void mult_adj_mat_wilson_vec( su3_matrix *mat, wilson_vector *src,
wilson_vector *dest); |
m_amat_wvec.c |
none |
| void mult_adj_su3_mat_hwvec( su3_matrix *mat, half_wilson_vector *src,
half_wilson_vector *dest ); |
m_amat_hwvec.c |
sse_adj_mat_hwvec.nas, sse_adj_mat_hwvec.h |
| void add_wilson_vector( wilson_vector *src1, wilson_vector *src2,
wilson_vector *dest ); |
add_wvec.c |
none |
| void sub_wilson_vector( wilson_vector *src1, wilson_vector *src2,
wilson_vector *dest ); |
sub_wvec.c |
none |
| float magsq_wvec( wilson_vector *src ); |
msq_wvec.c |
none |
| complex wvec_dot( wilson_vector *src1, wilson_vector *src2 ); |
wvec_dot.c |
none |
| complex wvec2_dot( wilson_vector *src1, wilson_vector *src2 ); |
wvec2_dot.c |
none |
| float wvec_rdot( wilson_vector *a, wilson_vector *b ); |
wvec_rdot.c |
none |
| void wp_shrink( wilson_vector *src, half_wilson_vector *dest,
int dir, int sign ); |
wp_shrink.c |
none |
| void wp_shrink_4dir( wilson_vector *a, half_wilson_vector *b1,
half_wilson_vector *b2, half_wilson_vector *b3,
half_wilson_vector *b4, int sign ); |
wp_shrink4.c |
none |
| void wp_grow( half_wilson_vector *src, wilson_vector *dest,
int dir, int sign ); |
wp_grow.c |
none |
| void wp_grow_add( half_wilson_vector *src, wilson_vector *dest,
int dir, int sign ); |
wp_grow_a.c |
none |
| void grow_add_four_wvecs( wilson_vector *a, half_wilson_vector *b1,
half_wilson_vector *b2, half_wilson_vector *b3,
half_wilson_vector *b4, int sign, int sum ); |
grow4vecs.c |
none |
| void mult_by_gamma( wilson_vector *src, wilson_vector *dest, int dir ); |
mb_gamma.c |
none |
| void mult_by_gamma_left( wilson_matrix *src, wilson_matrix *dest, int dir ); |
mb_gamma_l.c |
none |
| void mult_by_gamma_right( wilson_matrix *src, wilson_matrix *dest, int dir ); |
mb_gamma_r.c |
none |
| void su3_projector_w( wilson_vector *a, wilson_vector *b, su3_matrix *c ); |
su3_proj_w.c |
none |
| void clear_wvec( wilson_vector *dest ); |
clear_wvec.c |
none |
| void copy_wvec( wilson_vector *src, wilson_vector *dest ); |
copy_wvec.c |
none |
| void dump_wilson_vec( wilson_vector *src ); |
none |
none |
| float gaussian_rand_no( void *prn_pt ); |
rand_ahmat.c |
none |