Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
NE10_mulcmatvec.c
1 /*
2  * Copyright 2011-15 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : math/NE10_mulcmatvec.c
30  */
31 
32 #include "NE10_types.h"
33 #include "macros.h"
34 
35 #include <assert.h>
36 
37 ne10_result_t ne10_mulcmatvec_cm2x2f_v2f_c (ne10_vec2f_t * dst, const ne10_mat2x2f_t * cst, ne10_vec2f_t * src, ne10_uint32_t count)
38 {
39 #define A1 cst->c1.r1
40 #define B1 cst->c1.r2
41 #define C1 cst->c2.r1
42 #define D1 cst->c2.r2
43 
44  NE10_CMATVEC_OPERATION_X_C
45  (
46  dst[ itr ].x = A1 * src[ itr ].x + C1 * src[ itr ].y;
47  dst[ itr ].y = B1 * src[ itr ].x + D1 * src[ itr ].y;
48  );
49 
50 #undef A1
51 #undef B1
52 #undef C1
53 #undef D1
54 }
55 
56 ne10_result_t ne10_mulcmatvec_cm3x3f_v3f_c (ne10_vec3f_t * dst, const ne10_mat3x3f_t * cst, ne10_vec3f_t * src, ne10_uint32_t count)
57 {
58 #define A1 cst->c1.r1
59 #define B1 cst->c1.r2
60 #define C1 cst->c1.r3
61 #define D1 cst->c2.r1
62 #define E1 cst->c2.r2
63 #define F1 cst->c2.r3
64 #define G1 cst->c3.r1
65 #define H1 cst->c3.r2
66 #define I1 cst->c3.r3
67 
68  NE10_CMATVEC_OPERATION_X_C
69  (
70  dst[ itr ].x = A1 * src[ itr ].x + D1 * src[ itr ].y + G1 * src[ itr ].z;
71  dst[ itr ].y = B1 * src[ itr ].x + E1 * src[ itr ].y + H1 * src[ itr ].z;
72  dst[ itr ].z = C1 * src[ itr ].x + F1 * src[ itr ].y + I1 * src[ itr ].z;
73  );
74 
75 #undef A1
76 #undef B1
77 #undef C1
78 #undef D1
79 #undef E1
80 #undef F1
81 #undef G1
82 #undef H1
83 #undef I1
84 }
85 
86 extern ne10_result_t ne10_mulcmatvec_cm4x4f_v4f_c (ne10_vec4f_t * dst, const ne10_mat4x4f_t * cst, ne10_vec4f_t * src, ne10_uint32_t count)
87 {
88 #define A1 cst->c1.r1
89 #define B1 cst->c1.r2
90 #define C1 cst->c1.r3
91 #define D1 cst->c1.r4
92 #define E1 cst->c2.r1
93 #define F1 cst->c2.r2
94 #define G1 cst->c2.r3
95 #define H1 cst->c2.r4
96 #define I1 cst->c3.r1
97 #define J1 cst->c3.r2
98 #define K1 cst->c3.r3
99 #define L1 cst->c3.r4
100 #define M1 cst->c4.r1
101 #define N1 cst->c4.r2
102 #define O1 cst->c4.r3
103 #define P1 cst->c4.r4
104 
105  NE10_CMATVEC_OPERATION_X_C
106  (
107  dst[ itr ].x = A1 * src[ itr ].x + E1 * src[ itr ].y + I1 * src[ itr ].z + M1 * src[ itr ].w;
108  dst[ itr ].y = B1 * src[ itr ].x + F1 * src[ itr ].y + J1 * src[ itr ].z + N1 * src[ itr ].w;
109  dst[ itr ].z = C1 * src[ itr ].x + G1 * src[ itr ].y + K1 * src[ itr ].z + O1 * src[ itr ].w;
110  dst[ itr ].w = D1 * src[ itr ].x + H1 * src[ itr ].y + L1 * src[ itr ].z + P1 * src[ itr ].w;
111  );
112 
113 #undef A1
114 #undef B1
115 #undef C1
116 #undef D1
117 #undef E1
118 #undef F1
119 #undef G1
120 #undef H1
121 #undef I1
122 #undef J1
123 #undef K1
124 #undef L1
125 #undef M1
126 #undef N1
127 #undef O1
128 #undef P1
129 }
a 2-tuple of ne10_float32_t values.
Definition: NE10_types.h:88
a 3-tuple of ne10_float32_t values.
Definition: NE10_types.h:97
a 4-tuple of ne10_float32_t values.
Definition: NE10_types.h:107