SMIL  0.9.1
DLineArith_SSE.h
1 /*
2  * Copyright (c) 2011-2016, Matthieu FAESSEL and ARMINES
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of Matthieu FAESSEL, or ARMINES nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 
30 #ifndef _D_LINE_ARITH_SSE_H
31 #define _D_LINE_ARITH_SSE_H
32 
33 
34 
35 #include <emmintrin.h>
36 
37 
38 
39 namespace smil
40 {
46  template <>
47  struct addLine<UINT8> : public binaryLineFunctionBase<UINT8>
48  {
49  inline void _exec(UINT8 *lIn1, UINT8 *lIn2, size_t size, UINT8 *lOut)
50  {
51  for (int i=0;i<size;i++)
52  lOut[i] = lIn1[i] > (UINT8)(numeric_limits<UINT8>::max()- lIn2[i]) ? numeric_limits<UINT8>::max() : lIn1[i] + lIn2[i];
53  }
54  inline void _exec_aligned(UINT8 *lIn1, UINT8 *lIn2, size_t size, UINT8 *lOut)
55  {
56  __m128i r0,r1;
57  __m128i *l1 = (__m128i*) lIn1;
58  __m128i *l2 = (__m128i*) lIn2;
59  __m128i *l3 = (__m128i*) lOut;
60 
61  unsigned long alignLen = size-size%SIMD_VEC_SIZE;
62 
63  for(size_t i=0 ; i<alignLen ; i+=16, l1++, l2++, l3++)
64  {
65  r0 = _mm_load_si128(l1);
66  r1 = _mm_load_si128(l2);
67  r1 = _mm_adds_epu8(r0, r1);
68  _mm_store_si128(l3, r1);
69  }
70 
71  _exec(lIn1+alignLen, lIn2+alignLen, size%SIMD_VEC_SIZE, lOut+alignLen);
72  }
73  };
74 
75  template <>
76  struct addNoSatLine<UINT8> : public binaryLineFunctionBase<UINT8>
77  {
78  inline void _exec(UINT8 *lIn1, UINT8 *lIn2, size_t size, UINT8 *lOut)
79  {
80  for (int i=0;i<size;i++)
81  lOut[i] = lIn1[i] + lIn2[i];
82  }
83  inline void _exec_aligned(UINT8 *lIn1, UINT8 *lIn2, size_t size, UINT8 *lOut)
84  {
85  __m128i r0,r1;
86  __m128i *l1 = (__m128i*) lIn1;
87  __m128i *l2 = (__m128i*) lIn2;
88  __m128i *l3 = (__m128i*) lOut;
89 
90  unsigned long alignLen = size-size%SIMD_VEC_SIZE;
91 
92  for(size_t i=0 ; i<alignLen ; i+=16, l1++, l2++, l3++)
93  {
94  r0 = _mm_load_si128(l1);
95  r1 = _mm_load_si128(l2);
96  r1 = _mm_add_epi8(r0, r1);
97  _mm_store_si128(l3, r1);
98  }
99 
100  _exec(lIn1+alignLen, lIn2+alignLen, size%SIMD_VEC_SIZE, lOut+alignLen);
101  }
102  };
103 
104  template <>
105  struct subLine<UINT8> : public binaryLineFunctionBase<UINT8>
106  {
107  inline void _exec(UINT8 *lIn1, UINT8 *lIn2, size_t size, UINT8 *lOut)
108  {
109  for (int i=0;i<size;i++)
110  lOut[i] = lIn1[i] < (UINT8)(numeric_limits<UINT8>::max() + lIn2[i]) ? numeric_limits<UINT8>::min() : lIn1[i] - lIn2[i];
111  }
112  inline void _exec_aligned(UINT8 *lIn1, UINT8 *lIn2, size_t size, UINT8 *lOut)
113  {
114  __m128i r0,r1;
115  __m128i *l1 = (__m128i*) lIn1;
116  __m128i *l2 = (__m128i*) lIn2;
117  __m128i *l3 = (__m128i*) lOut;
118 
119  unsigned long alignLen = size-size%SIMD_VEC_SIZE;
120 
121  for(size_t i=0 ; i<alignLen ; i+=16, l1++, l2++, l3++)
122  {
123  r0 = _mm_load_si128(l1);
124  r1 = _mm_load_si128(l2);
125  r1 = _mm_subs_epu8(r0, r1);
126  _mm_store_si128(l3, r1);
127  }
128 
129  _exec(lIn1+alignLen, lIn2+alignLen, size%SIMD_VEC_SIZE, lOut+alignLen);
130  }
131  };
132 
133  template <>
134  struct subNoSatLine<UINT8> : public binaryLineFunctionBase<UINT8>
135  {
136  inline void _exec(UINT8 *lIn1, UINT8 *lIn2, size_t size, UINT8 *lOut)
137  {
138  for (int i=0;i<size;i++)
139  lOut[i] = lIn1[i] - lIn2[i];
140  }
141  inline void _exec_aligned(UINT8 *lIn1, UINT8 *lIn2, size_t size, UINT8 *lOut)
142  {
143  __m128i r0,r1;
144  __m128i *l1 = (__m128i*) lIn1;
145  __m128i *l2 = (__m128i*) lIn2;
146  __m128i *l3 = (__m128i*) lOut;
147 
148  unsigned long alignLen = size-size%SIMD_VEC_SIZE;
149 
150  for(size_t i=0 ; i<alignLen ; i+=16, l1++, l2++, l3++)
151  {
152  r0 = _mm_load_si128(l1);
153  r1 = _mm_load_si128(l2);
154  r1 = _mm_sub_epi8(r0, r1);
155  _mm_store_si128(l3, r1);
156  }
157 
158  _exec(lIn1+alignLen, lIn2+alignLen, size%SIMD_VEC_SIZE, lOut+alignLen);
159  }
160  };
161 
162  template <>
163  struct supLine<UINT8> : public binaryLineFunctionBase<UINT8>
164  {
165  inline void _exec(UINT8 *lIn1, UINT8 *lIn2, size_t size, UINT8 *lOut)
166  {
167  for (int i=0;i<size;i++)
168  lOut[i] = lIn1[i] > lIn2[i] ? lIn1[i] : lIn2[i];
169  }
170  inline void _exec_aligned(UINT8 *lIn1, UINT8 *lIn2, size_t size, UINT8 *lOut)
171  {
172  __m128i r0,r1;
173  __m128i *l1 = (__m128i*) lIn1;
174  __m128i *l2 = (__m128i*) lIn2;
175  __m128i *l3 = (__m128i*) lOut;
176 
177  unsigned long alignLen = size-size%SIMD_VEC_SIZE;
178 
179  for(size_t i=0 ; i<alignLen ; i+=16, l1++, l2++, l3++)
180  {
181  r0 = _mm_load_si128(l1);
182  r1 = _mm_load_si128(l2);
183  r1 = _mm_max_epu8(r0, r1);
184  _mm_store_si128(l3, r1);
185  }
186 
187  _exec(lIn1+alignLen, lIn2+alignLen, size%SIMD_VEC_SIZE, lOut+alignLen);
188  }
189  };
190 
191  template <>
192  struct infLine<UINT8> : public binaryLineFunctionBase<UINT8>
193  {
194  inline void _exec(UINT8 *lIn1, UINT8 *lIn2, size_t size, UINT8 *lOut)
195  {
196  for (int i=0;i<size;i++)
197  lOut[i] = lIn1[i] < lIn2[i] ? lIn1[i] : lIn2[i];
198  }
199  inline void _exec_aligned(UINT8 *lIn1, UINT8 *lIn2, size_t size, UINT8 *lOut)
200  {
201  __m128i r0,r1;
202  __m128i *l1 = (__m128i*) lIn1;
203  __m128i *l2 = (__m128i*) lIn2;
204  __m128i *l3 = (__m128i*) lOut;
205 
206  unsigned long alignLen = size-size%SIMD_VEC_SIZE;
207 
208  for(size_t i=0 ; i<alignLen ; i+=16, l1++, l2++, l3++)
209  {
210  r0 = _mm_load_si128(l1);
211  r1 = _mm_load_si128(l2);
212  r1 = _mm_min_epu8(r0, r1);
213  _mm_store_si128(l3, r1);
214  }
215 
216  _exec(lIn1+alignLen, lIn2+alignLen, size%SIMD_VEC_SIZE, lOut+alignLen);
217  }
218  };
219 
222 } // namespace smil
223 
224 
225 #endif // _D_LINE_ARITH_SSE_H
Definition: DColorConvert.h:38
Definition: DLineArith.hpp:166
Definition: DLineArith.hpp:144
Definition: DLineArith.hpp:155
Definition: DLineArith.hpp:177
Definition: DBaseLineOperations.hpp:109
Definition: DLineArith.hpp:122
Definition: DLineArith.hpp:133