@@ -84,12 +84,12 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
84
84
dst.create (sz, CV_8UC1);
85
85
86
86
#ifdef HAVE_SSE
87
- // __m128i ssse3_blue_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0);
88
- // __m128i ssse3_blue_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1);
89
- // __m128i ssse3_blue_indices_2 = _mm_set_epi8(13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
90
- // __m128i ssse3_green_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1);
91
- // __m128i ssse3_green_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1);
92
- // __m128i ssse3_green_indices_2 = _mm_set_epi8(14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
87
+ __m128i ssse3_blue_indices_0 = _mm_set_epi8 (-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , 15 , 12 , 9 , 6 , 3 , 0 );
88
+ __m128i ssse3_blue_indices_1 = _mm_set_epi8 (-1 , -1 , -1 , -1 , -1 , 14 , 11 , 8 , 5 , 2 , -1 , -1 , -1 , -1 , -1 , -1 );
89
+ __m128i ssse3_blue_indices_2 = _mm_set_epi8 (13 , 10 , 7 , 4 , 1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 );
90
+ __m128i ssse3_green_indices_0 = _mm_set_epi8 (-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , 13 , 10 , 7 , 4 , 1 );
91
+ __m128i ssse3_green_indices_1 = _mm_set_epi8 (-1 , -1 , -1 , -1 , -1 , 15 , 12 , 9 , 6 , 3 , 0 , -1 , -1 , -1 , -1 , -1 );
92
+ __m128i ssse3_green_indices_2 = _mm_set_epi8 (14 , 11 , 8 , 5 , 2 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 );
93
93
__m128i ssse3_red_indices_0 = _mm_set_epi8 (-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , 14 , 11 , 8 , 5 , 2 );
94
94
__m128i ssse3_red_indices_1 = _mm_set_epi8 (-1 , -1 , -1 , -1 , -1 , -1 , 13 , 10 , 7 , 4 , 1 , -1 , -1 , -1 , -1 , -1 );
95
95
__m128i ssse3_red_indices_2 = _mm_set_epi8 (15 , 12 , 9 , 6 , 3 , 0 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 );
@@ -101,6 +101,7 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
101
101
__m128i zero = _mm_setzero_si128 ();
102
102
#endif
103
103
104
+
104
105
for (int y = 0 ; y < sz.height ; y++)
105
106
{
106
107
const uchar *psrc = src.ptr <uchar>(y);
@@ -119,10 +120,37 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
119
120
__m128i red = _mm_or_si128 (_mm_or_si128 (_mm_shuffle_epi8 (chunk0, ssse3_red_indices_0),
120
121
_mm_shuffle_epi8 (chunk1, ssse3_red_indices_1)),
121
122
_mm_shuffle_epi8 (chunk2, ssse3_red_indices_2));
123
+ __m128i green = _mm_or_si128 (_mm_or_si128 (_mm_shuffle_epi8 (chunk0, ssse3_green_indices_0),
124
+ _mm_shuffle_epi8 (chunk1, ssse3_green_indices_1)),
125
+ _mm_shuffle_epi8 (chunk2, ssse3_green_indices_2));
126
+ __m128i blue = _mm_or_si128 (_mm_or_si128 (_mm_shuffle_epi8 (chunk0, ssse3_blue_indices_0),
127
+ _mm_shuffle_epi8 (chunk1, ssse3_blue_indices_1)),
128
+ _mm_shuffle_epi8 (chunk2, ssse3_blue_indices_2));
129
+
130
+ __m128i red8_1 = _mm_unpacklo_epi8 (red,zero);
131
+ __m128i red8_2 = _mm_unpackhi_epi8 (red,zero);
132
+
133
+ __m128i green8_1 = _mm_unpacklo_epi8 (green,zero);
134
+ __m128i green8_2 = _mm_unpackhi_epi8 (green,zero);
135
+
136
+ __m128i blue8_1 = _mm_unpacklo_epi8 (blue,zero);
137
+ __m128i blue8_2 = _mm_unpackhi_epi8 (blue,zero);
138
+
139
+ __m128i bluecomp1 = _mm_mullo_epi16 (blue8_1,blue_coeff);
140
+ __m128i greencomp1 = _mm_mullo_epi16 (green8_1,green_coeff);
141
+ __m128i redcomp1 = _mm_mullo_epi16 (red8_1,red_coeff);
122
142
123
- /* ??? */
143
+ __m128i bluecomp2 = _mm_mullo_epi16 (blue8_2,blue_coeff);
144
+ __m128i greencomp2 = _mm_mullo_epi16 (green8_2,green_coeff);
145
+ __m128i redcomp2 = _mm_mullo_epi16 (red8_2,red_coeff);
124
146
125
- __m128i gray_packed; // Initialize it properly
147
+ __m128i graypack1 = _mm_add_epi16 ( _mm_add_epi16 ( _mm_add_epi16 (greencomp1,bluecomp1), redcomp1), bias);
148
+ __m128i graypack2 = _mm_add_epi16 ( _mm_add_epi16 ( _mm_add_epi16 (greencomp2,bluecomp2), redcomp2), bias);
149
+
150
+ __m128i gp1s = _mm_srli_epi16 (graypack1,8 );
151
+ __m128i gp2s = _mm_srli_epi16 (graypack2,8 );
152
+
153
+ __m128i gray_packed = _mm_packus_epi16 (gp1s,gp2s); // Narrow the two 16-bit halves (low 8 and high 8 pixels) back into 16 gray bytes with unsigned saturation
126
154
127
155
_mm_storeu_si128 ((__m128i*)(pdst + x), gray_packed);
128
156
}
@@ -131,12 +159,13 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
131
159
// Process leftover pixels
132
160
for (; x < sz.width ; x++)
133
161
{
162
+
134
163
float color = 0.2126 * psrc[3 * x + 2 ] + 0.7152 * psrc[3 * x + 1 ] + 0.0722 * psrc[3 * x];
135
164
pdst[x] = (int )(color + 0.5 );
136
165
}
137
166
}
138
167
139
168
// ! Remove this before writing your optimizations !
140
- ConvertColor_BGR2GRAY_BT709_fpt (src, dst);
169
+ // ConvertColor_BGR2GRAY_BT709_fpt(src, dst);
141
170
// ! Remove this before writing your optimizations !
142
171
}
0 commit comments