1
1
#include " skeleton_filter.hpp"
2
2
3
+ #if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
4
+ # include " tmmintrin.h"
5
+ # define HAVE_SSE
6
+ #endif
7
+
8
+ #include < string>
9
+ #include < sstream>
10
+
11
/// Debug helper: renders the contents of a 128-bit SSE register as text.
///
/// The register is interpreted as sizeof(__m128i) / sizeof(T) consecutive
/// lanes of type T, lowest lane first. Every lane is followed by one space.
/// One-byte lanes are printed as integers instead of as characters.
///
/// @tparam T  lane type (an arithmetic type no larger than the register).
/// @param var register to print.
/// @return    the space-separated lane values.
template <typename T>
std::string __m128i_toString(const __m128i var)
{
    const unsigned int laneCount = sizeof(__m128i) / sizeof(T);

    // Spill the register into a buffer of the requested lane type. Reading the
    // register through a casted T* would violate strict-aliasing rules, while
    // storing through a __m128i* is well defined.
    T lanes[laneCount];
    _mm_storeu_si128(reinterpret_cast<__m128i*>(lanes), var);

    std::stringstream sstr;
    for (unsigned int i = 0; i < laneCount; i++)
    {
        if (sizeof(T) == 1)
        {
            // Widen so the stream prints a number, not a raw character.
            sstr << static_cast<int>(lanes[i]) << " ";
        }
        else
        {
            sstr << lanes[i] << " ";
        }
    }
    return sstr.str();
}
32
+
3
33
void ConvertColor_BGR2GRAY_BT709 (const cv::Mat& src, cv::Mat& dst)
4
34
{
5
35
CV_Assert (CV_8UC3 == src.type ());
@@ -20,3 +50,77 @@ void ConvertColor_BGR2GRAY_BT709(const cv::Mat& src, cv::Mat& dst)
20
50
}
21
51
}
22
52
}
53
+
54
+ void ConvertColor_BGR2GRAY_BT709_fpt (const cv::Mat& src, cv::Mat& dst)
55
+ {
56
+ CV_Assert (CV_8UC3 == src.type ());
57
+ cv::Size sz = src.size ();
58
+ dst.create (sz, CV_8UC1);
59
+
60
+ const int bidx = 0 ;
61
+
62
+ for (int y = 0 ; y < sz.height ; y++)
63
+ {
64
+ const cv::Vec3b *psrc = src.ptr <cv::Vec3b>(y);
65
+ uchar *pdst = dst.ptr <uchar>(y);
66
+
67
+ for (int x = 0 ; x < sz.width ; x++)
68
+ {
69
+ float color = 0.2126 * psrc[x][2 -bidx] + 0.7152 * psrc[x][1 ] + 0.0722 * psrc[x][bidx];
70
+ pdst[x] = (int )(color + 0.5 );
71
+ }
72
+ }
73
+ }
74
+
75
+ void ConvertColor_BGR2GRAY_BT709_simd (const cv::Mat& src, cv::Mat& dst)
76
+ {
77
+ CV_Assert (CV_8UC3 == src.type ());
78
+ cv::Size sz = src.size ();
79
+ dst.create (sz, CV_8UC1);
80
+
81
+ #ifdef HAVE_SSE
82
+ // __m128i ssse3_blue_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0);
83
+ // __m128i ssse3_blue_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1);
84
+ // __m128i ssse3_blue_indices_2 = _mm_set_epi8(13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
85
+ // __m128i ssse3_green_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1);
86
+ // __m128i ssse3_green_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1);
87
+ // __m128i ssse3_green_indices_2 = _mm_set_epi8(14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
88
+ __m128i ssse3_red_indices_0 = _mm_set_epi8 (-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , 14 , 11 , 8 , 5 , 2 );
89
+ __m128i ssse3_red_indices_1 = _mm_set_epi8 (-1 , -1 , -1 , -1 , -1 , -1 , 13 , 10 , 7 , 4 , 1 , -1 , -1 , -1 , -1 , -1 );
90
+ __m128i ssse3_red_indices_2 = _mm_set_epi8 (15 , 12 , 9 , 6 , 3 , 0 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 );
91
+ #endif
92
+
93
+ for (int y = 0 ; y < sz.height ; y++)
94
+ {
95
+ const uchar *psrc = src.ptr <uchar>(y);
96
+ uchar *pdst = dst.ptr <uchar>(y);
97
+
98
+ int x = 0 ;
99
+
100
+ #ifdef HAVE_SSE
101
+ // here is 16 times unrolled loop for vector processing
102
+ for (; x <= sz.width - 16 ; x += 16 )
103
+ {
104
+ __m128i chunk0 = _mm_loadu_si128 ((const __m128i*)(psrc + x*3 + 16 *0 ));
105
+ __m128i chunk1 = _mm_loadu_si128 ((const __m128i*)(psrc + x*3 + 16 *1 ));
106
+ __m128i chunk2 = _mm_loadu_si128 ((const __m128i*)(psrc + x*3 + 16 *2 ));
107
+
108
+ __m128i red = _mm_or_si128 (_mm_or_si128 (_mm_shuffle_epi8 (chunk0, ssse3_red_indices_0),
109
+ _mm_shuffle_epi8 (chunk1, ssse3_red_indices_1)),
110
+ _mm_shuffle_epi8 (chunk2, ssse3_red_indices_2));
111
+
112
+ /* ??? */
113
+
114
+ _mm_storeu_si128 ((__m128i*)(pdst + x), red);
115
+ }
116
+ #endif
117
+
118
+ // process leftover pixels
119
+ for (; x < sz.width ; x++)
120
+ {
121
+ /* ??? */
122
+ }
123
+ }
124
+
125
+ ConvertColor_BGR2GRAY_BT709_fpt (src, dst); // !remove this!
126
+ }
0 commit comments