Skip to content

Commit 0d93736

Browse files
committed
SIMD optimized version
1 parent 86e3107 commit 0d93736

File tree

2 files changed

+89
-60
lines changed

2 files changed

+89
-60
lines changed

perf/perf_skeleton.cpp

+51 -51
Original file line number | Diff line number | Diff line change
@@ -29,32 +29,32 @@ using std::tr1::get;
2929

3030
typedef perf::TestBaseWithParam<Size> Size_Only;
3131

32-
PERF_TEST_P(Size_Only, ImageResize, testing::Values(MAT_SIZES))
33-
{
34-
Size sz = GetParam();
35-
Size sz_to(sz.width / 1.7, sz.height / 1.4);
36-
37-
cv::Mat src(sz, CV_8UC1);
38-
cv::Mat dst(Size(sz_to), CV_8UC1);
39-
cv::Mat gold(Size(sz_to), CV_8UC1);
40-
declare.in(src, WARMUP_RNG).out(dst);
41-
42-
cv::RNG rng(234231412);
43-
rng.fill(src, CV_8UC1, 0, 255);
44-
45-
ImageResize(src, gold, sz_to);
46-
47-
TEST_CYCLE()
48-
{
49-
ImageResize_optimized(src, dst, sz_to);
50-
}
51-
52-
cv::Mat diff; cv::absdiff(dst, gold, diff);
53-
cv::threshold(diff, diff, 1, 0, cv::THRESH_TOZERO);
54-
ASSERT_EQ(0, cv::countNonZero(diff));
55-
56-
SANITY_CHECK(dst);
57-
}
32+
//PERF_TEST_P(Size_Only, ImageResize, testing::Values(MAT_SIZES))
33+
//{
34+
// Size sz = GetParam();
35+
// Size sz_to(sz.width / 1.7, sz.height / 1.4);
36+
//
37+
// cv::Mat src(sz, CV_8UC1);
38+
// cv::Mat dst(Size(sz_to), CV_8UC1);
39+
// cv::Mat gold(Size(sz_to), CV_8UC1);
40+
// declare.in(src, WARMUP_RNG).out(dst);
41+
//
42+
// cv::RNG rng(234231412);
43+
// rng.fill(src, CV_8UC1, 0, 255);
44+
//
45+
// ImageResize(src, gold, sz_to);
46+
//
47+
// TEST_CYCLE()
48+
// {
49+
// ImageResize_optimized(src, dst, sz_to);
50+
// }
51+
//
52+
// cv::Mat diff; cv::absdiff(dst, gold, diff);
53+
// cv::threshold(diff, diff, 1, 0, cv::THRESH_TOZERO);
54+
// ASSERT_EQ(0, cv::countNonZero(diff));
55+
//
56+
// SANITY_CHECK(dst);
57+
//}
5858

5959
//
6060
// Test(s) for the skeletonize function
@@ -77,31 +77,31 @@ PERF_TEST_P(Size_Only, ImageResize, testing::Values(MAT_SIZES))
7777
// Test(s) for the Thinning function
7878
//
7979

80-
PERF_TEST_P(Size_Only, Thinning, testing::Values(MAT_SIZES))
81-
{
82-
Size sz = GetParam();
83-
84-
cv::Mat image(sz, CV_8UC1);
85-
declare.in(image, WARMUP_RNG).out(image);
86-
declare.time(40);
87-
88-
cv::RNG rng(234231412);
89-
rng.fill(image, CV_8UC1, 0, 255);
90-
cv::threshold(image, image, 240, 255, cv::THRESH_BINARY_INV);
91-
92-
cv::Mat gold; GuoHallThinning(image, gold);
93-
94-
cv::Mat thinned_image;
95-
TEST_CYCLE()
96-
{
97-
GuoHallThinning_optimized(image, thinned_image);
98-
}
99-
100-
cv::Mat diff; cv::absdiff(thinned_image, gold, diff);
101-
ASSERT_EQ(0, cv::countNonZero(diff));
102-
103-
SANITY_CHECK(image);
104-
}
80+
//PERF_TEST_P(Size_Only, Thinning, testing::Values(MAT_SIZES))
81+
//{
82+
// Size sz = GetParam();
83+
//
84+
// cv::Mat image(sz, CV_8UC1);
85+
// declare.in(image, WARMUP_RNG).out(image);
86+
// declare.time(40);
87+
//
88+
// cv::RNG rng(234231412);
89+
// rng.fill(image, CV_8UC1, 0, 255);
90+
// cv::threshold(image, image, 240, 255, cv::THRESH_BINARY_INV);
91+
//
92+
// cv::Mat gold; GuoHallThinning(image, gold);
93+
//
94+
// cv::Mat thinned_image;
95+
// TEST_CYCLE()
96+
// {
97+
// GuoHallThinning_optimized(image, thinned_image);
98+
// }
99+
//
100+
// cv::Mat diff; cv::absdiff(thinned_image, gold, diff);
101+
// ASSERT_EQ(0, cv::countNonZero(diff));
102+
//
103+
// SANITY_CHECK(image);
104+
//}
105105

106106
PERF_TEST_P(Size_Only, ConvertColor_fpt, testing::Values(MAT_SIZES))
107107
{

src/convertcolor.cpp

+38 -9
Original file line number | Diff line number | Diff line change
@@ -84,12 +84,12 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
8484
dst.create(sz, CV_8UC1);
8585

8686
#ifdef HAVE_SSE
87-
// __m128i ssse3_blue_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0);
88-
// __m128i ssse3_blue_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1);
89-
// __m128i ssse3_blue_indices_2 = _mm_set_epi8(13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
90-
// __m128i ssse3_green_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1);
91-
// __m128i ssse3_green_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1);
92-
// __m128i ssse3_green_indices_2 = _mm_set_epi8(14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
87+
__m128i ssse3_blue_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0);
88+
__m128i ssse3_blue_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1);
89+
__m128i ssse3_blue_indices_2 = _mm_set_epi8(13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
90+
__m128i ssse3_green_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1);
91+
__m128i ssse3_green_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1);
92+
__m128i ssse3_green_indices_2 = _mm_set_epi8(14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
9393
__m128i ssse3_red_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14, 11, 8, 5, 2);
9494
__m128i ssse3_red_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1, -1, -1, -1, -1, -1);
9595
__m128i ssse3_red_indices_2 = _mm_set_epi8(15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
@@ -101,6 +101,7 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
101101
__m128i zero = _mm_setzero_si128();
102102
#endif
103103

104+
104105
for (int y = 0; y < sz.height; y++)
105106
{
106107
const uchar *psrc = src.ptr<uchar>(y);
@@ -119,10 +120,37 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
119120
__m128i red = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(chunk0, ssse3_red_indices_0),
120121
_mm_shuffle_epi8(chunk1, ssse3_red_indices_1)),
121122
_mm_shuffle_epi8(chunk2, ssse3_red_indices_2));
123+
__m128i green = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(chunk0, ssse3_green_indices_0),
124+
_mm_shuffle_epi8(chunk1, ssse3_green_indices_1)),
125+
_mm_shuffle_epi8(chunk2, ssse3_green_indices_2));
126+
__m128i blue = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(chunk0, ssse3_blue_indices_0),
127+
_mm_shuffle_epi8(chunk1, ssse3_blue_indices_1)),
128+
_mm_shuffle_epi8(chunk2, ssse3_blue_indices_2));
129+
130+
__m128i red8_1 = _mm_unpacklo_epi8(red,zero);
131+
__m128i red8_2 = _mm_unpackhi_epi8(red,zero);
132+
133+
__m128i green8_1 = _mm_unpacklo_epi8(green,zero);
134+
__m128i green8_2 = _mm_unpackhi_epi8(green,zero);
135+
136+
__m128i blue8_1 = _mm_unpacklo_epi8(blue,zero);
137+
__m128i blue8_2 = _mm_unpackhi_epi8(blue,zero);
138+
139+
__m128i bluecomp1 = _mm_mullo_epi16(blue8_1,blue_coeff);
140+
__m128i greencomp1 = _mm_mullo_epi16(green8_1,green_coeff);
141+
__m128i redcomp1 = _mm_mullo_epi16(red8_1,red_coeff);
122142

123-
/* ??? */
143+
__m128i bluecomp2 = _mm_mullo_epi16(blue8_2,blue_coeff);
144+
__m128i greencomp2 = _mm_mullo_epi16(green8_2,green_coeff);
145+
__m128i redcomp2 = _mm_mullo_epi16(red8_2,red_coeff);
124146

125-
__m128i gray_packed; // Initialize it properly
147+
__m128i graypack1 = _mm_add_epi16( _mm_add_epi16( _mm_add_epi16(greencomp1,bluecomp1), redcomp1), bias);
148+
__m128i graypack2 = _mm_add_epi16( _mm_add_epi16( _mm_add_epi16(greencomp2,bluecomp2), redcomp2), bias);
149+
150+
__m128i gp1s = _mm_srli_epi16(graypack1,8);
151+
__m128i gp2s = _mm_srli_epi16(graypack2,8);
152+
153+
__m128i gray_packed = _mm_packus_epi16(gp1s,gp2s); // Initialize it properly
126154

127155
_mm_storeu_si128((__m128i*)(pdst + x), gray_packed);
128156
}
@@ -131,12 +159,13 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
131159
// Process leftover pixels
132160
for (; x < sz.width; x++)
133161
{
162+
134163
float color = 0.2126 * psrc[3 * x + 2] + 0.7152 * psrc[3 * x + 1] + 0.0722 * psrc[3 * x];
135164
pdst[x] = (int)(color + 0.5);
136165
}
137166
}
138167

139168
// ! Remove this before writing your optimizations !
140-
ConvertColor_BGR2GRAY_BT709_fpt(src, dst);
169+
//ConvertColor_BGR2GRAY_BT709_fpt(src, dst);
141170
// ! Remove this before writing your optimizations !
142171
}

0 commit comments

Comments
 (0)