Skip to content

Commit 569aa2f

Browse files
author
Andrey Kamaev
committed
Add stubs for fixed-point and simd ConvertColor and tests
1 parent df99255 commit 569aa2f

File tree

4 files changed

+201
-0
lines changed

4 files changed

+201
-0
lines changed

CMakeLists.txt

+6
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ endforeach()
3636
set(PROJECT_NAME skeleton)
3737
project(${PROJECT_NAME})
3838

39+
# Enable SSE-family code generation so the SIMD color-conversion code can be built.
# NOTE(review): "/arch:SSE3" does not appear to be one of the documented MSVC /arch values,
# and this branch appends to OPENCV_EXTRA_FLAGS rather than CMAKE_CXX_FLAGS; whether these
# flags reach the compiler depends on code outside this hunk - TODO confirm.
if(MSVC)
40+
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE3 /Oi")
41+
else()
42+
# Non-MSVC compilers: -mssse3 is appended to the C++ flags of every target defined below.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3")
43+
endif()
44+
3945
set(LIBRARY "${PROJECT_NAME}_filter")
4046
set(TESTS "test_${PROJECT_NAME}")
4147
set(PERF_TESTS "perf_${PROJECT_NAME}")

include/skeleton_filter.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,5 @@ void GuoHallThinning(const cv::Mat& src, cv::Mat& dst);
2424
// Optimized versions
2525
void GuoHallThinning_optimized(const cv::Mat& src, cv::Mat& dst);
2626
void ImageResize_optimized(const cv::Mat &src, cv::Mat &dst, const cv::Size sz);
27+
// BGR -> grayscale conversion with BT.709 weights (stub for the fixed-point version).
// src must be CV_8UC3; dst is created as CV_8UC1 with the same size as src.
void ConvertColor_BGR2GRAY_BT709_fpt(const cv::Mat& src, cv::Mat& dst);
28+
// BGR -> grayscale conversion with BT.709 weights (stub for the SIMD version).
// src must be CV_8UC3; dst is created as CV_8UC1 with the same size as src.
void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst);

perf/perf_skeleton.cpp

+89
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,92 @@ PERF_TEST_P(Size_Only, Thinning, testing::Values(MAT_SIZES))
102102

103103
SANITY_CHECK(image);
104104
}
105+
106+
// Performance test for ConvertColor_BGR2GRAY_BT709_fpt on random BGR input.
// After timing, the output is compared with the reference conversion and
// must not differ from it by more than 1 at any pixel.
PERF_TEST_P(Size_Only, ConvertColor_fpt, testing::Values(MAT_SIZES))
{
    Size imageSize = GetParam();

    cv::Mat bgr(imageSize, CV_8UC3);
    cv::Mat gray(imageSize, CV_8UC1);
    cv::Mat reference(imageSize, CV_8UC1);
    declare.in(bgr, WARMUP_RNG).out(gray);

    // Random values in [0, 256) for every channel.
    cv::theRNG().fill(bgr, cv::RNG::UNIFORM, 0, 256);

    // Reference result produced by the baseline implementation.
    ConvertColor_BGR2GRAY_BT709(bgr, reference);

    TEST_CYCLE()
    {
        ConvertColor_BGR2GRAY_BT709_fpt(bgr, gray);
    }

    // Per-pixel absolute error against the reference.
    cv::Mat absError;
    cv::absdiff(gray, reference, absError);

    // THRESH_TOZERO with threshold 1 keeps only errors greater than 1.
    cv::Mat largeError;
    cv::threshold(absError, largeError, 1, 0, cv::THRESH_TOZERO);
    ASSERT_EQ(0, cv::countNonZero(largeError));
    // ASSERT_EQ(0, cv::countNonZero(absError));

    SANITY_CHECK(gray);
}
131+
132+
// Performance test for ConvertColor_BGR2GRAY_BT709_simd on random BGR input.
// After timing, the output is compared with the reference conversion: no pixel
// may differ by more than 1, and fewer than 20% of pixels may differ at all.
PERF_TEST_P(Size_Only, ConvertColor_simd, testing::Values(MAT_SIZES))
{
    Size imageSize = GetParam();

    cv::Mat bgr(imageSize, CV_8UC3);
    cv::Mat gray(imageSize, CV_8UC1);
    cv::Mat reference(imageSize, CV_8UC1);
    declare.in(bgr, WARMUP_RNG).out(gray);

    // Random values in [0, 256) for every channel.
    cv::theRNG().fill(bgr, cv::RNG::UNIFORM, 0, 256);

    // Reference result produced by the baseline implementation.
    ConvertColor_BGR2GRAY_BT709(bgr, reference);

    TEST_CYCLE()
    {
        ConvertColor_BGR2GRAY_BT709_simd(bgr, gray);
    }

    // Per-pixel absolute error against the reference.
    cv::Mat absError;
    cv::absdiff(gray, reference, absError);

    // THRESH_TOZERO with threshold 1 keeps only errors greater than 1.
    cv::Mat largeError;
    cv::threshold(absError, largeError, 1, 0, cv::THRESH_TOZERO);
    ASSERT_EQ(0, cv::countNonZero(largeError));

    // Even if the error is only 1-off, there should be no more than 20% of such pixels.
    ASSERT_LT(cv::countNonZero(absError), imageSize.width*imageSize.height*20/100);

    SANITY_CHECK(gray);
}
159+
160+
161+
// Accuracy test: compares the fixed-point conversion with the reference over every 24-bit BGR value.
162+
TEST(CompleteColorSpace, ConvertColor_fpt)
{
    // 4096 x 4096 = 2^24 pixels: one pixel for every possible 24-bit color.
    Size imageSize(4096, 4096);

    cv::Mat bgr(imageSize, CV_8UC3);
    cv::Mat gray(imageSize, CV_8UC1);
    cv::Mat reference(imageSize, CV_8UC1);

    // Running counter whose three low bytes become the three channels of a pixel.
    int packedColor = 0;
    for (int row = 0; row < imageSize.height; ++row)
    {
        uchar* pixel = bgr.ptr<uchar>(row);

        for (int col = 0; col < imageSize.width; ++col, pixel += 3)
        {
            pixel[0] = static_cast<uchar>(packedColor);
            pixel[1] = static_cast<uchar>(packedColor >> 8);
            pixel[2] = static_cast<uchar>(packedColor >> 16);
            ++packedColor;
        }
    }

    ConvertColor_BGR2GRAY_BT709(bgr, reference);
    ConvertColor_BGR2GRAY_BT709_fpt(bgr, gray);

    // Per-pixel absolute error against the reference.
    cv::Mat absError;
    cv::absdiff(gray, reference, absError);

    // THRESH_TOZERO with threshold 1 keeps only errors greater than 1.
    cv::Mat largeError;
    cv::threshold(absError, largeError, 1, 0, cv::THRESH_TOZERO);

    // No pixel may be off by more than 1, and fewer than 7565 pixels may differ at all.
    EXPECT_EQ(0, cv::countNonZero(largeError));
    ASSERT_LT(cv::countNonZero(absError), 7565);
    // ASSERT_EQ(0, cv::countNonZero(absError));
}

src/convertcolor.cpp

+104
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,35 @@
11
#include "skeleton_filter.hpp"
22

3+
#if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
4+
# include "tmmintrin.h"
5+
# define HAVE_SSE
6+
#endif
7+
8+
#include <string>
9+
#include <sstream>
10+
11+
#ifdef HAVE_SSE
// Debug helper: formats the lanes of a 128-bit SSE register as text.
//
// T selects the lane type (e.g. unsigned char, short, int); the register is
// interpreted as sizeof(__m128i) / sizeof(T) consecutive lanes, lowest lane
// first. Every value is followed by a single space. One-byte lane types are
// printed as integers rather than as characters.
//
// Only available when HAVE_SSE is defined, because __m128i comes from the
// conditionally included intrinsics header.
template <typename T>
std::string __m128i_toString(const __m128i var) // function for debug prints
{
    const unsigned int laneCount = sizeof(__m128i) / sizeof(T);

    // Copy the register into a plain array with an unaligned store instead of
    // reading it through a casted pointer, which would be type-punning.
    T lanes[sizeof(__m128i) / sizeof(T)];
    _mm_storeu_si128(reinterpret_cast<__m128i*>(lanes), var);

    std::stringstream sstr;
    for (unsigned int i = 0; i < laneCount; i++)
    {
        if (sizeof(T) == 1)
        {
            // Widen so the stream prints a number, not a character.
            sstr << (int) lanes[i] << " ";
        }
        else
        {
            sstr << lanes[i] << " ";
        }
    }
    return sstr.str();
}
#endif // HAVE_SSE
32+
333
void ConvertColor_BGR2GRAY_BT709(const cv::Mat& src, cv::Mat& dst)
434
{
535
CV_Assert(CV_8UC3 == src.type());
@@ -20,3 +50,77 @@ void ConvertColor_BGR2GRAY_BT709(const cv::Mat& src, cv::Mat& dst)
2050
}
2151
}
2252
}
53+
54+
// Converts an 8-bit BGR image to 8-bit grayscale using the BT.709 weights
// (0.2126 R, 0.7152 G, 0.0722 B). The weighted sum is currently evaluated in
// floating point; 0.5 is added before truncating to an integer.
// src must be CV_8UC3; dst is created as CV_8UC1 with the same size as src.
void ConvertColor_BGR2GRAY_BT709_fpt(const cv::Mat& src, cv::Mat& dst)
{
    CV_Assert(CV_8UC3 == src.type());
    cv::Size sz = src.size();
    dst.create(sz, CV_8UC1);

    // Channel positions inside one BGR pixel.
    const int blueIdx = 0;
    const int greenIdx = 1;
    const int redIdx = 2 - blueIdx;

    for (int row = 0; row < sz.height; ++row)
    {
        const cv::Vec3b* srcRow = src.ptr<cv::Vec3b>(row);
        uchar* dstRow = dst.ptr<uchar>(row);

        for (int col = 0; col < sz.width; ++col)
        {
            const cv::Vec3b& pixel = srcRow[col];
            const float luma = 0.2126 * pixel[redIdx] + 0.7152 * pixel[greenIdx] + 0.0722 * pixel[blueIdx];
            dstRow[col] = (int)(luma + 0.5);
        }
    }
}
74+
75+
// Stub for the SIMD BGR -> grayscale (BT.709) conversion.
// src must be CV_8UC3; dst is created as CV_8UC1 with the same size as src.
// In its current state the vector loop only gathers the red channel and the
// scalar tail loop is empty; the final call to ConvertColor_BGR2GRAY_BT709_fpt
// recomputes dst, so the output presently comes from that call.
void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
76+
{
77+
CV_Assert(CV_8UC3 == src.type());
78+
cv::Size sz = src.size();
79+
dst.create(sz, CV_8UC1);
80+
81+
#ifdef HAVE_SSE
82+
// Byte-shuffle masks for de-interleaving 16 BGR pixels held in three 16-byte chunks.
// _mm_set_epi8 lists lanes from highest to lowest; an index of -1 yields a zero byte.
// The blue and green masks are kept commented out until they are needed.
// __m128i ssse3_blue_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0);
83+
// __m128i ssse3_blue_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1);
84+
// __m128i ssse3_blue_indices_2 = _mm_set_epi8(13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
85+
// __m128i ssse3_green_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1);
86+
// __m128i ssse3_green_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1);
87+
// __m128i ssse3_green_indices_2 = _mm_set_epi8(14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
88+
// Red masks: chunk 0 supplies output lanes 0-4, chunk 1 lanes 5-9, chunk 2 lanes 10-15.
__m128i ssse3_red_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14, 11, 8, 5, 2);
89+
__m128i ssse3_red_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1, -1, -1, -1, -1, -1);
90+
__m128i ssse3_red_indices_2 = _mm_set_epi8(15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
91+
#endif
92+
93+
for (int y = 0; y < sz.height; y++)
94+
{
95+
const uchar *psrc = src.ptr<uchar>(y);
96+
uchar *pdst = dst.ptr<uchar>(y);
97+
98+
// x is shared by the vector loop and the scalar tail loop below.
int x = 0;
99+
100+
#ifdef HAVE_SSE
101+
// Vector loop: each iteration handles 16 pixels (48 source bytes).
for (; x <= sz.width - 16; x += 16)
103+
{
104+
// Three unaligned 16-byte loads cover the 48 bytes of 16 interleaved BGR pixels.
__m128i chunk0 = _mm_loadu_si128((const __m128i*)(psrc + x*3 + 16*0));
105+
__m128i chunk1 = _mm_loadu_si128((const __m128i*)(psrc + x*3 + 16*1));
106+
__m128i chunk2 = _mm_loadu_si128((const __m128i*)(psrc + x*3 + 16*2));
107+
108+
// Each shuffle fills a disjoint set of lanes, so OR-ing them yields the 16 red bytes.
__m128i red = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(chunk0, ssse3_red_indices_0),
109+
_mm_shuffle_epi8(chunk1, ssse3_red_indices_1)),
110+
_mm_shuffle_epi8(chunk2, ssse3_red_indices_2));
111+
112+
// Placeholder: the remaining vector computation is not implemented yet.
/* ??? */
113+
114+
// Currently stores the raw red channel as the 16 output pixels.
_mm_storeu_si128((__m128i*)(pdst + x), red);
115+
}
116+
#endif
117+
118+
// process leftover pixels
119+
for (; x < sz.width; x++)
120+
{
121+
// Placeholder: the scalar tail is not implemented yet, so this loop does nothing.
/* ??? */
122+
}
123+
}
124+
125+
// Temporary fallback that recomputes dst with the _fpt implementation.
ConvertColor_BGR2GRAY_BT709_fpt(src, dst); // !remove this!
126+
}

0 commit comments

Comments
 (0)