集
给定一幅图像I,如何有效地计算图像上每个位置的梯度Ix、Iy,梯度幅值M,方向角Theta:
Ix(x,y) =I(x+1,y) - I(x-1,y)
Iy(x,y) =I(x,y+1) -I(x,y-1)
M(x,y) = sqrt(Ix(x,y)*Ix(x,y) +Iy(x,y)*Iy(x,y) )
Theta(x,y) = atan2(Iy(x,y),Ix(x,y) )
从上面的公式来看,计算起来并不困难,OpenCV里有Sobel函数,可以直接计算出Ix,Iy。这里我们主要讲的是高效率编程,自己编程来解决这个问题。Intel生产的CPU基本上都支持SSE指令集,这一指令集允许同时对四个单精度浮点数进行运算,比如在一个CPU的clock里完成计算4个float与另外4个float的分别对应相乘。我在之前的博文里公开了我写的DPM目标检测代码,其中用到了SSE,那是我第一次使用SSE进行加速计算,效果颇好。之后有幸看到了P. Dollár大神关于行人检测的文章(The Fastest Pedestrian Detector in the West),里面的mex函数文件大量使用了SSE指令集,让我对SSE编程有了进一步了解。
我以前⽤OpenCV的Sobel函数来计算Ix,Iy,然后再算M和Theta,⼀⽅⾯效率不够理想,另⼀⽅⾯程序的可控性较差——⾃⼰想在计算过程中增加⼀两个简单运算不太⽅便。由此我⾃⼰写了相关代码,⼏经修改,形成了⼏个不同的版本,下⾯将这些代码⼀⼀贴出来并作简单讲解。
------------------------------------
代码⼀:oriented_gradient.cpp
说明:该cpp中定义了两个版本的yuOrientedGradient函数,该函数的功能是,输⼊⼀幅灰度或彩⾊图像,计算图像上每个像素位置的梯度幅值和⽅向⾓。若输⼊是多通道图像,梯度幅值是取各个颜⾊通道梯度幅值的最⼤值。输出的⽅向⾓不是实数值,⽽是离散的整数值,⽐如指定
orientation_bins=9,sensitive=true,则将⼀个取值在[0,2*pi)的⽅向⾓划分到[0,20)(度),[20,40),[40,60),...,[340,360)共18个bin中的⼀个。这种做法其实是为了后续进⼀步计算HOG特征服务的。如果将orientation的数据类型改为int型,将orientation_bins设置为360,则可以计算出每个像素位置⽅向⾓的⾓度,精度为1°. 继续提⾼orientation_bins的值,可以增加⽅向⾓的估计精度。
#include "cv.h"
using namespace cv;
// Forward declarations of the two implementations. Both take a CV_32F image and
// write a per-pixel quantized gradient orientation and a per-pixel gradient
// magnitude. They share the same functionality; according to the author, V2 is
// relatively faster.
void yuOrientedGradient( const Mat &img, Mat &orientation, Mat &gradient, int orientation_bins, bool sensitive );
void yuOrientedGradient_V2( const Mat &img, Mat &orientation, Mat &gradient, int orientation_bins, bool sensitive );
/* test:
Mat im = imread("0001.jpg");
Mat imF; im.convertTo(imF,CV_32F);
Mat O1, O2, G1, G2;
yuOrientedGradient( imF, O1, G1, 9, true );
yuOrientedGradient_V2( imF, O2, G2, 9, true );
absdiff( O1, O2, O1 );
absdiff( G1, G2, G1 );
double a, b;
minMaxLoc( G1, 0, &a );
minMaxLoc( O1, 0, &b );
cout<<a<<endl<<b<<endl; // a==0, b==0
*/
/*
Calculate the orientation at each pixel.
The calculated orientations are snapped to one of the N bins which are equally spaced in:
[0,180), if sensitive==false, then values of orientation are in [0,orientation_bins-1];
[0,360), if sensitive==true, then values of orientation are in [0,2*orientation_bins-1].
The output orientation (CV_8UC1) & gradient (CV_32FC1) are 2 pixels smaller both in rows
and in cols than input img (multi-channel,float type).
theta = angle( OP ), where O = (0,0), P = (dx,dy)
e.g. with orientation_bins==9 and sensitive==false, theta is snapped to one of the nine
bin directions 0, 20, ..., 160 (degrees).
How to snap:
we set the bin directions as 0, 20, ..., 160,
then for any theta, cos(theta-i*20) achieves max for the bin direction i*20 nearest to theta
as: cos(a-b) = cos(a)*cos(b) + sin(a)*sin(b)
so: cos(theta-i*20) = cos(theta)*cos(i*20) + sin(theta)*sin(i*20)
now that: cos(theta) = x/sqrt(x^2+y^2), sin(theta) = y/sqrt(x^2+y^2)
so: x*cos(i*20)+y*sin(i*20) achieves max when i*20 is the bin direction nearest to the orientation of (x,y)
make: uu = [cos(0) cos(20) ... cos(160)]; vv = [sin(0) sin(20) ... sin(160)];
then: x*uu(i)+y*vv(i) achieves max for the i-th orientation bin.
if sensitive==false, |x*uu(i)+y*vv(i)| is maximized instead, so opposite directions share a bin;
if sensitive==true, -(x*uu(i)+y*vv(i)) is also tested and selects bin i+orientation_bins.
by: YU Xianguo, 2015/06/24
*/
void yuOrientedGradient( const Mat &img, Mat &orientation, Mat &gradient, int orientation_bins, bool sensitive ) {
assert( img.depth()==CV_32F );
assert( ws>2 && ls>2 );
assert( orientation_bins>1 && orientation_bins<256 );
// create output: only calc for img(Rect(1,1,cols-2,rows-2)).
int rows = ws, cols = ls, chans = img.channels();
// calculate gradient for img(Rect(1,1,cols-2,rows-2))
// multi-channel operation
Mat Left = img( Rect(0,1,cols-2,rows-2) );
Mat Right = img( Rect(2,1,cols-2,rows-2) );
Mat Up = img( Rect(1,0,cols-2,rows-2) );
Mat Down = img( Rect(1,2,cols-2,rows-2) );
Mat _Dx = Right - Left;
Mat _Dy = Down - Up;
Mat Dx, Dy;
if( chans==1 ){
Dx = _Dx;
Dy = _Dy;
gradient = 0;
accumulateSquare( Dx, gradient );
accumulateSquare( Dy, gradient );
}
else{
rows = _Dx.rows, cols = _Dx.cols;
// for each element in Dx & Dy: <dx0,dx1,dx2> & <dy0,dy1,dy2>
// calculate the square sum: <d0,d1,d2>, where d = dx^2 + dy^2
// select d(i) = max(d0,d1,d2)
// then set the corresponding value of DDx by dx(i), set DDy by dy(i)
float *a = (float*)_Dx.data;
float *b = (float*)_Dy.data;
float *c = (float*)Dx.data;
float *d = (float*)Dy.data;
float *g = (float*)gradient.data;
int pg = gradient.step1()-cols;
int x, y, chn;
float dv, mdx, mdy, mdv;
for( y=0; y++<rows; g+=pg ){
for( x=0; x++<cols; ){
for( mdv=-1, chn=0; chn++<chans; ){
for( mdv=-1, chn=0; chn++<chans; ){
float &dx = *(a++);
float &dy = *(b++);
dv = dx*dx + dy*dy;
if( mdv<dv ){
mdv = dv;
mdx = dx;
mdy = dy;
}
}
*(c++) = mdx;
*(d++) = mdy;
*(g++) = mdv; // gradient = Dx.^2 + Dy.^2
}
}
}
// construct orientation snaps
vector<double> uu(orientation_bins);
vector<double> vv(orientation_bins);
double bin_span = CV_PI / orientation_bins;
for( int k=0; k<orientation_bins; k++ ){
double theta = k * bin_span;
螺旋锥蝇
镭射贴uu[k] = cos( theta );
vv[k] = sin( theta );
}
// val = DDx * uu[0] + DDy * vv[0] = DDx
Mat val, maxval, bw;
maxval = abs( val );
orientation = 0;
if( !sensitive ){
for( int i=1; i<orientation_bins; i++ ){
val = Dx*uu[i] + Dy*vv[i]; //addWeighted( Dx, uu[i], Dy, vv[i], 0, val, CV_32FC1 );
val = abs(val);
bw = maxval < val;
if( i<orientation_bins-1 )
orientation.setTo(i,bw);
}
}
else{
bw = val < 0;
orientation.setTo( orientation_bins, bw );
for( int i=1; i<orientation_bins; i++ ){
val = Dx*uu[i] + Dy*vv[i]; //addWeighted( Dx, uu[i], Dy, vv[i], 0, val, CV_32FC1 );
bw = maxval < val;
if( i<orientation_bins-1 )
orientation.setTo( i, bw );
val = -val;
bw = maxval < val;
if( i<orientation_bins-1 )
orientation.setTo( i+orientation_bins, bw );
}
}
cv::sqrt( gradient, gradient );
return;
}
void yuOrientedGradient_V2( const Mat &img, Mat &orientation, Mat &gradient, int orientation_bins, bool sensitive )
void yuOrientedGradient_V2( const Mat &img, Mat &orientation, Mat &gradient, int orientation_bins, bool sensitive ) {
typedef uchar T; // data type of orientation
const int orientation_type = CV_8UC1; // must accord with T
assert( img.depth()==CV_32F );
assert( ws>2 && ls>2 );
assert( orientation_bins>1 && orientation_bins<256 ); // cause we use uchar type orientation
int rows = ws, cols = ls, channels = img.channels();
int result_rows = rows - 2, result_cols = cols - 2;
// construct orientation snaps
vector<float> uu(orientation_bins);
vector<float> vv(orientation_bins);
float bin_span = float(CV_PI) / orientation_bins;
for( int k=0; k<orientation_bins; k++ ){
float theta = k * bin_span;
uu[k] = cosf( theta );
vv[k] = sinf( theta );
}
T *orient = (T*)orientation.data; int gap1=orientation.step1()-result_cols;
float *grad = (float*)gradient.data; int gap2=gradient.step1()-result_cols;
// orientation(y,x) is from: img(y,x),img(y+2,x),img(y,x+2),img(y+2,x+2)
const float *Up = (float*)img.data + channels;
const float *Down = Up + img.step1()*2;
const float *Left = (float*)img.data + img.step1();
const float *Right = Left + 2*channels;
int gap = img.step1() - result_cols*channels;
top技术
float dx, dy, dv, mdx, mdy, mdv;
for( int y=0; y<result_rows; y++ ){
for( int x=0; x<result_cols; x++ ){
mdx = *(Right++) - *(Left++);
mdy = *(Down++) - *(Up++);
mdv = mdx*mdx + mdy*mdy;
for( int c=1; c<channels; c++ ){
dx = *(Right++) - *(Left++);
dy = *(Down++) - *(Up++);
dv = dx*dx + dy*dy;
if( mdv<dv ){
mdv = dv;
mdx = dx;
mdy = dy;
}
}
// snap to one orientation bin
float maxVal = mdx < 0 ? -mdx : mdx; // uu[0]*mdx + vv[0]*mdy == mdx
int maxOrient = 0;
if( sensitive ){
if( mdx<0 ) maxOrient += orientation_bins;
for( int k=1; k<orientation_bins; k++ ){
float val = uu[k]*mdx + vv[k]*mdy;
if( maxVal<val ){
maxVal = val;
maxOrient = k;
}
else if( maxVal<-val ){
maxVal = -val;
maxOrient = k + orientation_bins;
}
}
}
}
else{
for( int k=1; k<orientation_bins; k++ ){
float val = uu[k]*mdx + vv[k]*mdy;
if( val<0 ) val = -val;
if( maxVal<val ){
maxVal = val;
maxOrient = k;
}
}
}
*(orient++) = maxOrient;
*(grad++) = mdv;
}
Up+=gap, Down+=gap, Left+=gap, Right+=gap;
orient+=gap1, grad+=gap2;
}
cv::sqrt( gradient, gradient );
return;
}
代码⼆:sse.h
说明:将常用SSE指令打包放入一个头文件中,方便使用。这个头文件是从P. Dollár的工具箱中拿出来的,其中注释部分是我写的,一些函数的形式被我修改了,另外我还加入了若干个函数。这个头文件极大地方便了SSE编程,对我用处颇大。
/*******************************************************************************
* Piotr's Image&Video Toolbox Version 3.23
* Copyright 2013 Piotr Dollar & Ron Appel. [pdollar-at-caltech.edu]
* Please email me if you find bugs, or have suggestions or questions!
* Licensed under the Simplified BSD License [see ]
*******************************************************************************/
/* The interpretations are written by YU Xianguo, 2015/06/26.
* Notes:
* The defined functions follow the form: __m128(i) fun( dst, src ); OR: __m128(i) fun( src1, src2 );
* In my interpretation, x[4] means x is a float* (or int*), or it is a __m128 (or __m128i), and the 4 values
* of x are treated independently. x_4 means x is a __m128 (or __m128i), and it is treated as a single 128-bit variable.
*
* Besides the comments, the parameter forms of some functions are changed:
* if the input parameter is a float[4], in original form, it is formed as float&, here I changed it to float*.
*
* I also added some new functions by checking the SSE instructions presented in online MSDN:
* msdn.microsoft.com/en-us/library/vstudio/ff5d607a(v=vs.100).aspx
*/
#pragma once
#include <xmmintrin.h>
#include <emmintrin.h> // SSE2:<e*.h>, SSE3:<p*.h>, SSE4:<s*.h>
// Declaration shorthands for the wrappers in this header:
// RETf expands to "inline __m128" and RETi expands to "inline __m128i".
#define RETf inline __m128
#define RETi inline __m128i
/* set, load and store values */
// Yield the value produced by _mm_setzero_ps(), i.e. a vector of all zeros.
RETf SSE_ZERO()
{
	const __m128 zeros = _mm_setzero_ps();
	return zeros;
}