Sunday, June 15, 2014

Accurate Eye Center Location through Invariant Isocentric Patterns



Below, is a Python implementation of the paper Accurate Eye Center Location through Invariant Isocentric Patterns. Most of the comments in the code are copied directly from the paper.

Download: https://gist.github.com/dimitrs/d667ccb433d5f2baa8df




Tuesday, April 01, 2014

Mixing OpenCV and SciKit-image


I saw a Mathematica post that described how to detect and flatten a label on a jar. My goal here is to do something similar in Python. I am familiar with OpenCV-Python which is what I have always used for my computer vision projects, but it occurred to me that there is no reason why I should only use OpenCV-Python. I could use both OpenCV-Python and SciKit-image at the same time. After all, they are both based on Numpy. For this project I start with OpenCV-Python and then switch to SciKit-image for the last step.

jar.png


#Import both skimage and cv
from skimage import transform as tf
from skimage import io
import cv2

import numpy as np
from scipy import optimize
import matplotlib.pyplot as plt

# Could use either skimage or cv to read the image
# img = io.imread('jar.png')   
img = cv2.imread('jar.png')
gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray_image,127,255,cv2.THRESH_BINARY)
edges = cv2.Canny(thresh ,100, 200)

# Find largest contour (should be the label)
contours,hierarchy = cv2.findContours(edges, 0, 1)
areas = [cv2.contourArea(c) for c in contours]
max_index = np.argmax(areas)
cnt=contours[max_index]

# Create a mask of the label
mask = np.zeros(img.shape,np.uint8)
cv2.drawContours(mask, [cnt],0,255,-1)

Mask of the label



# Find the 4 borders
scale = 1
delta = 0
ddepth = cv2.CV_8U
borderType=cv2.BORDER_DEFAULT
left=cv2.Sobel(mask,ddepth,1,0,ksize=1,scale=1,delta=0,borderType)
right=cv2.Sobel(mask,ddepth,1,0,ksize=1,scale=-1,delta=0, borderType)
top=cv2.Sobel(mask,ddepth,0,1,ksize=1,scale=1,delta=0,borderType)
bottom=cv2.Sobel(mask,ddepth,0,1,ksize=1,scale=-1,delta=0,borderType)
Left & Right borders after Sobel

# Remove noise from borders
kernel = np.ones((2,2),np.uint8)
left_border = cv2.erode(left,kernel,iterations = 1)
right_border = cv2.erode(right,kernel,iterations = 1)
top_border = cv2.erode(top,kernel,iterations = 1)
bottom_border = cv2.erode(bottom,kernel,iterations = 1)
Left & Right borders after calling erode


# Find coeficients c1,c2, ... ,c7,c8 by minimizing the error function. 
# Points on the left border should be mapped to (0,anything).
# Points on the right border should be mapped to (108,anything)
# Points on the top border should be mapped to (anything,0)
# Points on the bottom border should be mapped to (anything,70)
# Equations 1 and 2: 
#    c1 + c2*x + c3*y + c4*x*y, c5 + c6*y + c7*x + c8*x^2

sumOfSquares_y = '+'.join(["(c[0]+c[1]*%s+c[2]*%s+c[3]*%s*%s)**2" %
    (x,y,x,y) for y,x,z in np.transpose(np.nonzero(left_border)) ])
sumOfSquares_y += " + "
sumOfSquares_y += \
    '+'.join(["(-108+c[0]+c[1]*%s+c[2]*%s+c[3]*%s*%s)**2" % \
    (x,y,x,y) for y,x,z in np.transpose(np.nonzero(right_border)) ])
res_y = optimize.minimize(lambda c: eval(sumOfSquares_y),(0,0,0,0),method='SLSQP')

sumOfSquares_x = \
    '+'.join(["(-70+c[0]+c[1]*%s+c[2]*%s+c[3]*%s*%s)**2" % \
    (y,x,x,x) for y,x,z in np.transpose(np.nonzero(bottom_border))])
sumOfSquares_x += " + "
sumOfSquares_x += \
    '+'.join( [ "(c[0]+c[1]*%s+c[2]*%s+c[3]*%s*%s)**2" % \
    (y,x,x,x) for y,x,z in np.transpose(np.nonzero(top_border)) ] )
res_x = optimize.minimize(lambda c: eval(sumOfSquares_x),(0,0,0,0), method='SLSQP')


# Map the image using equatinos 1 and 2 (coeficients c1...c8 in res_x and res_y)
def map_x(res, cord):
    m = res[0]+res[1]*cord[1]+res[2]*cord[0]+res[3]*cord[1]*cord[0]
    return m
def map_y(res, cord):
    m = res[0]+res[1]*cord[0]+res[2]*cord[1]+res[3]*cord[1]*cord[1]
    return m

flattened = np.zeros(img.shape, img.dtype) 
for y,x,z in np.transpose(np.nonzero(mask)):
    new_y = map_y(res_x.x,[y,x]) 
    new_x = map_x(res_y.x,[y,x])
    flattened[float(new_y)][float(new_x)] = img[y][x]
# Crop the image 
flattened = flattened[0:70, 0:105]
Flattened Image

There is fair amount of distortion in the flattened image. Alternatively, use PiecewiseAffineTransform from SciKit-image:
# Use  skimage to transform the image
leftmost = tuple(cnt[cnt[:,:,0].argmin()][0])
rightmost = tuple(cnt[cnt[:,:,0].argmax()][0])
topmost = tuple(cnt[cnt[:,:,1].argmin()][0])
bottommost = tuple(cnt[cnt[:,:,1].argmax()][0])

dst = list()
src = list()
for y,x,z in np.transpose(np.nonzero(top_border)):
    dst.append([x,y])
    src.append([x,topmost[1]])
for y,x,z in np.transpose(np.nonzero(bottom_border)):
    dst.append([x,y])
    src.append([x,bottommost[1]])
for y,x,z in np.transpose(np.nonzero(left_border)):
    dst.append([x,y])
    src.append([leftmost[0],y])
for y,x,z in np.transpose(np.nonzero(right_border)):
    dst.append([x,y])
    src.append([rightmost[0],y])
src = np.array(src)
dst = np.array(dst)

tform3 = tf.PiecewiseAffineTransform()
tform3.estimate(src, dst)
warped = tf.warp(img, tform3, order=2)
warped = warped[85:170, 31:138]

Flattened label usgin skimage

Tuesday, March 11, 2014

VM Images for Windows, Android, and OS X


If you need to test your source code on multiple platforms, you will probably end-up using one or more virtual machines. I test my code on five different operating systems. I have an Ubuntu development machine, on which I have virtual machines for Windows 7, Android, and OS X Maverick (with the iOS simulator). These are all readily available for download. 

Windows VM images can be found here: http://www.modern.ie/en-us/virtualization-tools#downloads



OS X VM image can be found here (link to a torrent): http://www.souldevteam.net/blog/2012/06/15/os-x-mountain-lion-10-8-dp4-vmware-image-release-notes-links/

VMware tools for OS X can be found here:

iOS Simulator is available on OS X via Xcode.



Android VM can be found here: http://www.genymotion.com/
See my previous post on how-to install.



Monday, February 24, 2014

How-to Install OpenCV on an Android VM

Installing an Android emulator on Ubuntu is actually quite easy. Below, are the steps I took to get OpenCV 2.4.5 working on a Android emulator installed on my Ubuntu 12.04 machine. Most guides I have read assume that you are going to be using Eclipse. I don't use Eclipse.

1) Install the JDK and ant. 

:~$ sudo apt-get install openjdk-6-jdk 
:~$ sudo apt-get install ant 
:~$ export JAVA_HOME=/usr/lib/jvm/java-6-openjdk 
:~$ export PATH=$PATH:/usr/lib/jvm/java-6-openjdk/bin 

Sanity check: type “javac” in your terminal to confirm JDK is available. 

2) Install the Android SDK. Download android-sdk_r22.3-linux.tgz and unpack it: 

:~$ cd android-sdk-linux 
:~$ tools/android update sdk –no-ui 
:~$ export PATH=$PATH:~android-sdk-linux/tools :~android-sdk-linux/platform-tools 

3) Install the Android emulator from Genymotion and add a virtual device. Once installed, use its UI to “Add” a virtual device: 

:~$ cd genymotion 
:~$ ./genymotion 

In order to enable the drag & drop of “apk” files from the host to the Android VM, enter the path to the android-sdk-linux directory in the Genymotion settings, on the ADB tab. 

Start the VM. 

4) Install OpenCV on the Android VM. 
Download OpenCV 2.4.5  and unpack it. 
Drag & drop OpenCV-2.4.5-android-sdk/apk/OpenCV_2.4.8_Manager_2.16_x86.apk into the VM. 

Sanity check: In the Android emulator click the “OpenCV Manager” to confirm that you see no error messages. 

5) Test an OpenCV sample application. Compile the 15-puzzle application, for example: 

:~$ cd OpenCV-2.4.8-android-sdk/samples/15-puzzle 
:~$ android update project --path . 
:~$ ant debug 
:~$ cd bin 

Sanity check: Drag & drop OpenCV-2.4.8-android-sdk/samples/15-puzzle/bin/Puzzle15Activity-debug.apk into the emulator. Confirm that it works. 

6) Test an OpenCV native C++ application. Download and unpack android-ndk-r9c-linux-x86_64.tar from http://developer.android.com/tools/sdk/ndk/index.html 

:~$ export PATH=$PATH:~android-ndk-r9c/ 
:~$ cd OpenCV-2.4.8-android-sdk/samples/native-activity 

Modify the ABI in Android.mk: APP_ABI := all 
:~$ vi jni/Android.mk 

Build it:

:~$ android update project --path . 
:~$ ndk-build -B 
:~$ ant debug 
:~$ cd bin

Wednesday, February 05, 2014

Implement Data Parallelism on a GPU Directly in C++

Download: https://github.com/dimitrs/cpp-opencl

The cpp-opencl project provides a way to make programming GPUs easy for the developer. It allows you to implement data parallelism on a GPU directly in C++ instead of using OpenCL. See the example below. The code in the parallel_for_each lambda function is executed on the GPU, and all the rest is executed on the CPU. More specifically, the “square” function is executed both on the CPU (via a call to std::transform) and the GPU (via a call to compute::parallel_for_each). Conceptually, compute::parallel_for_each is similar to std::transform except that one executes code on the GPU and the other on the CPU.

#include <vector>
#include <stdio.h>
#include "ParallelForEach.h"

template<class T> 
T square(T x)  
{
    return x * x;
}

void func() {
  std::vector<int> In {1,2,3,4,5,6};
  std::vector<int> OutGpu(6);
  std::vector<int> OutCpu(6);

  compute::parallel_for_each(In.begin(), In.end(), OutGpu.begin(), [](int x){
      return square(x);
  });

  
  std::transform(In.begin(), In.end(), OutCpu.begin(), [](int x) {
    return square(x);
  });

  // 
  // Do something with OutCpu and OutGpu …..........

  //

}

int main() {
  func();
  return 0;
}


Function Overloading 

Additionally, it is possible to overload functions. The “A::GetIt” member function below is overloaded. The function marked as “gpu” will be executed on the GPU and other on the CPU.

struct A {
  int GetIt() const __attribute__((amp_restrict("cpu"))) {
    return 2;
  }
  int GetIt() const __attribute__((amp_restrict("gpu"))) {
    return 4;
  }
};

compute::parallel_for_each(In.begin(), In.end(), OutGpu.begin(), [](int x){
    A a; 
    return a.GetIt(); // returns 4
});



Build the Executable 

The tool uses a special compiler based on Clang/LLVM. 

cpp_opencl -x c++ -std=c++11 -O3 -o Input.cc.o -c Input.cc 

The above command generates four files: 
1. Input.cc.o 
2. Input.cc.cl 
3. Input.cc_cpu.cpp 
4. Input.cc_gpu.cpp 

Use the Clang C++ compiler directly to link: 

clang++ ./Input.cc.o -o test -lOpenCL 


Then just execute: 

./test

Friday, December 27, 2013

Writing OpenCL Kernels in C++

Download: https://github.com/dimitrs/cpp-opencl/tree/first_blog_post

In this post I would like to present my C++ to OpenCL C source transformation tool. Source code written in the OpenCL C language can now be written in C++ instead (even C++11). It is even possible to execute some of the C++ standard libraries on the GPU.

The tool compiles the C++ source into LLVM byte-code, and uses a modified version of the LLVM 'C' back-end to disassemble the byte-code into OpenCL 'C'. You can see how it is used by looking at the tests provided in the project.

This code using std::find_if and a lambda function is executed on the GPU.

#include <algorithm>

extern "C" void _Kernel_find_if(int* arg, int len, int* out) {
    int* it = std::find_if (arg, arg+len, [] (int i) { return ((i%2)==1); } );
    out[0] = *it;
}

This code using C++11's std::enable_if is executed on the GPU.

#include <type_traits>

template<class T>
T foo(T t, typename std::enable_if<std::is_integral<T>::value >::type* = 0)
{
    return 1;
}

template<class T>
T foo(T t, typename std::enable_if<std::is_floating_point<T>::value >::type* = 0)
{
    return 0;
}

extern "C" void _Kernel_enable_if_int_argument(int* arg0, int* out)
{
    out[0] = foo(arg0[0]);
}

Please not that this project is experimental. It would require a lot more work to make it a production quality tool. One limitation of the tool is that containers such as std::vector are not supported (without a stack-based allocator) because OpenCL does not support dynamically or statically allocated memory. The tool does not support atomics which disallows the use of std::string. It does not have support for memcpy and memset which means that some standard algorithms can not be used; std::copy's underlying implementation sometimes uses memcpy.

Thursday, December 26, 2013

C++ Generic Programming: Implementing Runga-kutta

Download: https://github.com/dimitrs/odes

Below, listing 1. is an implementation of a generic algorithm – the Runga-kutta method which is well known for its use in the approximation of solutions of ordinary differential equations. Although the Runga-kutta method is well known and relatively simple, the generic algorithm implementation below is difficult to read. I won't attempt to explain how it works; I wrote it several months ago and I don't remember all the details. The reason why I list it here is to be able to compare it with a non-generic implementation of the algorithm I wrote in listing 2. 

Listing 2. is far easier to read and understand. It is almost a carbon copy of the mathematical equations for the Runga-kutta4 method (see listing 3). The non-generic implementation is easy to read (assuming you are familiar with the equations) and it was very easy to write. I was done in about 30 minutes, whereas the generic version took me a couple of days to complete. So why make the effort ? The generic version allows you to use any container that supports forward iterators e.g. std::array, std::vector, C array, boost::ublas vectors, Blitz++. With the non-generic version you are tied into using only boost::ublas vectors. 

As far as performance is concerned, the generic version using boost::ublas vectors is about 20% slower than Ublass non-generic version. However, if I substitute the container for a std::array, the generic-algorithm is 40% faster. 

Runga-kutta4 Performance of the generic-algorithm using different containers compared to the non-generic version: 

std::vector: 25% faster
Blitz: about the same
ublas:       20% slower
std::array: 40% faster

template <class InputIterator, class ForwardIterator, class T, class Time=double>
std::pair<std::vector<Time>, ForwardIterator> __runge_kutta4(
    InputIterator inBegin, InputIterator inEnd, 
    Time x0, Time x1, int nr_steps, T f, ForwardIterator yout)
{
  typedef typename std::iterator_traits<InputIterator>::value_type 
      value_type;
  auto delta_t = static_cast<Time>((x1-x0)/nr_steps);
  auto nr_equations = std::distance(inBegin, inEnd);
  ForwardIterator youtP = yout;
  yout = std::copy(inBegin, inEnd, yout);
  std::vector<Time> ytime(nr_steps);

  for (auto ii=0; ii< nr_steps; ++ii) {
    typedef typename std::vector<value_type> vec;

    // K1 = f(x, u(x))
    vec v1(nr_equations);
    typename vec::iterator k1 = f(yout-nr_equations, x0, begin(v1));

    // K2 = f(x+deltaX/2, u(x)+K1*deltaX/2)
    vec v2(nr_equations);
    vec y2(nr_equations);
    std::transform(yout-nr_equations, yout, k1, begin(y2), 
      [delta_t](value_type& y,value_type& k) {
          return y+0.5*delta_t*k;
      });
    typename vec::iterator k2 = 
        f(begin(y2), 0.5*delta_t+x0, begin(v2));

    // K3 = f(x+deltaX/2, u(x)+K2*deltaX/2)
    vec v3(nr_equations);
    vec y3(nr_equations);
    std::transform(yout-nr_equations, yout, k2, begin(y3), 
      [delta_t](value_type& y,value_type& k){
          return y+0.5*delta_t*k;
      });
    typename vec::iterator k3 = 
        f(begin(y3), 0.5*delta_t+x0, begin(v3));

    // K4 = f(x+deltaX, u(x)+K2*deltaX)
    vec v4(nr_equations);
    vec y4(nr_equations);
    std::transform(yout-nr_equations, yout, k3, begin(y4), 
      [delta_t](value_type& y, value_type& k){ return y+delta_t*k; });
    typename vec::const_iterator k4 
        = f(begin(y4), delta_t+x0, begin(v4));

    // u(x+deltaX) = u(x) + (1/6) (K1 + 2 K2 + 2 K3 + K4) * deltaX
    for (auto i=0; i<nr_equations; ++i,++yout) {
      *yout = *(yout-nr_equations) + 
        (1.0/6.0) * (k1[i] + 2.0*k2[i] + 2.0*k3[i] + k4[i]) * delta_t;
    }
    ytime[ii] = x0;
    x0 += delta_t;
  }

  return std::make_pair(ytime, yout);
}

Listing 1: Generic Implementation of Runga-kutta4

template <class T, class Vector=boost::numeric::ublas::vector<double> >
Vector rk4(const Vector& init, double x0, double x1, int nr_steps, T f)
{
 Vector y = init;
 auto h = static_cast<double>((x1-x0)/nr_steps);
 double xn = x0;

 for (auto ii=0; ii< nr_steps; ++ii) {
   Vector k1 = f(y, xn);
   Vector k2 = f(y+0.5*h*k1, 0.5*h+xn);
   Vector k3 = f(y+0.5*h*k2, 0.5*h+xn);
   Vector k4 = f(y+h*k3, h+xn);

   xn += h;
   y = u + (1.0/6.0) * (k1 + 2.0*k2 + 2.0*k3 + k4) * h;
 }
 return y;
}

Listing 2: Non-Generic Implementation of Runga-kutta4 (using ublas)

for n = 0,1,2,3,..., using
 k1 = h f(xn, yn)
 k2 = h f(xn + h/2, yn + k1/2)
 k3 = h f(xn + h/2, yn + k2/2)
 k4 = h f(xn + h, yn + k3)

 xn+1 = xn + h
 yn+1 = yn + h(1/6)(k1 + 2k2 + 2k3 + k4)

Listing 3: Runga-kutta4 equations.