Optical Character Recognition

 2017/12/23

Image Segmentation

import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np

def showImage(image):
    """Display ``image`` in a new 8x8-inch matplotlib figure."""
    figure_size = (8, 8)
    plt.figure(figsize=figure_size)
    plt.imshow(image)
    plt.show()

# Load the photographed sheet as a 3-channel colour image and display it.
img = cv2.imread('IMG_0377_o.JPG', cv2.IMREAD_COLOR)
showImage(img)

png

Use the Canny Algorithm in OpenCV to extract the edges

The Canny algorithm is applied to extract the edges in this image; the edges can then be used to locate contours.

# Detect edges with Canny, using 200 and 240 as the two thresholds.
edges = cv2.Canny(img, 200, 240)

# Show the source image and the edge map side by side, without axis ticks.
plt.figure(figsize=(16, 8))
for position, picture, caption in ((121, img, 'Original Image'),
                                   (122, edges, 'Edge Image')):
    plt.subplot(position)
    plt.imshow(picture, cmap='gray')
    plt.title(caption)
    plt.xticks([])
    plt.yticks([])
plt.show()

png

Find contours based on the edges extracted from the image

The edges found by Canny are used to find the contours of this image. From the image shown below we can see that a lot of contours are extracted from the image.

# cv2.findContours returns (image, contours, hierarchy) in OpenCV 3.x but only
# (contours, hierarchy) in OpenCV 2.x and 4.x. Taking the last two items keeps
# this line working on every version.
contours, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Draw every contour (index -1) in green, 10 px thick, on a copy of the image.
imcopy = img.copy()
cv2.drawContours(imcopy, contours, -1, (0, 255, 0), 10)
showImage(imcopy)

png

Find the contour with the largest area

For this problem, the white paper sheet in this image has the largest contour, so we can extract the contour of the sheet by finding the contour with the largest area.

# Keep only the contour that encloses the largest area.
c = max(contours, key=cv2.contourArea)

imcopy = img.copy()
# drawContours expects a *list* of contours. Passing the bare contour would
# make OpenCV treat each of its points as a separate one-point contour, so the
# single contour is wrapped in a list (as is done for the polygon further down).
cv2.drawContours(imcopy, [c], -1, (0, 255, 0), 10)
showImage(imcopy)

png

Approximate the Contour

After finding the contour of the white paper sheet, we can use a geometric shape such as a rectangle or a polygon to approximate it. From the results shown below, we can see that the polygon approximates the shape well.

rectangle approximation

# Draw the upright bounding rectangle of the largest contour in green.
imcopy = img.copy()
x, y, w, h = cv2.boundingRect(c)
top_left = (x, y)
bottom_right = (x + w, y + h)
cv2.rectangle(imcopy, top_left, bottom_right, (0, 255, 0), 10)
showImage(imcopy)

png

Polygon approximation

By applying polygon approximation, the 4 corner points are extracted; they are used in the later perspective-transformation experiment.

# Simplify the closed contour to a polygon; the tolerance is 1% of its perimeter.
perimeter = cv2.arcLength(c, True)
epsilon = 0.01 * perimeter
approx = cv2.approxPolyDP(c, epsilon, True)

imcopy = img.copy()
cv2.drawContours(imcopy, [approx], -1, (0, 255, 0), 12)

# Label the first four polygon vertices at their own coordinates.
for corner_index, caption in enumerate(('Point 1', 'Point 2', 'Point 3', 'Point 4')):
    corner = approx[corner_index][0]
    cv2.putText(imcopy, caption, (corner[0], corner[1]),
                cv2.FONT_HERSHEY_PLAIN, 6, (255, 0, 0), 10, cv2.LINE_AA)
showImage(imcopy)

png

Applied Perspective Transformation

A projective transformation (perspective transformation) is the combination of an affine transformation and a projective warp.
Suppose $(x, y, 1)$ is a point in homogeneous coordinates. The projective transformation of this point is as follows. $\begin{bmatrix} x'\\ y'\\ w'\\ \end{bmatrix} = \begin{bmatrix} a & b & c \\ d & e & f \\ g & h & 1 \\ \end{bmatrix} \begin{bmatrix} x\\ y\\ 1\\ \end{bmatrix}$

This 8-parameter matrix maps the point $(x, y, 1)$ in one projective plane to the point $(x'/w', y'/w', 1)$ in another.

$\frac{x'}{w'} = \frac{ax+by+c}{gx+hy+1}$ $\frac{y'}{w'} = \frac{dx+ey+f}{gx+hy+1}$

Each point mapping gives 2 equations, so to solve for the 8 parameters of this transformation we need at least 4 point mappings. Once the transformation has been solved, we can apply it to get a new image.

# Warp the detected sheet onto an upright 425x550 canvas.
imcopy = img.copy()
pts1 = np.float32(approx)
# The output size is proportional to US Letter paper: 425:550 = 8.5:11.
out_w, out_h = 425, 550
# Destination corners: bottom-left, bottom-right, top-right, top-left.
# NOTE(review): this assumes approx lists the sheet corners in that order - confirm.
pts2 = np.float32([[0, out_h], [out_w, out_h], [out_w, 0], [0, 0]])
M = cv2.getPerspectiveTransform(pts1, pts2)
dst = cv2.warpPerspective(imcopy, M, (out_w, out_h))

# Compare the source image with the rectified sheet.
plt.figure(figsize=(8, 16))
plt.subplot(121)
plt.imshow(imcopy)
plt.title('Input')
plt.subplot(122)
plt.imshow(dst)
plt.title('Output')
plt.show()

png

Build a CNN model with tensorflow

1
2
3

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST data set from the 'MNIST_data' directory with one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

def weight_variable(shape,name):
    """Return a ``tf.Variable`` called ``name`` with the given ``shape``.

    The initial value is drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name=name)
def bias_variable(shape,name):
    """Return a ``tf.Variable`` called ``name`` whose entries all start at 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape), name=name)

def conv2d(x, W):
    """Convolve ``x`` with the filter ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool ``x`` over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

def create_placeholders(n_x=784, n_y=10):
    """Create the float32 placeholders that feed the network.

    Args:
        n_x: Number of input features per example.
        n_y: Number of label entries per example.

    Returns:
        A tuple ``(x, y_, keep_prob)``: an input placeholder of shape
        ``[None, n_x]``, a label placeholder of shape ``[None, n_y]`` and a
        placeholder without a fixed shape for the dropout keep probability.
    """
    inputs = tf.placeholder(tf.float32, shape=[None, n_x])
    labels = tf.placeholder(tf.float32, shape=[None, n_y])
    keep_probability = tf.placeholder(tf.float32)
    return inputs, labels, keep_probability

def initialize_parameters():
    """Create every weight and bias variable of the network.

    Returns:
        A dict mapping the names ``W_conv1``, ``b_conv1``, ``W_conv2``,
        ``b_conv2``, ``W_fc1``, ``b_fc1``, ``W_fc2`` and ``b_fc2`` to the
        corresponding ``tf.Variable``.
    """
    # The entries are evaluated top to bottom, so the variables are created in
    # the order they are listed here.
    return {
        # first convolutional layer: 5x5 kernels, 1 -> 32 channels
        'W_conv1': weight_variable([5, 5, 1, 32], 'W_conv1'),
        'b_conv1': bias_variable([32], 'b_conv1'),
        # second convolutional layer: 5x5 kernels, 32 -> 64 channels
        'W_conv2': weight_variable([5, 5, 32, 64], 'W_conv2'),
        'b_conv2': bias_variable([64], 'b_conv2'),
        # fully connected layer: 7*7*64 -> 1024
        'W_fc1': weight_variable([7 * 7 * 64, 1024], 'W_fc1'),
        'b_fc1': bias_variable([1024], 'b_fc1'),
        # read-out layer: 1024 -> 10
        'W_fc2': weight_variable([1024, 10], 'W_fc2'),
        'b_fc2': bias_variable([10], 'b_fc2'),
    }

def forward_prop(x, keep_prob, parameters):
    """Build the forward pass of the network and return its logits.

    Args:
        x: Input tensor; it is reshaped to ``[-1, 28, 28, 1]``.
        keep_prob: Keep probability handed to ``tf.nn.dropout``.
        parameters: Dict holding ``W_conv1``, ``b_conv1``, ``W_conv2``,
            ``b_conv2``, ``W_fc1``, ``b_fc1``, ``W_fc2`` and ``b_fc2``.

    Returns:
        The output of the read-out layer (no softmax applied).
    """
    conv1_w, conv1_b = parameters['W_conv1'], parameters['b_conv1']
    conv2_w, conv2_b = parameters['W_conv2'], parameters['b_conv2']
    fc1_w, fc1_b = parameters['W_fc1'], parameters['b_fc1']
    fc2_w, fc2_b = parameters['W_fc2'], parameters['b_fc2']

    # Treat every input row as a 28x28 single-channel image.
    images = tf.reshape(x, [-1, 28, 28, 1])

    # Two rounds of convolution -> ReLU -> 2x2 max-pooling.
    pooled1 = max_pool_2x2(tf.nn.relu(conv2d(images, conv1_w) + conv1_b))
    pooled2 = max_pool_2x2(tf.nn.relu(conv2d(pooled1, conv2_w) + conv2_b))

    # Flatten, apply the fully connected layer, then dropout.
    flattened = tf.reshape(pooled2, [-1, 7*7*64])
    hidden = tf.nn.relu(tf.matmul(flattened, fc1_w) + fc1_b)
    hidden_dropped = tf.nn.dropout(hidden, keep_prob)

    return tf.matmul(hidden_dropped, fc2_w) + fc2_b

# Smoke test: build the network once in a fresh default graph, with dropout
# disabled (keep probability 1.0), and print the resulting logits tensor.
tf.reset_default_graph()
with tf.Session() as sess:
    x, y_, keep_prob = create_placeholders()
    parameters = initialize_parameters()
    y_conv = forward_prop(x, 1.0, parameters)
    print("y_conv = " + str(y_conv))

# Training hyper-parameters.
num_epochs = 16
learning_rate = 1e-4
costs = []

# Network inputs, variables and logits.
x, y_, keep_prob = create_placeholders()
parameters = initialize_parameters()
y_conv = forward_prop(x, keep_prob, parameters)

# Loss: mean softmax cross-entropy between the labels and the logits.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)
cross_entropy = tf.reduce_mean(per_example_loss)

## Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train_step = optimizer.minimize(cross_entropy)

# Accuracy: fraction of examples whose arg-max logit matches the arg-max label.
predicted_class = tf.argmax(y_conv, 1)
true_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()

# Mini-batch size and number of mini-batches per epoch. They are defined once
# so that the loop bound, the batch request and the cost average always agree
# (previously 50 and 1100 were repeated as separate literals).
batch_size = 50
num_minibatches = 55000 // batch_size

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(num_epochs):
        epoch_cost = 0.0

        for i in range(num_minibatches):
            batch = mnist.train.next_batch(batch_size)

            # Every 100 steps report the accuracy on the current batch with
            # dropout disabled (keep_prob 1.0).
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
                print('epoch %d, step %d, training accuracy %g' % (epoch, i, train_accuracy))

            # One optimisation step with 50% dropout; accumulate the mean cost
            # of the epoch.
            _, minibatch_cost = sess.run([train_step, cross_entropy],
                                         feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
            epoch_cost += minibatch_cost / num_minibatches

        if epoch % 2 == 0:
            print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
        costs.append(epoch_cost)

    # costs holds one averaged value per epoch.
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations per epoch')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    # Replace the variables in the dict by their evaluated values so that they
    # remain usable after the session is closed.
    parameters = sess.run(parameters)
    print ("Parameters have been trained!")

    print('test accuracy %g' % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

png

Parameters have been trained!
test accuracy 0.9927

Save the parameters to local data

Because the CNN model takes a long time to train, we can save time by storing the trained parameters in a local file.

import pickle

# Write the trained parameter values to params.pkl. The with-block closes the
# file even if pickling fails.
with open("params.pkl", "wb") as params_file:
    pickle.dump(parameters, params_file)

Load data from saved file

# NOTE: unpickling can execute arbitrary code, so only load a params.pkl that
# you created yourself.
with open("params.pkl", "rb") as params_file:
    testparams = pickle.load(params_file)

def predict(x, parameters):
    """Classify a batch of flattened 28x28 images with saved parameters.

    Args:
        x: Values fed to a float32 placeholder of shape ``[None, 784]``.
        parameters: Dict with the entries ``W_conv1``, ``b_conv1``,
            ``W_conv2``, ``b_conv2``, ``W_fc1``, ``b_fc1``, ``W_fc2`` and
            ``b_fc2``; each one is passed through ``tf.convert_to_tensor``.

    Returns:
        The arg-max of the network output along axis 1 for every row of ``x``.
    """
    names = ('W_conv1', 'b_conv1', 'W_conv2', 'b_conv2',
             'W_fc1', 'b_fc1', 'W_fc2', 'b_fc2')
    tensors = {name: tf.convert_to_tensor(parameters[name]) for name in names}

    x_input = tf.placeholder(tf.float32, shape=[None, 784])

    y_out = forward_propagation_for_predict(x_input, tensors)
    p = tf.argmax(y_out, 1)

    # Run inside a with-block so the session is closed again; previously a new
    # session was opened on every call and never released.
    # NOTE: every call still adds new ops to the default graph.
    with tf.Session() as sess:
        prediction = sess.run(p, feed_dict={x_input: x})
    return prediction

def forward_propagation_for_predict(x, parameters):
    """Build the inference-time forward pass and return the logits.

    The layers are exactly those of ``forward_prop`` with the dropout keep
    probability fixed at 1.0, so this delegates to it instead of repeating
    the layer definitions.

    Args:
        x: Input tensor; it is reshaped to ``[-1, 28, 28, 1]``.
        parameters: Dict holding ``W_conv1``, ``b_conv1``, ``W_conv2``,
            ``b_conv2``, ``W_fc1``, ``b_fc1``, ``W_fc2`` and ``b_fc2``.

    Returns:
        The output of the read-out layer (no softmax applied).
    """
    return forward_prop(x, 1.0, parameters)

Experiment with Self-Written Digit Images

import cv2

1
2
3

# Load the handwritten-digit picture and convert it to grayscale.
# cv2.imread returns the channels in BGR order, so the conversion code must be
# COLOR_BGR2GRAY; COLOR_RGB2GRAY would swap the red and blue weights.
img = cv2.imread('test_digit4.png', cv2.IMREAD_COLOR)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Smoothed copy (5x5 Gaussian kernel) used for the preview.
im_gray = cv2.GaussianBlur(img, (5, 5), 0)

1
2
3

# Preview the blurred grayscale image.
preview_options = {'cmap': 'gray', 'interpolation': 'bicubic'}
plt.figure(figsize=(10, 5))
plt.imshow(im_gray, **preview_options)
plt.show()

png

# Smooth the grayscale image with a 5x5 Gaussian kernel.
blur_img = cv2.GaussianBlur(img, (5, 5), 0)

1
2
3

# Inverse binary threshold at 127: pixels above 127 become 0, the rest 255.
ret, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)

# cv2.findContours returns three values in OpenCV 3.x and two in 2.x/4.x;
# the last two are always (contours, hierarchy).
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# One (x, y, w, h) bounding box per contour.
rects = [cv2.boundingRect(contour) for contour in contours]

# Preview the thresholded image.
plt.imshow(thresh, cmap='gray', interpolation='bicubic')
plt.show()

png

def generateImage(small):
    """Fit a cropped digit into a 28x28 ``uint8`` canvas.

    The crop is resized to a height of 20 pixels with its aspect ratio kept,
    dilated, and pasted into the centre of an all-zero 28x28 image.

    Args:
        small: 2-D (single-channel) image array containing one digit.

    Returns:
        A ``(28, 28)`` ``numpy.uint8`` array with the digit centred.

    Raises:
        ValueError: If ``small`` has no rows or no columns.
    """
    row, col = small.shape
    if row == 0 or col == 0:
        raise ValueError("cannot normalise an empty image of shape %r" % (small.shape,))

    # Scale to a height of 20 px. The width is clamped to 1..28: a very thin
    # crop would otherwise ask cv2.resize for a width of 0, and a very wide one
    # would not fit into the 28x28 canvas.
    target_width = min(28, max(1, 20 * col // row))
    small = cv2.resize(small, (target_width, 20), interpolation=cv2.INTER_AREA)
    # NOTE(review): (3, 3) is passed as the kernel itself rather than a 3x3
    # structuring element such as np.ones((3, 3), np.uint8) - confirm intent.
    small = cv2.dilate(small, (3, 3))

    blank_image = np.zeros((28, 28), np.uint8)
    row, col = small.shape
    top = (28 - row) // 2
    left = (28 - col) // 2
    # Paste the digit into the centre of the canvas in one slice assignment.
    blank_image[top:top + row, left:left + col] = small
    return blank_image

# Cut every detected digit out of the thresholded image and normalise it to a
# 28x28 picture.
images = []
for rect in rects:
    box_x, box_y, box_w, box_h = rect
    # Draw the rectangles
    cv2.rectangle(img, (box_x, box_y), (box_x + box_w, box_y + box_h), (0, 255, 0), 3)
    # Region of interest: 10% larger than the bounding box and centred on it.
    roi_rows = int(box_h * 1.1)
    roi_cols = int(box_w * 1.1)
    top = box_y + box_h // 2 - roi_rows // 2
    left = box_x + box_w // 2 - roi_cols // 2
    # Clamp the start indices at 0: a negative start would be read by numpy as
    # "count from the end" and give a wrong or empty crop for digits that touch
    # the top or left border. The end indices are clipped by numpy itself.
    roi = thresh[max(0, top):top + roi_rows, max(0, left):left + roi_cols]
    # Resize, dilate and centre the crop on a 28x28 canvas.
    roi = generateImage(roi)
    images.append(roi)

# Show every normalised digit in a grid of five columns. The grid keeps its
# 2x5 layout for up to ten digits and gains rows beyond that, where a fixed
# 2x5 grid would make add_subplot fail.
fig = plt.figure(figsize=(12, 4))
grid_cols = 5
grid_rows = max(2, -(-len(images) // grid_cols))  # ceiling division
for i, image in enumerate(images):
    sub_fig = fig.add_subplot(grid_rows, grid_cols, i + 1)
    sub_fig.imshow(image, cmap='gray', interpolation='bicubic')
plt.show()

png

Test on my own handwriting

# Classify every extracted digit and show it with the predicted label.
fig_out = plt.figure(figsize=(12, 6))
grid_cols = 5
grid_rows = max(2, -(-len(images) // grid_cols))  # ceiling division

# Classify all digits with a single predict() call, so only one graph and one
# session are built instead of one per image. The call is skipped when no
# digit was found, because an empty batch cannot be fed to the placeholder.
if images:
    test_im = np.array([image.reshape(28*28) for image in images], 'float32')
    my_image_prediction = predict(test_im, testparams)
else:
    my_image_prediction = []

for i, (image, label) in enumerate(zip(images, my_image_prediction)):
    sub_fig = fig_out.add_subplot(grid_rows, grid_cols, i + 1)
    sub_fig.annotate(str(label), xy=(2, 1), size=25, color='#ee8d18', xytext=(4, 4))
    sub_fig.imshow(image.reshape(28, 28), cmap='gray', interpolation='bicubic')
plt.show()

png

CATALOG

1. Image Segmentation
2. Build a CNN model with tensorflow



缺失模块。
1、请确保node版本大于6.2
2、在博客根目录（注意不是archer根目录）执行以下命令：
npm i hexo-generator-json-content --save
3、在根目录_config.yml里添加配置：

jsonContent:
  meta: false
  pages: false
  posts:
    title: true
    date: true
    path: true
    text: false
    raw: false
    content: false
    slug: false
    updated: false
    comments: false
    link: false
    permalink: false
    excerpt: false
    categories: false
    tags: true