-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdigitcapture.py
104 lines (85 loc) · 3.45 KB
/
digitcapture.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python
'''
Scans free-standing digits.
'''
import numpy as np
import cv2, sys, os
from opencv_functions import mosaic, prepKNN
SCRIPT_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) # needed because ev3's brickman messes with relative paths - see https://github.com/ev3dev/ev3dev/issues/263
GAUSSIAN_BLUR_RADIUS = 11 # must be odd
CELL_SIZE = 20
CELL_SPACING = 2
DIGIT_MIN_SIZE = 20
KNN_K = 6
def read(inputImage, dataset='handwritten_digits'):
'''
Processes inputImage to find digits.
Returns (retval, digits, digitImages).
retval will be True if digit(s) are found, False otherwise.
digits will be returned as a list of integers.
digitsImage will contain a list of images of each found digit, each CELL_SIZE x CELL_SIZE pixels large,
in numpy float32 array format.
'''
assert (dataset=='sudoku_digits') or (dataset=='handwritten_digits') # safety check - dataset parameter will be used in file paths
samplesFile = SCRIPT_DIRECTORY + '/data/' + dataset + '/samples.npy'
labelsFile = SCRIPT_DIRECTORY + '/data/' + dataset + '/labels.npy'
# pre-process image
processedImage = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)
processedImage = cv2.GaussianBlur(processedImage, (GAUSSIAN_BLUR_RADIUS,GAUSSIAN_BLUR_RADIUS), 0)
processedImage = cv2.adaptiveThreshold(processedImage, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 1)
# find contours in image
contours, hierarchy = cv2.findContours(processedImage.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# separate digits
cells = []
for contour in contours:
x, y, width, height = cv2.boundingRect(contour)
if (width > DIGIT_MIN_SIZE) and (height > DIGIT_MIN_SIZE):
if width > height:
y -= (width-height)//2
height = width
else:
x -= (height-width)//2
width = height
x -= CELL_SPACING
y -= CELL_SPACING
height += CELL_SPACING*2
width += CELL_SPACING*2
perspectiveMatrix = cv2.getPerspectiveTransform(np.float32([[x,y], [x,y+height], [x+width,y+height], [x+width,y]]), np.float32([[0,0], [0,CELL_SIZE], [CELL_SIZE,CELL_SIZE], [CELL_SIZE,0]]))
cell = cv2.warpPerspective(processedImage, perspectiveMatrix, (CELL_SIZE,CELL_SIZE))
cells.append(cell)
if len(cells) == 0:
return (False, [], [])
# prepare and train KNN
samples = np.load(samplesFile)
samples = prepKNN(samples, CELL_SIZE)
labels = np.load(labelsFile).astype(int)
knn = cv2.KNearest()
knn.train(samples, labels)
# process each cell - if the cell has a digit (largest contour area exceeds threshold area), then apply knn to cell
digits = [0 for i in range(len(cells))]
for i in range(len(cells)):
retval, results, neighborResponses, dists = knn.find_nearest(prepKNN([cells[i]], CELL_SIZE), KNN_K)
digits[i] = int(results.ravel()[0])
return (True, digits, cells)
if __name__ == '__main__':
WEBCAM_NUMBER = 0
# capture an image from webcam
print "Capturing digits image from webcam #" + str(WEBCAM_NUMBER) + "..."
cam = cv2.VideoCapture(WEBCAM_NUMBER)
retval, inputImage = cam.read()
if not retval:
print 'Failed to read from webcam #' + str(WEBCAM_NUMBER)
sys.exit(1)
# read digits from image
retval, digits, digitImages = read(inputImage)
if retval:
print 'Found ' + str(len(digitImages)) + ' digit(s):'
print digits
cv2.imshow('Input Image', inputImage)
cv2.imshow('Digit Images', mosaic(25, digitImages))
else:
print 'No digit found in input image.'
cv2.imshow('Input Image', inputImage)
print 'Press any key to exit...'
cv2.waitKey(0)
cv2.destroyAllWindows()