Computer Vision in Practice (14): Answer Sheet Identification (with Complete Code)

My WeChat official account: AI research subscription number
WeChat official account ID: MultiAgent1024
About the account: we mainly study reinforcement learning, computer vision, deep learning, machine learning, and related topics, and share notes and experiences from the learning process. We look forward to your attention, and welcome you to learn, exchange, and progress together!

Project introduction:

Given a photo of a multiple-choice answer sheet (shown below), we need to identify which option is filled in for each question:
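All of the snippets that follow assume some setup that the post does not show explicitly: the imports, a command-line argument for the image path, the cv_show display helper, and the ANSWER_KEY / correct variables used by the grading loop at the end. Here is a minimal sketch of that setup; the argument name and the example ANSWER_KEY values are assumptions, not taken from the original post.

import argparse
import cv2
import numpy as np

# Command-line argument for the input image path (assumed; the code below reads args["image"])
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="path to the answer sheet image")
args = vars(ap.parse_args())

# Small helper used throughout to display intermediate results
def cv_show(name, img):
   cv2.imshow(name, img)
   cv2.waitKey(0)
   cv2.destroyAllWindows()

# Example answer key (question index -> correct option index); values are illustrative
ANSWER_KEY = {0: 1, 1: 4, 2: 0, 3: 3, 4: 1}
correct = 0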

# Preprocessing: grayscale, Gaussian blur to suppress noise, then Canny edge detection
image = cv2.imread(args["image"])
contours_img = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
cv_show('blurred', blurred)
edged = cv2.Canny(blurred, 75, 200)
cv_show('edged', edged)

                  

The result after edge detection:

Next, we find the external contours on the edge map and draw them on a copy of the original image:

# findContours returns (image, contours, hierarchy) in OpenCV 3 but
# (contours, hierarchy) in OpenCV 4; index [-2] picks the contours in both
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
   cv2.CHAIN_APPROX_SIMPLE)[-2]
cv2.drawContours(contours_img, cnts, -1, (0, 0, 255), 3)
cv_show('contours_img', contours_img)
docCnt = None

We then sort the contours by area and look for the largest one that can be approximated by four vertices, which we take to be the outline of the sheet:

# Make sure at least one contour was found
if len(cnts) > 0:
   # Sort the contours by area, largest first
   cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

   # Loop over the sorted contours
   for c in cnts:
      # Approximate the contour with a simpler polygon
      peri = cv2.arcLength(c, True)
      approx = cv2.approxPolyDP(c, 0.02 * peri, True)

      # If the approximation has four vertices, assume it is the sheet
      # and use it for the perspective transform
      if len(approx) == 4:
         docCnt = approx
         break

Next, we apply a perspective transform to get a top-down view of the sheet. The four_point_transform function maps the four detected corners onto a rectangle:

def four_point_transform(image, pts):
   # Order the input points: top-left, top-right, bottom-right, bottom-left
   rect = order_points(pts)
   (tl, tr, br, bl) = rect

   # Compute the output width from the two horizontal edges
   widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
   widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
   maxWidth = max(int(widthA), int(widthB))

   # Compute the output height from the two vertical edges
   heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
   heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
   maxHeight = max(int(heightA), int(heightB))

   # Destination corners of the top-down view
   dst = np.array([
      [0, 0],
      [maxWidth - 1, 0],
      [maxWidth - 1, maxHeight - 1],
      [0, maxHeight - 1]], dtype="float32")

   # Compute the 3x3 perspective matrix and warp the image
   M = cv2.getPerspectiveTransform(rect, dst)
   warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

   # Return the warped, top-down view
   return warped
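four_point_transform relies on an order_points helper that the post does not include. Below is a minimal sketch of the usual implementation, together with an assumed call site; the warped_color copy (my own name, not from the original) is kept so the red/green marks drawn later are visible on the grayscale warp.

def order_points(pts):
   # Order the four corners: top-left, top-right, bottom-right, bottom-left
   rect = np.zeros((4, 2), dtype="float32")
   s = pts.sum(axis=1)
   rect[0] = pts[np.argmin(s)]     # top-left has the smallest x + y
   rect[2] = pts[np.argmax(s)]     # bottom-right has the largest x + y
   diff = np.diff(pts, axis=1)
   rect[1] = pts[np.argmin(diff)]  # top-right has the smallest y - x
   rect[3] = pts[np.argmax(diff)]  # bottom-left has the largest y - x
   return rect

# Warp the grayscale image for thresholding, and keep a color copy for drawing
warped = four_point_transform(gray, docCnt.reshape(4, 2))
warped_color = cv2.cvtColor(warped, cv2.COLOR_GRAY2BGR)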

The warped grayscale image is then binarized with Otsu's thresholding, which chooses the threshold value automatically when THRESH_OTSU is passed together with a threshold of 0:

# Otsu's threshold processing
thresh = cv2.threshold(warped, 0, 255,
   cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1] 
cv_show('thresh',thresh)

On the thresholded image, we again find the external contours, this time to locate the bubbles:

# Find the contour of each bubble
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
   cv2.CHAIN_APPROX_SIMPLE)[-2]
# Draw on a color copy so the red contour marks are visible
thresh_Contours = cv2.cvtColor(thresh, cv2.COLOR_GRAY2BGR)
cv2.drawContours(thresh_Contours, cnts, -1, (0, 0, 255), 3)
cv_show('thresh_Contours', thresh_Contours)

Not every contour is a bubble, so we keep only those whose bounding box is large enough and roughly square:

questionCnts = []

# Loop over the contours
for c in cnts:
   # Compute the bounding box and its aspect ratio
   (x, y, w, h) = cv2.boundingRect(c)
   ar = w / float(h)

   # Keep bubble-sized, roughly square contours (tune these for your sheet)
   if w >= 20 and h >= 20 and ar >= 0.9 and ar <= 1.1:
      questionCnts.append(c)

# Sort from top to bottom
questionCnts = sort_contours(questionCnts,
   method="top-to-bottom")[0]

The sort_contours helper sorts contours by the x or y coordinate of their bounding boxes, depending on the requested method; here it sorts by the y coordinate:

def sort_contours(cnts, method="left-to-right"):
    reverse = False
    i = 0
    if method == "right-to-left" or method == "bottom-to-top":
        reverse = True
    if method == "top-to-bottom" or method == "bottom-to-top":
        i = 1
    # Sort by the x (i = 0) or y (i = 1) coordinate of each bounding box
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
                                        key=lambda b: b[1][i], reverse=reverse))
    return cnts, boundingBoxes

With the bubbles sorted top to bottom, every group of five consecutive contours is one row, i.e., one question. For each row, the bubble whose mask covers the most non-zero pixels is taken as the marked answer:

# Each question has 5 options, so step through the sorted bubbles 5 at a time
for (q, i) in enumerate(np.arange(0, len(questionCnts), 5)):
   # Sort the five bubbles in this row from left to right
   cnts = sort_contours(questionCnts[i:i + 5])[0]
   bubbled = None

   # Loop over the bubbles in this row
   for (j, c) in enumerate(cnts):
      # Build a mask that isolates just this bubble
      mask = np.zeros(thresh.shape, dtype="uint8")
      cv2.drawContours(mask, [c], -1, 255, -1)  # thickness -1 fills the contour
      cv_show('mask', mask)
      # Count the non-zero pixels inside the bubble; a filled bubble has more
      mask = cv2.bitwise_and(thresh, thresh, mask=mask)
      total = cv2.countNonZero(mask)

      # Keep the bubble with the largest filled-in area
      if bubbled is None or total > bubbled[0]:
         bubbled = (total, j)

   # Look up the correct answer for this question
   color = (0, 0, 255)
   k = ANSWER_KEY[q]

   # If the bubbled answer matches, mark it green and count it
   if k == bubbled[1]:
      color = (0, 255, 0)
      correct += 1

   # Draw the correct option on the color copy of the warped sheet
   cv2.drawContours(warped_color, [cnts[k]], -1, color, 3)
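Finally, the score follows from the correct counter, and the annotated sheet can be displayed. A minimal sketch, assuming the five-question ANSWER_KEY from the setup at the top:

# Score as a percentage of the 5 questions (assumes the example ANSWER_KEY above)
score = (correct / 5.0) * 100
print("[INFO] score: {:.2f}%".format(score))
cv_show('exam', warped_color)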

In the final result, the correct option for each question is outlined in green if it was the one bubbled in, and in red otherwise.

For the complete code, reply "card identification" in the background of the WeChat public account.
