How do I detect vertical text with OpenCV for extraction

Question

I am new to OpenCV and trying to see if I can find a way to detect vertical text for the image attached. In this case on row 3 , I would like to get the bounding box around Original Cost and the amount below ($200,000.00).
Similarly I would like to get the bounding box around Amount Existing Liens and the associated amount below. I then would use this data to send to an OCR engine to read text. Traditional OCR engines go line by line and extract and loses the context. Here is what I have tried so far -

import cv2
import numpy as np

img = cv2.imread('Test3.png')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

edges = cv2.Canny(gray,100,100,apertureSize = 3)
cv2.imshow('edges',edges)
cv2.waitKey(0)

minLineLength = 20
maxLineGap = 10
lines = cv2.HoughLinesP(edges,1,np.pi/180,15,minLineLength=minLineLength,maxLineGap=maxLineGap)

for x in range(0, len(lines)):
    for x1,y1,x2,y2 in lines[x]:
        cv2.line(img,(x1,y1),(x2,y2),(0,255,0),2)

cv2.imshow('hough',img)
cv2.waitKey(0)

Can you consider that it is in same position every time, so you crop a box in this position and execute OCR? — dpetrini
This is potentially just one kid of image type and typical OCR's read left to right an in the above image would read " Year " - Bounding Box , "Original Cost - Bounding Box, " Amount Existing Liens - Bounding Box and so forth.Hence I was trying to find an OpenCV solution that can detection rectangles and text within so it does not lose the context. — ESLearner

S_Bersier S_Bersier · Accepted Answer · 2020-08-18T14:21:57

Here is my solution based on Kanan Vyas and Adrian Rosenbrock

It's probably not as "canonical" as you'd wish. But it seems to work (more or less...) with the image you provided.

Just a word of CAUTION: The code looks within the directory from which it is running, for a folder named "Cropped" where cropped images will be stored. So, don't run it in a directory which already contains a folder named "Cropped" because it deletes everything in this folder at each run. Understood? If you're unsure run it in a separate folder.

The code:

# Import required packages 
import cv2 
import numpy as np
import pathlib

###################################################################################################################################
# https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
###################################################################################################################################
def sort_contours(cnts, method="left-to-right"):
    # initialize the reverse flag and sort index
    reverse = False
    i = 0
    # handle if we need to sort in reverse
    if method == "right-to-left" or method == "bottom-to-top":
        reverse = True
    # handle if we are sorting against the y-coordinate rather than
    # the x-coordinate of the bounding box
    if method == "top-to-bottom" or method == "bottom-to-top":
        i = 1
    # construct the list of bounding boxes and sort them from top to
    # bottom
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
        key=lambda b:b[1][i], reverse=reverse))
    # return the list of sorted contours and bounding boxes
    return (cnts, boundingBoxes)




###################################################################################################################################
# https://medium.com/coinmonks/a-box-detection-algorithm-for-any-image-containing-boxes-756c15d7ed26    (with a few modifications)
###################################################################################################################################
def box_extraction(img_for_box_extraction_path, cropped_dir_path):
    img = cv2.imread(img_for_box_extraction_path, 0)  # Read the image
    (thresh, img_bin) = cv2.threshold(img, 128, 255,
                                      cv2.THRESH_BINARY | cv2.THRESH_OTSU)  # Thresholding the image
    img_bin = 255-img_bin  # Invert the imagecv2.imwrite("Image_bin.jpg",img_bin)
   
    # Defining a kernel length
    kernel_length = np.array(img).shape[1]//200
     
    # A verticle kernel of (1 X kernel_length), which will detect all the verticle lines from the image.
    verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))
    # A horizontal kernel of (kernel_length X 1), which will help to detect all the horizontal line from the image.
    hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
    # A kernel of (3 X 3) ones.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))# Morphological operation to detect verticle lines from an image
    img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
    verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)
    #cv2.imwrite("verticle_lines.jpg",verticle_lines_img)# Morphological operation to detect horizontal lines from an image
    img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
    horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)
    #cv2.imwrite("horizontal_lines.jpg",horizontal_lines_img)# Weighting parameters, this will decide the quantity of an image to be added to make a new image.
    alpha = 0.5
    beta = 1.0 - alpha
    # This function helps to add two image with specific weight parameter to get a third image as summation of two image.
    img_final_bin = cv2.addWeighted(verticle_lines_img, alpha, horizontal_lines_img, beta, 0.0)
    img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)
    (thresh, img_final_bin) = cv2.threshold(img_final_bin, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)# For Debugging
    # Enable this line to see verticle and horizontal lines in the image which is used to find boxes
    #cv2.imwrite("img_final_bin.jpg",img_final_bin)
    # Find contours for image, which will detect all the boxes
    contours, hierarchy = cv2.findContours(
        img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # Sort all the contours by top to bottom.
    (contours, boundingBoxes) = sort_contours(contours, method="top-to-bottom")
    idx = 0
    for c in contours:
        # Returns the location and width,height for every contour
        x, y, w, h = cv2.boundingRect(c)# If the box height is greater then 20, widht is >80, then only save it as a box in "cropped/" folder.
        if (w > 50 and h > 20):# and w > 3*h:
            idx += 1
            new_img = img[y:y+h, x:x+w]
            cv2.imwrite(cropped_dir_path+str(x)+'_'+str(y) + '.png', new_img)


###########################################################################################################################################################
def prepare_cropped_folder():
   p=pathlib.Path('./Cropped')
   if p.exists():   # Cropped folder non empty. Let's clean up
      files = [x for x in p.glob('*.*') if x.is_file()]
      for f in files:
         f.unlink()
   else:
      p.mkdir()

###########################################################################################################################################################
# MAIN
###########################################################################################################################################################
prepare_cropped_folder()

# Read image from which text needs to be extracted 
img = cv2.imread("dkesg.png") 

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 
  
# Performing OTSU threshold 
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV) 

thresh1=255-thresh1
bin_y=np.zeros(thresh1.shape[0])

for x in range(0,len(bin_y)):
    bin_y[x]=sum(thresh1[x,:])

bin_y=bin_y/max(bin_y)

ry=np.where(bin_y>0.995)[0]

for i in range(0,len(ry)):
   cv2.line(img, (0, ry[i]), (thresh1.shape[1], ry[i]), (0, 0, 0), 1)

# We need to draw abox around the picture with a white border in order for box_detection to work
cv2.line(img,(0,0),(0,img.shape[0]-1),(255,255,255),2)
cv2.line(img,(img.shape[1]-1,0),(img.shape[1]-1,img.shape[0]-1),(255,255,255),2)
cv2.line(img,(0,0),(img.shape[1]-1,0),(255,255,255),2)
cv2.line(img,(0,img.shape[0]-1),(img.shape[1]-1,img.shape[0]-1),(255,255,255),2)

cv2.line(img,(0,0),(0,img.shape[0]-1),(0,0,0),1)
cv2.line(img,(img.shape[1]-3,0),(img.shape[1]-3,img.shape[0]-1),(0,0,0),1)
cv2.line(img,(0,0),(img.shape[1]-1,0),(0,0,0),1)
cv2.line(img,(0,img.shape[0]-2),(img.shape[1]-1,img.shape[0]-2),(0,0,0),1)


cv2.imwrite('out.png',img)
box_extraction("out.png", "./Cropped/")

Now... It puts the cropped regions in the Cropped folder. They are named as x_y.png with (x,y) the position on the original image.

Here are two examples of the outputs

and

Now, in a terminal. I used pytesseract on these two images.

The results are the following:

1)

Original Cost

$200,000.00

2)

Amount Existing Liens

$494,215.00

As you can see, pytesseract got the amount wrong in the second case... So, be careful.

Best regards, Stéphane

How do I detect vertical text with OpenCV for extraction

3 Answers