1 vote

Background details

I have a model that classifies a hand gesture into one of 26 classes. The accuracy of my model is 99%.

Now I want to take images from a webcam and make real-time predictions. For this purpose, I want to create a region on the webcam feed where I can place my hand gesture, and use only that region as the input image; this way I can eliminate the background noise that hurts my model's real-time accuracy.

Current source code:

import cv2

# `model`, `myDict` and `text_to_audio` are defined elsewhere:
# a pre-trained Keras classifier, a label map and a text-to-speech helper

videoCaptureObject = cv2.VideoCapture(0)
result = True
while(result):
    ret,frame = videoCaptureObject.read()
    # here I want to display a square region on the webcam feed
    # and capture only that square region
    cv2.imshow("test", frame)
    k = cv2.waitKey(1)

    if k%256 == 27:
       print("Escape hit, closing...")
       break
    elif k%256 == 32:
        cv2.imwrite("input_image.jpg",frame)
        img = cv2.imread("input_image.jpg")
        gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 
        gray_img = cv2.resize(gray_image, (28, 28)).reshape(1,28,28,1)
        #ax[2,1].imshow(gray_img.reshape(28, 28) , cmap = "gray")
        cv2.imshow("image", gray_img.reshape(28, 28))
        y_pred = model.predict_classes(gray_img)
        print("predicted alphabet = ", y_pred)
        #text_to_audio(myDict.get(y_pred[0]))

        result = False
videoCaptureObject.release()
cv2.destroyAllWindows()

Question: Using OpenCV, how can I create a region on the webcam feed and capture an image of only that region instead of the entire frame?


2 Answers

2 votes

You can create a Region of Interest (ROI) inside a video stream using OpenCV in Python.

You can use the code below to get a specific region of the input camera feed.

import cv2

videoCaptureObject = cv2.VideoCapture(0)

def sketch_transform(image):  # here you can do your image filtering
    image_grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_grayscale_blurred = cv2.GaussianBlur(image_grayscale, (7, 7), 0)
    image_canny = cv2.Canny(image_grayscale_blurred, 10, 80)
    _, mask = cv2.threshold(image_canny, 30, 255, cv2.THRESH_BINARY_INV)
    return mask

upper_left = (50, 50)
bottom_right = (300, 300)

result = True
while result:
    ret, image_frame = videoCaptureObject.read()

    rect_img = image_frame[upper_left[1] : bottom_right[1], upper_left[0] : bottom_right[0]]

    # keep an untouched copy of the ROI for prediction: rect_img is a view
    # into image_frame, so the marker and sketch below would leak into it
    raw_roi = rect_img.copy()

    # rectangle marker around the region of interest
    cv2.rectangle(image_frame, upper_left, bottom_right, (100, 50, 200), 5)

    sketcher_rect = sketch_transform(raw_roi)

    # back to 3 channels so the sketch can be pasted onto the colour frame
    sketcher_rect_rgb = cv2.cvtColor(sketcher_rect, cv2.COLOR_GRAY2BGR)

    # replace the ROI in the displayed frame with its sketched version
    image_frame[upper_left[1] : bottom_right[1], upper_left[0] : bottom_right[0]] = sketcher_rect_rgb

    cv2.imshow("test", image_frame)
    k = cv2.waitKey(1)

    if k % 256 == 27:
        print("Escape hit, closing...")
        break
    elif k % 256 == 32:
        cv2.imwrite("input_image.jpg", raw_roi)

        # preprocess only the ROI (not the whole frame) to the 28x28
        # grayscale input the model expects -- this could also be moved
        # into the sketch_transform function
        gray_image = cv2.cvtColor(raw_roi, cv2.COLOR_BGR2GRAY)
        gray_img = cv2.resize(gray_image, (28, 28)).reshape(1, 28, 28, 1)

        y_pred = model.predict_classes(gray_img)
        print("predicted alphabet = ", y_pred)
        #text_to_audio(myDict.get(y_pred[0]))
videoCaptureObject.release()
cv2.destroyAllWindows()


To test the live ROI preview on its own (before a trained model is loaded), comment out this part:

elif k % 256 == 32:
    cv2.imwrite("input_image.jpg", raw_roi)

    gray_image = cv2.cvtColor(raw_roi, cv2.COLOR_BGR2GRAY)
    gray_img = cv2.resize(gray_image, (28, 28)).reshape(1, 28, 28, 1)

    y_pred = model.predict_classes(gray_img)
    print("predicted alphabet = ", y_pred)
    #text_to_audio(myDict.get(y_pred[0]))
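
Note that `model.predict_classes` only exists on older Keras `Sequential` models and was removed in recent TensorFlow releases. If it is unavailable in your setup, a minimal equivalent sketch, assuming `model` is your loaded 26-class softmax classifier and `gray_img` is the preprocessed ROI from above:

import numpy as np

probs = model.predict(gray_img)    # shape (1, 26): one probability per class
y_pred = np.argmax(probs, axis=1)  # index of the highest-probability class
print("predicted alphabet = ", y_pred)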
2 votes

If you have a 1920 x 1080 frame, you can cut out an ROI like this:

import cv2

point = [0, 0, 300, 300]  # region of interest corners: [x1, y1, x2, y2]

camera = cv2.VideoCapture(0)
ret, frame = camera.read()  # read one frame from the camera

# NumPy slicing takes rows first (y), then columns (x)
roi = frame[point[1]:point[3], point[0]:point[2]]
cv2.imshow("roi", roi)
cv2.waitKey(0)
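
If you would rather not hard-code the corners, here is a minimal sketch (same `point` layout as above; the window name and ROI size are arbitrary choices) that centers a 300 x 300 ROI in whatever resolution the camera delivers:

import cv2

camera = cv2.VideoCapture(0)
ret, frame = camera.read()

h, w = frame.shape[:2]  # e.g. 1080, 1920 for a full-HD camera
size = 300
x1 = (w - size) // 2
y1 = (h - size) // 2
point = [x1, y1, x1 + size, y1 + size]

roi = frame[point[1]:point[3], point[0]:point[2]]
cv2.imshow("centered roi", roi)
cv2.waitKey(0)
camera.release()
cv2.destroyAllWindows()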