I have a Python function that takes a three-dimensional array representing an RGB image as input and outputs another three-dimensional array representing a scaled-down version of the original image. I use local averaging to compute the pixel values of the scaled-down image. Even though my implementation is linear in the size of the input array, it still takes a long time to finish for moderately large images (e.g. 1536 × 2048). To show what I mean by local averaging, here is a toy sketch first (not my actual code); my full implementation follows below.
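The sketch assumes the new dimensions divide the old ones evenly, so each output pixel is just the mean of one non-overlapping block of input pixels:

import numpy as np

# toy example: scale a 4x4 single-channel image down to 2x2 by averaging
# each non-overlapping 2x2 block
toy = np.arange(16).reshape(4, 4)
blocks = toy.reshape(2, 2, 2, 2)   # axes: (out_row, row_in_block, out_col, col_in_block)
print(blocks.mean(axis=(1, 3)))    # [[ 2.5  4.5]
                                   #  [10.5 12.5]]

My real images don't divide evenly like this, so my actual code builds prefix sums and averages over explicit windows instead. Here's my code: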
import numpy as np
import math
def scale_down_image(image, height, width):
    '''
    Scales down a given image bitmap to new dimensions, provided that
    the new dimensions don't exceed those of the image to be
    scaled.
    '''
    # determine original image dimensions
    image_height = image.shape[0]
    image_width = image.shape[1]
    assert image_height >= height and image_width >= width
    # create row_arr and col_arr arrays which help in querying window sum
    image = np.array(image, dtype=np.uint64)
    row_arr = np.zeros(image.shape, dtype=np.uint64)
    col_arr = np.zeros(image.shape, dtype=np.uint64)
    for i in range(image.shape[2]):
        for j in range(image.shape[0]):
            for k in range(image.shape[1]):
                if j == 0 and k == 0:
                    row_arr[j][k][i] = image[j][k][i]
                    col_arr[j][k][i] = image[j][k][i]
                elif j == 0:
                    row_arr[j][k][i] = row_arr[j][k - 1][i] + image[j][k][i]
                    col_arr[j][k][i] = image[j][k][i]
                elif k == 0:
                    row_arr[j][k][i] = image[j][k][i]
                    col_arr[j][k][i] = col_arr[j - 1][k][i] + image[j][k][i]
                else:
                    row_arr[j][k][i] = row_arr[j][k - 1][i] + image[j][k][i]
                    col_arr[j][k][i] = col_arr[j - 1][k][i] + image[j][k][i]
    # create range_query_arr array, which helps in querying window sum
    range_query_arr = np.zeros(image.shape, dtype=np.uint64)
    for i in range(image.shape[2]):
        for j in range(image.shape[0]):
            for k in range(image.shape[1]):
                if j == 0 and k == 0:
                    range_query_arr[j][k][i] = image[j][k][i]
                elif j == 0:
                    range_query_arr[j][k][i] = row_arr[j][k][i]
                elif k == 0:
                    range_query_arr[j][k][i] = col_arr[j][k][i]
                else:
                    range_query_arr[j][k][i] = (range_query_arr[j - 1][k - 1][i] +
                        row_arr[j][k - 1][i] + col_arr[j - 1][k][i] + image[j][k][i])
    # define a recursive function query, which computes the sum of elements in any window
    def query(y1, x1, y2, x2, channel):
        assert 0 <= channel < image.shape[2]
        if not (0 <= y1 < image.shape[0] and 0 <= x1 < image.shape[1] and 0 <= y2 < image.shape[0]
                and 0 <= x2 < image.shape[1] and y1 <= y2 and x1 <= x2):
            return 0
        else:
            return (range_query_arr[y2][x2][channel] - query(0, 0, y2, x1 - 1, channel) -
                    query(0, 0, y1 - 1, x2, channel) + query(0, 0, y1 - 1, x1 - 1, channel))
    # determine window dimensions
    window_height = int(math.ceil(image_height / height))
    window_width = int(math.ceil(image_width / width))
    # compute scaled image
    scaled_image = np.zeros((height, width, image.shape[2]), dtype=np.uint64)
    for i in range(scaled_image.shape[2]):
        for j in range(scaled_image.shape[0]):
            for k in range(scaled_image.shape[1]):
                y1 = max(0, int(((j + 1) / height) * image_height - 1 - window_height / 2))
                x1 = max(0, int(((k + 1) / width) * image_width - 1 - window_width / 2))
                y2 = min(image_height - 1, y1 + window_height - 1)
                x2 = min(image_width - 1, x1 + window_width - 1)
                scaled_image[j][k][i] = query(y1, x1, y2, x2, i) // ((y2 - y1 + 1) *
                                                                     (x2 - x1 + 1))
    # return scaled image
    return np.array(scaled_image, dtype=np.uint8)
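For reference, this is roughly how I call it, with a synthetic random image standing in for the real one (just to reproduce the slow timing; the target size here is only an example):

import numpy as np
import time

# random image with the dimensions mentioned above
image = np.random.randint(0, 256, size=(1536, 2048, 3), dtype=np.uint8)

start = time.time()
small = scale_down_image(image, 384, 512)
print(small.shape, small.dtype)                  # (384, 512, 3) uint8
print('took', round(time.time() - start, 1), 's')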
Could you suggest something that would make this run faster? Should I use another algorithm? Will moving my code to C++ help?