Can someone help me create a multiprocessing version of the makeSubsets function? It basically calculates an index using the degree function and adds the element index (referring to the input set) to the subsets vector at the position calculated by the degree function.
My functional code, without multiprocessing.
import numpy as np
import time
from scipy.spatial import distance
import math
class Testing(object):
def __init__(self, inputSet, divisor):
self.set = inputSet
self.divisor = divisor
def __build__(self):
self.reference = self.__reference__()
self.num_subsets = ((len(self.set)-1) // self.divisor)
self.subsets = [[] for i in range(self.num_subsets)]
self.__makeSubsets__()
def __degree__(self, xk, c1, c2):
d_xk_c1 = distance.euclidean(xk, c1)
d_xk_c2 = distance.euclidean(xk, c2)
return d_xk_c1/(d_xk_c1 + d_xk_c2)
def __makeSubsets__(self):
start_time = time.time()
for i in range(len(self.set)):
index = int(self.__degree__(self.set[i], self.reference[0], self.reference[1]) * self.num_subsets)
self.subsets[index].append(i)
print("MAKE SUBSETS --- %s seconds ---" % (time.time() - total_time))
def __reference__(self):
dimension = len(self.set[0])
size = len(self.set)
#reference[0] = min of set iset; reference[1] = max of set iset
reference = [[], []]
for i in range(dimension):
reference[0].append(math.inf)
reference[1].append(-math.inf)
for i in range(size):
for j in range(dimension):
if self.set[i][j] < reference[0][j]:
reference[0][j] = self.set[i][j]
if self.set[i][j] > reference[1][j]:
reference[1][j] = self.set[i][j]
return reference
if __name__ == '__main__':
total_time = time.time()
N = 10000
Set = np.random.random((N,2))
anObjetct = Testing(inputSet = Set.tolist(), divisor = 1000)
anObjetct .__build__()
print("TOTAL --- %s seconds ---" % (time.time() - total_time))
EXAMPLE INPUT
10 vectors of two positions (as points in two-dimensional space)
N = 10
Set = np.random.random((N,2))
OUTPUT
Subsets containing input set slices. The values in the subsets correspond to the indexes of the elements in the input vector.
>>> objeto.subsets
[[0, 2, 9], [5, 7], [1, 3, 6, 8], [4]]
Execution time: 0.009 seconds
-----
I tried to create a version with multiprocessing, but it does not present the correct result, nor does it produce any results. Furthermore, the processing time is totally inconsistent.
import multiprocessing as mp
import numpy as np
import time
from scipy.spatial import distance
import math
class Testing(object):
def __init__(self, inputSet, divisor):
self.set = inputSet
self.divisor = divisor
def __build__(self):
self.reference = self.__reference__()
self.num_subsets = ((len(self.set)-1) // self.divisor)
self.subsets = [[] for i in range(self.num_subsets)]
with mp.Pool() as pool:
pool.map(self.__makeParallel__,self.set)
#self.__makeSubsets__()
def __degree__(self, xk, c1, c2):
d_xk_c1 = distance.euclidean(xk, c1)
d_xk_c2 = distance.euclidean(xk, c2)
return d_xk_c1/(d_xk_c1 + d_xk_c2)
def __makeParallel__(self, i):
index = math.floor(self.__degree__(i, self.reference[0], self.reference[1]) * self.num_subsets)
self.subsets[index].append(self.set.index(i))
def __makeSubsets__(self):
start_time = time.time()
for i in range(len(self.set)):
index = int(self.__degree__(self.set[i], self.reference[0], self.reference[1]) * self.num_subsets)
self.subsets[index].append(i)
print("MAKE SUBSETS --- %s seconds ---" % (time.time() - total_time))
def __reference__(self):
dimension = len(self.set[0])
size = len(self.set)
#reference[0] = min of set iset; reference[1] = max of set iset
reference = [[], []]
for i in range(dimension):
reference[0].append(math.inf)
reference[1].append(-math.inf)
for i in range(size):
for j in range(dimension):
if self.set[i][j] < reference[0][j]:
reference[0][j] = self.set[i][j]
if self.set[i][j] > reference[1][j]:
reference[1][j] = self.set[i][j]
return reference
if __name__ == '__main__':
total_time = time.time()
N = 10000
Set = np.random.random((N,2))
anObjetct = Testing(inputSet = Set.tolist(), divisor = 1000)
anObjetct .__build__()
print("TOTAL --- %s seconds ---" % (time.time() - total_time))
EXAMPLE INPUT
10 vectors of two positions (as points in two-dimensional space)
N = 10
Set = np.random.random((N,2))
OUTPUT
Totally inconsistent result.
>>> objeto.subsets
[[], [], [], []]
Execution time: 0.5 seconds
I just need a multiprocessing version of the makeSubsets function. pls, help! thanks