I am trying to run a set of code in parallel, and it seems to work in some cases but not in others. The code below runs in parallel when saved_models[item] is None or not present, but it runs very slowly and serially when it has data.
Any thoughts? The saved_model object is not that big and its different for every single run. from statsmodels.tsa.arima_model import ARIMA def do_parallel_work(self): with mp.Pool(processes=self.max_workers) as pool: job_args = [(item , target_col , saved_models[item] if saved_models is not None and item in saved_models else None ) for item in items] results = pool.map(self.do_work_helper, job_args)
for result in results:
if result[1] is not None:
results_dict[result[0]] = result[1]
def do_work_helper(self, args):
    """Adapter that lets a single packed argument drive ``do_work``.

    ``args`` is the argument sequence for one job; it is unpacked
    positionally into ``self.do_work`` and that call's return value is
    handed back unchanged.
    """
    outcome = self.do_work(*args)
    return outcome
def do_work(self, item, target_cols, saved_model):
    """Illustrative sketch of the per-item job (the real body is elided).

    Builds an ``ARIMA`` model, calls ``fit`` and ``predict`` on it, and
    returns ``(item, stuff)``.

    NOTE(review): the ``if``/``else`` below exist only as comments, so as
    written both ``fit`` and ``predict`` run unconditionally; ``stuff`` is
    not defined anywhere in this excerpt, and ``fit_model`` is never read.
    """
    # can't show exactly what this is, but essentially it does something to the effect of:
    my_model = ARIMA()
    # if saved_model is None (intended branch: fit a fresh model)
    fit_model = my_model.fit(trend='nc', maxiter=1000, disp=0)
    # else (intended branch: predict instead of fitting)
    my_model.predict()
    return item, stuff