I was able to reproduce the error on synthetic data:
import pandas as pd
from datetime import datetime
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
'B': ['B0', 'B1', 'B2', 'B3'],
'C': [datetime.now(), datetime.now(), datetime.now(), datetime.now()],
'D': ['D0', 'D1', 'D2', 'D3']},
index=[0, 1, 2, 3]);
df2 = pd.DataFrame({'A': ['A1', 'A2', 'A3', 'A4'],
'E': ['E1', 'E2', 'E3', 'E4']},
index=[0,1,2,3]);
df = pd.merge(df1, df2, how='left', on=['A', 'A']);
def getList(row):
r = [];
if row["A"] == "A1": r.append("test-01");
if row["B"] == "B1": r.append("test-02");
if row["B"] == "B2": r.append("test-03");
return r;
df["NEW_COLUMN"] = df.apply(lambda row: getList(row), axis = 1);
Original post: I would like to create a new column in pandas dataframe based on multiple conditions. The value of the new column should be list. However I got "ValueError: Empty data passed with indices specified." in case of empty list.
def getList(p_row):
r = [];
if p_row["field1"] > 0: r.append("x");
...
return r;
df["new_list_field"] = df.apply(lambda row: getList(row), axis = 1);
The complete error:
ValueError Traceback (most recent call last) C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in create_block_manager_from_arrays(arrays, names, axes) 4636 try: -> 4637 blocks = form_blocks(arrays, names, axes) 4638 mgr = BlockManager(blocks, axes)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in form_blocks(arrays, names, axes) 4728 if len(object_items) > 0: -> 4729 object_blocks = _simple_blockify(object_items, np.object_) 4730 blocks.extend(object_blocks)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in _simple_blockify(tuples, dtype) 4758 """ -> 4759 values, placement = _stack_arrays(tuples, dtype) 4760
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in _stack_arrays(tuples, dtype) 4822 for i, arr in enumerate(arrays): -> 4823 stacked[i] = _asarray_compat(arr) 4824
ValueError: could not broadcast input array from shape (2) into shape (195)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last) in () ----> 1 df["new_list_field"] = df.apply(lambda row: getList(row), axis = 1);
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, args, **kwds) 4875 f, axis, 4876 reduce=reduce, -> 4877 ignore_failures=ignore_failures) 4878 else: 4879 return self._apply_broadcast(f, axis)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _apply_standard(self, func, axis, ignore_failures, reduce) 4988 index = None 4989 -> 4990 result = self._constructor(data=results, index=index) 4991 result.columns = res_index 4992
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in init(self, data, index, columns, dtype, copy) 328 dtype=dtype, copy=copy) 329 elif isinstance(data, dict): --> 330 mgr = self._init_dict(data, index, columns, dtype=dtype) 331 elif isinstance(data, ma.MaskedArray): 332 import numpy.ma.mrecords as mrecords
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _init_dict(self, data, index, columns, dtype) 459 arrays = [data[k] for k in keys] 460 --> 461 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) 462 463 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype) 6171 axes = [_ensure_index(columns), _ensure_index(index)] 6172 -> 6173 return create_block_manager_from_arrays(arrays, arr_names, axes) 6174 6175
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in create_block_manager_from_arrays(arrays, names, axes) 4640
return mgr 4641 except ValueError as e: -> 4642 construction_error(len(arrays), arrays[0].shape, axes, e) 4643 4644C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in construction_error(tot_items, block_shape, axes, e) 4604
raise e 4605 if block_shape[0] == 0: -> 4606 raise ValueError("Empty data passed with indices specified.")4607 raise ValueError("Shape of passed values is {0}, indices imply {1}".format( 4608 passed, implied))
ValueError: Empty data passed with indices specified.
dataframe
in a Minimal, Complete, and Verifiable manner. – moshevi