I have a dataset that contains, among other columns, 3 columns titled Gender (either M or F), House (either A or B or C), and Indicator (either 0 or 1). I want to plot the histogram of House A colored by Gender. This is my code to do this:
import pandas as pd
# Read only the three columns that the plot needs.
df = pd.read_csv('dataset.csv', usecols=['House', 'Gender', 'Indicator'])
# Keep the rows that belong to House A ...
A = df[df['House'] == 'A']
# ... and retain just the Indicator and Gender columns, in that order.
A = pd.DataFrame(A, columns=['Indicator', 'Gender'])
This imports the values of House A for the respective genders correctly, as shown by its contents:
print(A)
Indicator Gender
0 1 Male
1 1 Male
2 1 Male
4 1 Female
7 1 Male
8 1 Male
11 1 Male
14 1 Male
17 1 Male
18 1 Female
19 1 Female
20 1 Female
21 1 Male
24 1 Male
26 1 Female
27 1 Male
... ... ...
Now, when I try to plot the histogram of A colored by gender, the way I did in MATLAB, I get an error:
import matplotlib.pyplot as plt
# Fails: the whole frame is passed, so hist also tries to take the min/max of
# the string-valued Gender column (see the TypeError that ends the traceback).
plt.hist(A)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-130-81c3aef1748b> in <module>()
----> 1 plt.hist(A)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\pyplot.py in hist(x, bins, range, density, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, normed, hold, data, **kwargs)
3130 histtype=histtype, align=align, orientation=orientation,
3131 rwidth=rwidth, log=log, color=color, label=label,
-> 3132 stacked=stacked, normed=normed, data=data, **kwargs)
3133 finally:
3134 ax._hold = washold
~\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, *args, **kwargs)
1853 "the Matplotlib list!)" % (label_namer, func.__name__),
1854 RuntimeWarning, stacklevel=2)
-> 1855 return func(ax, *args, **kwargs)
1856
1857 inner.__doc__ = _add_data_doc(inner.__doc__,
~\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\axes\_axes.py in hist(***failed resolving arguments***)
6512 for xi in x:
6513 if len(xi) > 0:
-> 6514 xmin = min(xmin, xi.min())
6515 xmax = max(xmax, xi.max())
6516 bin_range = (xmin, xmax)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\numpy\core\_methods.py in _amin(a, axis, out, keepdims)
27
28 def _amin(a, axis=None, out=None, keepdims=False):
---> 29 return umr_minimum(a, axis, None, out, keepdims)
30
31 def _sum(a, axis=None, dtype=None, out=None, keepdims=False):
TypeError: '<=' not supported between instances of 'int' and 'str'
It seems we need to specify the exact column we want to make a histogram of. Unlike MATLAB, matplotlib can't automatically work out that it should color the bars according to the other column. So, doing the following plots the histogram, but with no color indicating the Gender:
# Histogram of the numeric Indicator column only; nothing here encodes Gender.
plt.hist(A['Indicator'])
So, how do I make either a stacked histogram or a side-by-side one, colored by gender? Something like this, except there would be only two bars (one per gender) at each Indicator value, x=0 and x=1:
import numpy as np  # the snippet uses np but never imported it

# Two columns of random samples stand in for the two groups.
x = np.random.randn(1000, 2)
colors = ['red', 'green']
# A 2-D input is drawn as one dataset per column, each in its own color.
plt.hist(x, color=colors)
plt.legend(['Male', 'Female'])
plt.title('Male and Female indicator by gender')
I have tried to imitate the above by copying the 2 dataframe columns into 2 columns of a list, and then trying to plot the histogram:
y=[]
# 'Gender'=='M' is a plain string comparison that evaluates to False before any
# indexing happens, so this asks for A[False] -- hence "KeyError: False" below.
# The mask was presumably meant to be A['Gender']=='M'.
# NOTE(review): the printed frame shows 'Male'/'Female', not 'M'/'F' -- confirm
# which labels the Gender column actually holds.
# NOTE(review): y is empty, so assigning to y[0] would raise IndexError once the
# right-hand side succeeds; y.append(...) is probably what is wanted.
y[0] = A[A['Gender'=='M']].tolist()
y[1] = A[A['Gender'=='F']].tolist()
plt.hist(y)
But this gives the following error:
KeyError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3062 try:
-> 3063 return self._engine.get_loc(key)
3064 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: False
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-152-138cb74b6e00> in <module>()
2 A= pd.DataFrame(A, columns=['Indicator', 'Gender'])
3 y=[]
----> 4 y[0] = A[A['Gender'=='M']].tolist()
5 y[1] = A[A['Gender'=='F']].tolist()
6 plt.hist(y)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2683 return self._getitem_multilevel(key)
2684 else:
-> 2685 return self._getitem_column(key)
2686
2687 def _getitem_column(self, key):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
2690 # get column
2691 if self.columns.is_unique:
-> 2692 return self._get_item_cache(key)
2693
2694 # duplicate columns & possible reduce dimensionality
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
2484 res = cache.get(item)
2485 if res is None:
-> 2486 values = self._data.get(item)
2487 res = self._box_item_values(item, values)
2488 cache[item] = res
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
4113
4114 if not isna(item):
-> 4115 loc = self.items.get_loc(item)
4116 else:
4117 indexer = np.arange(len(self.items))[isna(self.items)]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3063 return self._engine.get_loc(key)
3064 except KeyError:
-> 3065 return self._engine.get_loc(self._maybe_cast_indexer(key))
3066
3067 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: False

