I'm writing a small script that calculates and plots a rarefaction curve, on a given data. (As described in Wikipedia: http://en.wikipedia.org/wiki/Rarefaction_%28ecology%29) But I try to plot the function for values larger than 170 i keep getting the following error: OverflowError: (34, 'Result too large')
Here is a sample of code with some data:
import numpy as np
import math
import matplotlib.pyplot as plt
import decimal
def pltCurve():
data = [[367, 172, 503, 1404, 8, 83, 7, 2, 7, 1, 0, 6, 31, 0, 6, 40, 0, 18, 132, 41, 1, 2, 15, 1, 0, 10, 0, 63, 59, 3, 0, 7, 9, 9, 4, 0, 2, 0, 23, 20, 4, 0, 0, 1, 11, 55, 0, 0, 1, 1, 0, 1, 4, 11, 0, 10, 6, 0, 4, 0, 443, 2, 49, 29, 0, 5, 6, 0, 0, 1, 0, 0, 0, 0, 0, 32, 0, 1, 14, 1, 0, 1, 3, 1, 1, 0, 7, 0, 2, 32, 2, 1, 55, 0, 21, 1, 7, 2, 0, 0, 0, 0, 0, 0, 0, 1, 76, 5, 9, 28, 1, 0, 72, 0, 0, 0, 0, 61, 6, 5, 0, 5, 2, 0, 1, 9, 1, 0, 1, 1, 1, 1, 1, 1, 34, 28, 1, 1, 1, 3, 3, 0, 0, 1, 0, 0, 3, 1, 3, 55, 19, 18, 87, 0, 1, 2, 6, 15, 10, 1, 2]]
for d in range(len(data)):
x = np.arange(1,170 , 10)
y = computeFn(d,x)
#plt.plot(x,y)
plt.errorbar(x,y,yerr=0.95)
plt.show()
def computeFn(i, n):
N = 4467
res = []
r = Decimal(0)
numOfGroups = 161
data = [[367, 172, 503, 1404, 8, 83, 7, 2, 7, 1, 0, 6, 31, 0, 6, 40, 0, 18, 132, 41, 1, 2, 15, 1, 0, 10, 0, 63, 59, 3, 0, 7, 9, 9, 4, 0, 2, 0, 23, 20, 4, 0, 0, 1, 11, 55, 0, 0, 1, 1, 0, 1, 4, 11, 0, 10, 6, 0, 4, 0, 443, 2, 49, 29, 0, 5, 6, 0, 0, 1, 0, 0, 0, 0, 0, 32, 0, 1, 14, 1, 0, 1, 3, 1, 1, 0, 7, 0, 2, 32, 2, 1, 55, 0, 21, 1, 7, 2, 0, 0, 0, 0, 0, 0, 0, 1, 76, 5, 9, 28, 1, 0, 72, 0, 0, 0, 0, 61, 6, 5, 0, 5, 2, 0, 1, 9, 1, 0, 1, 1, 1, 1, 1, 1, 34, 28, 1, 1, 1, 3, 3, 0, 0, 1, 0, 0, 3, 1, 3, 55, 19, 18, 87, 0, 1, 2, 6, 15, 10, 1, 2]]
#print N
for k in n:
r = (sum((logchoose(N-N_i,k)) for N_i in data[i]))*(logchoose(N,k))**-1
r = Decimal(numOfGroups) - r
print r # Debug
res.append(r)
return res
def logchoose(ni, ki):
"""
:rtype : N choose K Function
"""
try:
lgn1 = sum(math.log10(ii) for ii in range(1,ni))
lgk1 = sum(math.log10(ii) for ii in range(1,ki))
lgnk1 = sum(math.log10(ii) for ii in range(1,ni-ki+1))
except ValueError:
#print ni,ki
raise ValueError
#print 10**(lgn1 - (lgnk1 + lgk1))
return Decimal((10**(lgn1 - (lgnk1 + lgk1))))
pltCurve()
I've seen solutions to this problem using 'Decimal' module. I've played with it and still the error was raised. Any suggestions? Regards.
Edit: Here is the exact traceback:
Traceback (most recent call last):
File "C:\Users\user\Documents\Rarefactor\test.py", line 48, in <module>
pltCurve()
File "C:\Users\user\Documents\Rarefactor\test.py", line 11, in pltCurve
y = computeFn(d,x)
File "C:\Users\user\Documents\Rarefactor\test.py", line 26, in computeFn
r = (sum((logchoose(N-N_i,k)) for N_i in data[i]))*(logchoose(N,k))**-1
File "C:\Users\user\Documents\Rarefactor\test.py", line 26, in <genexpr>
r = (sum((logchoose(N-N_i,k)) for N_i in data[i]))*(logchoose(N,k))**-1
File "C:\Users\user\Documents\Rarefactor\test.py", line 45, in logchoose
return (10**(lgn1 - (lgnk1 + lgk1)))
OverflowError: (34, 'Result too large')