# Python code to demonstrate SQL to fetch data.
# importing the module
import sqlite3
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from scipy.stats import chisquare
# connect withe the myTable database
connection = sqlite3.connect(r"C:\Users\Aidan\Desktop\INA_DB.db")
# cursor object
crsr = connection.cursor()
dog= crsr.execute("Select s, ei, ki FROM INa_VC WHERE s IN ('d') ")
ans= crsr.fetchall()
#x = [0]*len(ans); y = [0]*len(ans)
x= np.zeros(len(ans)); y= np.zeros(len(ans))
for i in range(0,len(ans)):
x[i] = float(ans[i][1])
y[i] = float(ans[i][2])
# Reshaping
x, y = x.reshape(-1,1), y.reshape(-1, 1)
# Linear Regression Object
lin_regression = LinearRegression()
# Fitting linear model to the data
lin_regression.fit(x,y)
# Get slope of fitted line
m = lin_regression.coef_
# Get y-Intercept of the Line
b = lin_regression.intercept_
# Get Predictions for original x values
# you can also get predictions for new data
predictions = lin_regression.predict(x)
chi= chisquare(predictions, y)
# following slope intercept form
print ("formula: y = {0}x + {1}".format(m, b))
print(chi)
plt.scatter(x, y, color='black')
plt.plot(x, predictions, color='blue',linewidth=3)
plt.show()
Error:
runfile('C:/Users/Aidan/.spyder-py3/temp.py', wdir='C:/Users/Aidan/.spyder-py3')
Traceback (most recent call last):
File "", line 1, in
runfile('C:/Users/Aidan/.spyder-py3/temp.py', wdir='C:/Users/Aidan/.spyder-py3')
File "C:\Users\Aidan\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
execfile(filename, namespace)
File "C:\Users\Aidan\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Aidan/.spyder-py3/temp.py", line 28, in
y[i] = float(ans[i][2])
ValueError: could not convert string to float:
The issue that I am 99 percent sure of is an issue with the Y value. For my data set I have some y values purposely missing and this is leading to a float error. Given my current script what would be a quick fix in order to filter OUT missing NAN y values?
This script works perfectly if y values are in there.