I am getting a negative R2 score on the air quality challenge. Please help me identify the error. My functions are as follows:
def hypothesis(X, theta):
    """Return the linear-model predictions ``X . theta``.

    Args:
        X: Feature matrix; each row is one sample.
        theta: Weight vector (or matrix) whose leading dimension matches the
            number of columns of ``X``.

    Returns:
        The result of ``np.dot(X, theta)``.
    """
    return np.dot(X, theta)
def gradient(X, y, theta):
    """Return the gradient of the halved mean squared error w.r.t. ``theta``.

    The value computed is ``X.T . (X . theta - y) / m`` where ``m`` is the
    number of rows of ``X``.

    Args:
        X: Feature matrix with one sample per row.
        y: Targets, one per sample. May be flat ``(m,)`` or a column
            ``(m, 1)``; it is reshaped to match the predictions.
        theta: Current weights.

    Returns:
        Array with the same shape as ``theta``.
    """
    y_ = hypothesis(X, theta)
    # Align the targets with the predictions before subtracting: a (m,)
    # prediction minus a (m, 1) target would otherwise broadcast to (m, m).
    residual = y_ - np.asarray(y).reshape(np.shape(y_))
    m = X.shape[0]
    return np.dot(X.T, residual) / m
def gradient_descent(X, y, learning_rate=0.1, max_epochs=300):
    """Fit linear-regression weights with batch gradient descent.

    Starts from an all-zero weight vector and applies
    ``theta -= learning_rate * gradient(X, y, theta)`` for ``max_epochs``
    iterations.

    Args:
        X: Feature matrix with one sample per row.
        y: Targets, one per sample.
        learning_rate: Step size applied to each gradient.
        max_epochs: Number of update steps to perform.

    Returns:
        Tuple ``(theta, error_list)`` where ``theta`` has shape
        ``(X.shape[1],)`` and ``error_list[i]`` is the mean squared error
        measured just before update ``i``. A list that does not decrease
        indicates the learning rate is too large for the feature scale.
    """
    error_list = []
    n = X.shape[1]
    theta = np.zeros((n,))
    for _ in range(max_epochs):
        # Record the loss so convergence (or divergence) can be inspected.
        y_ = hypothesis(X, theta)
        residual = y_ - np.asarray(y).reshape(np.shape(y_))
        error_list.append(float(np.mean(residual ** 2)))

        grad = gradient(X, y, theta)
        theta = theta - learning_rate * grad
    return theta, error_list
# Train with the default learning rate and epoch count. X and y are defined
# outside this excerpt (presumably the training features and targets).
theta, error_list = gradient_descent(X, y)
For making predictions:
# Load the test features and prepend a column of ones for the bias term.
# NOTE(review): this only matches `theta` if the training matrix also had a
# leading ones column and the same feature preprocessing - confirm.
df2 = pd.read_csv('Test.csv')
x0 = np.ones((df2.shape[0], 1), dtype=int)
df2 = np.hstack((x0, df2))

# Column vector of predictions; -1 lets NumPy infer the row count instead of
# hard-coding the size of the test set.
data = hypothesis(df2, theta).reshape(-1, 1)
num = data.shape[0]
idx = np.arange(num, dtype=int).reshape(num, 1)
temp = np.hstack((idx, data))
print(temp.shape)

# Build the frame column by column so `id` stays an integer while `target`
# keeps its fractional part; casting the whole array to int truncated every
# prediction.
preds = pd.DataFrame({'id': idx.ravel(), 'target': data.ravel()})
`preds` should be my final answer.