I have tried the following code with many combinations of theta[0] and theta[1]. However, when I plot the error function, initially the error is zero and then it jumps to infinity. Please help.
My code:
#!/usr/bin/env python
# coding: utf-8
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import axes3d
# Setting data
dfx = pd.read_csv("../Training Data/Linear_X_Train.csv")
dfy = pd.read_csv("../Training Data/Linear_Y_Train.csv")
x = dfx.values
y = dfy.values
print(x.shape)
print(y.shape)
x = x.reshape((-1,))
y = y.reshape((-1,))
print(x.shape)
print(y.shape)
print (x[:20])
print (y[:20])
#print (max(x))
plt.scatter(x,y)
X = (x-x.mean())/x.std()
#X=x
Y = y
print (X.shape[0])
plt.scatter(X,Y)
#plt.show()
Y2 = (50*X+20)
plt.scatter(X, Y2)
plt.show()
print(Y[0])
print (max(Y))
plt.hist(X, 10)
plt.show()
plt.hist(Y,10)
plt.show()
def hypothesis(x, theta):
    # Linear model: h(x) = theta[0] + theta[1] * x
    return theta[0] + theta[1]*x

def error(X, Y, theta):
    # Sum of squared errors over all training examples
    m = X.shape[0]
    error = 0
    for i in range(m):
        hx = hypothesis(X[i], theta)
        error += (hx - Y[i])**2
    return error
def gradient(X, Y, theta):
    # Gradient of the squared error, summed over all examples
    grad = np.zeros((2,))
    m = X.shape[0]
    for i in range(m):
        hx = hypothesis(X[i], theta)
        grad[0] += (hx - Y[i])
        grad[1] += (hx - Y[i])*X[i]
    return grad
# Algorithm
def gradientDescent(X, Y, learning_rate=0.1):
    theta = np.array([1.0, 0.0])
    #theta = theta.astype('float64')
    print(type(theta))
    itr = 0
    max_itr = 100
    error_list = []
    theta_list = []
    while itr <= max_itr:
        grad = gradient(X, Y, theta)
        e = error(X, Y, theta)
        #print(e)
        error_list.append(e)
        theta_list.append((theta[0], theta[1]))
        theta[0] = theta[0] - learning_rate*grad[0]
        theta[1] = theta[1] - learning_rate*grad[1]
        itr += 1
    return theta, error_list, theta_list
final_theta, error_list, theta_list = gradientDescent(X, Y)
plt.plot(theta_list)
plt.show()
plt.plot(error_list)
plt.show()
print(final_theta)